crash-reporter: write conversion failure diagnostics into fake dmp files

Change-Id: I9f1ca92def3e1d0fa43b3bef0f2a72d367953926

BUG=6299,7782
TEST=bvts

Review URL: http://codereview.chromium.org/4088003
diff --git a/crash_reporter/crash_collector.cc b/crash_reporter/crash_collector.cc
index 5ea06e2..1e2cd1a 100644
--- a/crash_reporter/crash_collector.cc
+++ b/crash_reporter/crash_collector.cc
@@ -86,6 +86,14 @@
                       pid);
 }
 
+FilePath CrashCollector::GetCrashPath(const FilePath &crash_directory,
+                                      const std::string &basename,
+                                      const std::string &extension) {
+  return crash_directory.Append(StringPrintf("%s.%s",
+                                             basename.c_str(),
+                                             extension.c_str()));
+}
+
 FilePath CrashCollector::GetCrashDirectoryInfo(
     uid_t process_euid,
     uid_t default_user_id,
@@ -125,10 +133,13 @@
 }
 
 bool CrashCollector::GetCreatedCrashDirectoryByEuid(uid_t euid,
-                                                    FilePath *crash_directory) {
+                                                    FilePath *crash_directory,
+                                                    bool *out_of_capacity) {
   uid_t default_user_id;
   gid_t default_user_group;
 
+  if (out_of_capacity != NULL) *out_of_capacity = false;
+
   // For testing.
   if (forced_crash_directory_ != NULL) {
     *crash_directory = FilePath(forced_crash_directory_);
@@ -173,6 +184,7 @@
   }
 
   if (!CheckHasCapacity(*crash_directory)) {
+    if (out_of_capacity != NULL) *out_of_capacity = true;
     return false;
   }
 
@@ -271,11 +283,13 @@
   file_util::GetFileSize(FilePath(payload_path), &payload_size);
   std::string meta_data = StringPrintf("%sexec_name=%s\n"
                                        "ver=%s\n"
+                                       "payload=%s\n"
                                        "payload_size=%lld\n"
                                        "done=1\n",
                                        extra_metadata_.c_str(),
                                        exec_name.c_str(),
                                        version.c_str(),
+                                       payload_path.c_str(),
                                        payload_size);
   if (!file_util::WriteFile(meta_path, meta_data.c_str(), meta_data.size())) {
     logger_->LogError("Unable to write %s", meta_path.value().c_str());
diff --git a/crash_reporter/crash_collector.h b/crash_reporter/crash_collector.h
index b4cabf9..e5eddcd 100644
--- a/crash_reporter/crash_collector.h
+++ b/crash_reporter/crash_collector.h
@@ -10,9 +10,9 @@
 #include <map>
 #include <string>
 
+#include "base/file_path.h"
 #include "gtest/gtest_prod.h"  // for FRIEND_TEST
 
-class FilePath;
 class SystemLogging;
 
 // User crash collector.
@@ -38,6 +38,7 @@
   FRIEND_TEST(CrashCollectorTest, CheckHasCapacityStrangeNames);
   FRIEND_TEST(CrashCollectorTest, CheckHasCapacityUsual);
   FRIEND_TEST(CrashCollectorTest, GetCrashDirectoryInfo);
+  FRIEND_TEST(CrashCollectorTest, GetCrashPath);
   FRIEND_TEST(CrashCollectorTest, FormatDumpBasename);
   FRIEND_TEST(CrashCollectorTest, Initialize);
   FRIEND_TEST(CrashCollectorTest, MetaData);
@@ -67,17 +68,26 @@
                            uid_t *uid,
                            gid_t *gid);
   // Determines the crash directory for given eud, and creates the
-  // directory if necessary with appropriate permissions.  Returns
+  // directory if necessary with appropriate permissions.  If
+  // |out_of_capacity| is not NULL, it is set to indicate if the call
+  // failed due to not having capacity in the crash directory. Returns
   // true whether or not directory needed to be created, false on any
-  // failure.  If the crash directory is already full, returns false.
+  // failure.  If the crash directory is at capacity, returns false.
   bool GetCreatedCrashDirectoryByEuid(uid_t euid,
-                                      FilePath *crash_file_path);
+                                      FilePath *crash_file_path,
+                                      bool *out_of_capacity);
 
   // Format crash name based on components.
   std::string FormatDumpBasename(const std::string &exec_name,
                                  time_t timestamp,
                                  pid_t pid);
 
+  // Create a file path to a file in |crash_directory| with the given
+  // |basename| and |extension|.
+  FilePath GetCrashPath(const FilePath &crash_directory,
+                        const std::string &basename,
+                        const std::string &extension);
+
   // Check given crash directory still has remaining capacity for another
   // crash.
   bool CheckHasCapacity(const FilePath &crash_directory);
diff --git a/crash_reporter/crash_collector_test.cc b/crash_reporter/crash_collector_test.cc
index ffab06a..859653b 100644
--- a/crash_reporter/crash_collector_test.cc
+++ b/crash_reporter/crash_collector_test.cc
@@ -120,6 +120,18 @@
   ASSERT_EQ("foo.20100523.135015.100", basename);
 }
 
+TEST_F(CrashCollectorTest, GetCrashPath) {
+  EXPECT_EQ("/var/spool/crash/myprog.20100101.1200.1234.core",
+            collector_.GetCrashPath(FilePath("/var/spool/crash"),
+                                    "myprog.20100101.1200.1234",
+                                    "core").value());
+  EXPECT_EQ("/home/chronos/user/crash/chrome.20100101.1200.1234.dmp",
+            collector_.GetCrashPath(FilePath("/home/chronos/user/crash"),
+                                    "chrome.20100101.1200.1234",
+                                    "dmp").value());
+}
+
+
 bool CrashCollectorTest::CheckHasCapacity() {
   static const char kFullMessage[] = "Crash directory test already full";
   bool has_capacity = collector_.CheckHasCapacity(test_dir_);
@@ -238,6 +250,7 @@
   EXPECT_EQ("foo=bar\n"
             "exec_name=kernel\n"
             "ver=version\n"
+            "payload=test/payload-file\n"
             "payload_size=3\n"
             "done=1\n", contents);
 }
diff --git a/crash_reporter/crash_sender b/crash_reporter/crash_sender
index ebb380e..c01a52d 100644
--- a/crash_reporter/crash_sender
+++ b/crash_reporter/crash_sender
@@ -152,20 +152,24 @@
   echo "${1%.*}"
 }
 
+get_extension() {
+  echo "${1##*.}"
+}
+
 # Return which kind of report the given metadata file relates to
 get_kind() {
-  # There should never be a report with both a dmp and kcrash file.
-  # If that were to happen we arbitrarily consider this a minidump
-  # report and effectively ignore the kcrash.
-  local base="$(get_base "$1")"
-  if [ -r "${base}.dmp" ]; then
+  local payload="$(get_key_value "$1" "payload")"
+  if [ ! -r "${payload}" ]; then
+    lecho "Missing payload: ${payload}"
+    echo "unknown"
+    return
+  fi
+  local kind="$(get_extension "${payload}")"
+  if [ "${kind}" = "dmp" ]; then
     echo "minidump"
     return
   fi
-  if [ -r "${base}.kcrash" ]; then
-    echo "kcrash"
-    return
-  fi
+  echo "${kind}"
 }
 
 get_key_value() {
@@ -192,6 +196,7 @@
 
 send_crash() {
   local meta_path="$1"
+  local report_payload="$(get_key_value "${meta_path}" "payload")"
   local kind="$(get_kind "${meta_path}")"
   local exec_name="$(get_key_value "${meta_path}" "exec_name")"
   local sleep_time=$(generate_uniform_random $SECONDS_SEND_SPREAD)
@@ -199,12 +204,9 @@
   local chromeos_version="$(get_key_value "${meta_path}" "ver")"
   local board="$(get_board)"
   local hwclass="$(get_hardware_class)"
-  local payload_extension="${kind}"
   local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
   local sig="$(get_key_value "${meta_path}" "sig")"
-  [ "${kind}" = "minidump" ] && payload_extension="dmp"
-  local report_payload="$(get_base "${meta_path}").${payload_extension}"
-  local send_payload_size="$(stat --printf=%s "${report_payload}")"
+  local send_payload_size="$(stat --printf=%s "${report_payload}" 2>/dev/null)"
   lecho "Sending crash:"
   lecho "  Scheduled to send in ${sleep_time}s"
   lecho "  Metadata: ${meta_path} (${kind})"
@@ -241,7 +243,7 @@
   local extra_value1="${write_payload_size}"
   local extra_key2="send_payload_size"
   local extra_value2="${send_payload_size}"
-  if [ "${kind}" = "kcrash" ]; then
+  if [ "${sig}" != "unknown" ]; then
     extra_key1="sig"
     extra_value1="${sig}"
     extra_key2="sig2"
@@ -309,8 +311,10 @@
     lecho "Considering metadata ${meta_path}."
     local kind=$(get_kind "${meta_path}")
 
-    if [ "${kind}" != "minidump" ] && [ "${kind}" != "kcrash" ]; then
-      lecho "Unknown report kind.  Removing report."
+    if [ "${kind}" != "minidump" ] && \
+       [ "${kind}" != "kcrash" ] && \
+       [ "${kind}" != "log" ]; then
+      lecho "Unknown report kind ${kind}.  Removing report."
       remove_report "${meta_path}"
       continue
     fi
diff --git a/crash_reporter/kernel_collector.cc b/crash_reporter/kernel_collector.cc
index a95d757..c958733 100644
--- a/crash_reporter/kernel_collector.cc
+++ b/crash_reporter/kernel_collector.cc
@@ -259,13 +259,14 @@
   logger_->LogInfo("Received prior crash notification from "
                    "kernel (signature %s) (%s)",
                    signature.c_str(),
-                   feedback ? "handling" : "ignoring");
+                   feedback ? "handling" : "ignoring - no consent");
 
   if (feedback) {
     count_crash_function_();
 
     if (!GetCreatedCrashDirectoryByEuid(kRootUid,
-                                        &root_crash_directory)) {
+                                        &root_crash_directory,
+                                        NULL)) {
       return true;
     }
 
diff --git a/crash_reporter/kernel_collector_test.cc b/crash_reporter/kernel_collector_test.cc
index 93b253d..7b84229 100644
--- a/crash_reporter/kernel_collector_test.cc
+++ b/crash_reporter/kernel_collector_test.cc
@@ -135,7 +135,7 @@
   SetUpSuccessfulCollect();
   s_metrics = false;
   ASSERT_TRUE(collector_.Collect());
-  ASSERT_NE(std::string::npos, logging_.log().find("(ignoring)"));
+  ASSERT_NE(std::string::npos, logging_.log().find("(ignoring - no consent)"));
   ASSERT_EQ(0, s_crashes);
 
   CheckPreservedDumpClear();
diff --git a/crash_reporter/user_collector.cc b/crash_reporter/user_collector.cc
index 8d99b47..f9cb494 100644
--- a/crash_reporter/user_collector.cc
+++ b/crash_reporter/user_collector.cc
@@ -19,7 +19,13 @@
 #include "base/logging.h"
 #include "base/string_util.h"
 #include "crash-reporter/system_logging.h"
+#include "gflags/gflags.h"
 
+DEFINE_bool(core2md_failure_test, false, "Core2md failure test");
+DEFINE_bool(directory_failure_test, false, "Spool directory failure test");
+
+static const char kCollectionErrorSignature[] =
+    "crash_reporter-user-collection";
 // This procfs file is used to cause kernel core file writing to
 // instead pipe the core file into a user space process.  See
 // core(5) man page.
@@ -149,16 +155,41 @@
   return true;
 }
 
+void UserCollector::LogCollectionError(const std::string &error_message) {
+  error_log_.append(error_message.c_str());
+  error_log_.append("\n");
+  logger_->LogError(error_message.c_str());
+}
+
+void UserCollector::EnqueueCollectionErrorLog(pid_t pid,
+                                              const std::string &exec) {
+  FilePath crash_path;
+  logger_->LogInfo("Writing conversion problems as separate crash report.");
+  if (!GetCreatedCrashDirectoryByEuid(0, &crash_path, NULL)) {
+    logger_->LogError("Could not even get log directory; out of space?");
+    return;
+  }
+  std::string dump_basename = FormatDumpBasename(exec, time(NULL), pid);
+  FilePath log_path = GetCrashPath(crash_path, dump_basename, "log");
+  FilePath meta_path = GetCrashPath(crash_path, dump_basename, "meta");
+  file_util::WriteFile(log_path,
+                       error_log_.data(),
+                       error_log_.length());
+  AddCrashMetaData("sig", kCollectionErrorSignature);
+  WriteCrashMetaData(meta_path, exec, log_path.value());
+}
+
 bool UserCollector::CopyOffProcFiles(pid_t pid,
                                      const FilePath &container_dir) {
   if (!file_util::CreateDirectory(container_dir)) {
-    logger_->LogInfo("Could not create %s", container_dir.value().c_str());
+    LogCollectionError(StringPrintf("Could not create %s",
+                                    container_dir.value().c_str()));
     return false;
   }
   FilePath process_path = GetProcessPath(pid);
   if (!file_util::PathExists(process_path)) {
-    logger_->LogWarning("Path %s does not exist",
-                        process_path.value().c_str());
+    LogCollectionError(StringPrintf("Path %s does not exist",
+                                    process_path.value().c_str()));
     return false;
   }
   static const char *proc_files[] = {
@@ -171,7 +202,8 @@
   for (unsigned i = 0; i < arraysize(proc_files); ++i) {
     if (!file_util::CopyFile(process_path.Append(proc_files[i]),
                              container_dir.Append(proc_files[i]))) {
-      logger_->LogWarning("Could not copy %s file", proc_files[i]);
+      LogCollectionError(StringPrintf("Could not copy %s file",
+                                      proc_files[i]));
       return false;
     }
   }
@@ -179,21 +211,31 @@
 }
 
 bool UserCollector::GetCreatedCrashDirectory(pid_t pid,
-                                             FilePath *crash_file_path) {
+                                             FilePath *crash_file_path,
+                                             bool *out_of_capacity) {
   FilePath process_path = GetProcessPath(pid);
   std::string status;
+  if (FLAGS_directory_failure_test) {
+    LogCollectionError("Purposefully failing to create spool directory");
+    return false;
+  }
   if (!file_util::ReadFileToString(process_path.Append("status"),
                                    &status)) {
-    logger_->LogError("Could not read status file");
+    LogCollectionError("Could not read status file");
     return false;
   }
   int process_euid;
   if (!GetIdFromStatus(kUserId, kIdEffective, status, &process_euid)) {
-    logger_->LogError("Could not find euid in status file");
+    LogCollectionError("Could not find euid in status file");
     return false;
   }
-  return GetCreatedCrashDirectoryByEuid(process_euid,
-                                        crash_file_path);
+  if (!GetCreatedCrashDirectoryByEuid(process_euid,
+                                      crash_file_path,
+                                      out_of_capacity)) {
+    LogCollectionError("Could not create crash directory");
+    return false;
+  }
+  return true;
 }
 
 bool UserCollector::CopyStdinToCoreFile(const FilePath &core_path) {
@@ -203,7 +245,7 @@
     return true;
   }
 
-  logger_->LogError("Could not write core file");
+  LogCollectionError("Could not write core file");
   // If the file system was full, make sure we remove any remnants.
   file_util::Delete(core_path, false);
   return false;
@@ -259,16 +301,16 @@
     return -1;
   }
   if (!WIFEXITED(status)) {
-    logger_->LogError("Process did not exit normally: %x", status);
+    logger_->LogError("Process did not exit normally: %d", status);
     return -1;
   }
   return WEXITSTATUS(status);
 }
 
-bool UserCollector::ConvertCoreToMinidump(const FilePath &core_path,
-                                          const FilePath &procfs_directory,
-                                          const FilePath &minidump_path,
-                                          const FilePath &temp_directory) {
+bool UserCollector::RunCoreToMinidump(const FilePath &core_path,
+                                      const FilePath &procfs_directory,
+                                      const FilePath &minidump_path,
+                                      const FilePath &temp_directory) {
   FilePath output_path = temp_directory.Append("output");
   std::vector<const char *> core2md_arguments;
   core2md_arguments.push_back(kCoreToMinidumpConverterPath);
@@ -276,83 +318,99 @@
   core2md_arguments.push_back(procfs_directory.value().c_str());
   core2md_arguments.push_back(minidump_path.value().c_str());
 
+  if (FLAGS_core2md_failure_test) {
+    // To test how core2md errors are propagaged, cause an error
+    // by forgetting a required argument.
+    core2md_arguments.pop_back();
+  }
+
   int errorlevel = ForkExecAndPipe(core2md_arguments,
                                    output_path.value().c_str());
 
   std::string output;
   file_util::ReadFileToString(output_path, &output);
   if (errorlevel != 0) {
-    logger_->LogInfo("Problem during %s [result=%d]: %s",
-                     kCoreToMinidumpConverterPath,
-                     errorlevel,
-                     output.c_str());
+    LogCollectionError(StringPrintf("Problem during %s [result=%d]: %s",
+                                    kCoreToMinidumpConverterPath,
+                                    errorlevel,
+                                    output.c_str()));
     return false;
   }
 
   if (!file_util::PathExists(minidump_path)) {
-    logger_->LogError("Minidump file %s was not created",
-                      minidump_path.value().c_str());
+    LogCollectionError(StringPrintf("Minidump file %s was not created",
+                                    minidump_path.value().c_str()));
     return false;
   }
   return true;
 }
 
-bool UserCollector::GenerateDiagnostics(pid_t pid,
-                                        const std::string &exec_name) {
-  FilePath container_dir("/tmp");
-  container_dir = container_dir.Append(
-      StringPrintf("crash_reporter.%d", pid));
-
+bool UserCollector::ConvertCoreToMinidump(pid_t pid,
+                                          const FilePath &container_dir,
+                                          const FilePath &core_path,
+                                          const FilePath &minidump_path) {
   if (!CopyOffProcFiles(pid, container_dir)) {
-    file_util::Delete(container_dir, true);
     return false;
   }
 
-  FilePath crash_path;
-  if (!GetCreatedCrashDirectory(pid, &crash_path)) {
-    file_util::Delete(container_dir, true);
-    return false;
-  }
-  std::string dump_basename = FormatDumpBasename(exec_name, time(NULL), pid);
-  FilePath core_path = crash_path.Append(
-      StringPrintf("%s.core", dump_basename.c_str()));
-
   if (!CopyStdinToCoreFile(core_path)) {
-    file_util::Delete(container_dir, true);
     return false;
   }
 
-  FilePath minidump_path = crash_path.Append(
-      StringPrintf("%s.dmp", dump_basename.c_str()));
-
-  bool conversion_result = true;
-  if (!ConvertCoreToMinidump(core_path,
-                             container_dir,  // procfs directory
-                             minidump_path,
-                             container_dir)) {  // temporary directory
-    // Note we leave the container directory for inspection.
-    conversion_result = false;
-  } else {
-    file_util::Delete(container_dir, true);
-  }
-
-  WriteCrashMetaData(
-      crash_path.Append(
-          StringPrintf("%s.meta", dump_basename.c_str())),
-      exec_name,
-      minidump_path.value());
+  bool conversion_result = RunCoreToMinidump(
+      core_path,
+      container_dir,  // procfs directory
+      minidump_path,
+      container_dir);  // temporary directory
 
   if (conversion_result) {
     logger_->LogInfo("Stored minidump to %s", minidump_path.value().c_str());
   }
 
+  return conversion_result;
+}
+
+bool UserCollector::ConvertAndEnqueueCrash(int pid,
+                                           const std::string &exec,
+                                           bool *out_of_capacity) {
+  FilePath crash_path;
+  if (!GetCreatedCrashDirectory(pid, &crash_path, out_of_capacity)) {
+    LogCollectionError("Unable to find/create process-specific crash path");
+    return false;
+  }
+
+  // Directory like /tmp/crash_reporter.1234 which contains the
+  // procfs entries and other temporary files used during conversion.
+  FilePath container_dir = FilePath("/tmp").Append(
+      StringPrintf("crash_reporter.%d", pid));
+  std::string dump_basename = FormatDumpBasename(exec, time(NULL), pid);
+  FilePath core_path = GetCrashPath(crash_path, dump_basename, "core");
+  FilePath meta_path = GetCrashPath(crash_path, dump_basename, "meta");
+  FilePath minidump_path = GetCrashPath(crash_path, dump_basename, "dmp");
+
+  if (!ConvertCoreToMinidump(pid, container_dir, core_path,
+                            minidump_path)) {
+    logger_->LogInfo("Leaving core file at %s due to conversion error",
+                     core_path.value().c_str());
+    return false;
+  }
+
+  // Here we commit to sending this file.  We must not return false
+  // after this point or we will generate a log report as well as a
+  // crash report.
+  WriteCrashMetaData(meta_path,
+                     exec,
+                     minidump_path.value());
+
   if (!file_util::PathExists(FilePath(kLeaveCoreFile))) {
     file_util::Delete(core_path, false);
   } else {
-    logger_->LogInfo("Leaving core file at %s", core_path.value().c_str());
+    logger_->LogInfo("Leaving core file at %s due to developer image",
+                     core_path.value().c_str());
   }
 
-  return conversion_result;
+  file_util::Delete(container_dir, true);
+  return true;
 }
 
 bool UserCollector::HandleCrash(int signal, int pid, const char *force_exec) {
@@ -368,14 +426,20 @@
   bool feedback = is_feedback_allowed_function_();
   logger_->LogWarning("Received crash notification for %s[%d] sig %d (%s)",
                       exec.c_str(), pid, signal,
-                      feedback ? "handling" : "ignoring");
+                      feedback ? "handling" : "ignoring - no consent");
 
   if (feedback) {
     count_crash_function_();
 
     if (generate_diagnostics_) {
-      return GenerateDiagnostics(pid, exec);
+      bool out_of_capacity = false;
+      if (!ConvertAndEnqueueCrash(pid, exec, &out_of_capacity)) {
+        if (!out_of_capacity)
+          EnqueueCollectionErrorLog(pid, exec);
+        return false;
+      }
     }
   }
+
   return true;
 }
diff --git a/crash_reporter/user_collector.h b/crash_reporter/user_collector.h
index 7d6c711..2ed8e6f 100644
--- a/crash_reporter/user_collector.h
+++ b/crash_reporter/user_collector.h
@@ -82,27 +82,40 @@
                        IdKind kind,
                        const std::string &status_contents,
                        int *id);
+
+  void LogCollectionError(const std::string &error_message);
+  void EnqueueCollectionErrorLog(pid_t pid, const std::string &exec_name);
+
   bool CopyOffProcFiles(pid_t pid, const FilePath &process_map);
   // Determines the crash directory for given pid based on pid's owner,
   // and creates the directory if necessary with appropriate permissions.
   // Returns true whether or not directory needed to be created, false on
   // any failure.
   bool GetCreatedCrashDirectory(pid_t pid,
-                                FilePath *crash_file_path);
+                                FilePath *crash_file_path,
+                                bool *out_of_capacity);
   bool CopyStdinToCoreFile(const FilePath &core_path);
   int ForkExecAndPipe(std::vector<const char *> &arguments,
                       const char *output_file);
-  bool ConvertCoreToMinidump(const FilePath &core_path,
-                             const FilePath &procfs_directory,
-                             const FilePath &minidump_path,
-                             const FilePath &temp_directory);
-  bool GenerateDiagnostics(pid_t pid, const std::string &exec_name);
+  bool RunCoreToMinidump(const FilePath &core_path,
+                         const FilePath &procfs_directory,
+                         const FilePath &minidump_path,
+                         const FilePath &temp_directory);
+  bool ConvertCoreToMinidump(pid_t pid,
+                             const FilePath &container_dir,
+                             const FilePath &core_path,
+                             const FilePath &minidump_path);
+  bool ConvertAndEnqueueCrash(int pid, const std::string &exec_name,
+                              bool *out_of_capacity);
 
   bool generate_diagnostics_;
   std::string core_pattern_file_;
   std::string our_path_;
   bool initialized_;
 
+  // String containing the current log of crash handling errors.
+  std::string error_log_;
+
   static const char *kUserId;
   static const char *kGroupId;
 };