Improve error diagnostics in UserCollector.

This CL makes the following changes to improve error diagnostics in
UserCollector:
1. Add an error type to describe why the crash reporter fails to
   collects a user crash, which is reported to the crash server along
   with the "crash_reporter-user-collection" error signature.
2. Perform some preliminary validations of the core file before
   converting it to a minidump file using core2md.
3. Identify the case when a 32-bit core file is produced on a 64-bit
   platform as core2md currently cannot handle 32-bit core files on
   a 64-bit platform.

BUG=chromium-os:7871
TEST=Tested the following:
1. emerge-{x86,amd64,arm}-generic crash-reporter
2. FEATURES="test" emerge-{x86,amd64,arm}-generic crash-reporter
3. Run the following autotest tests on x86-mario and stumpy64:
   - logging_CrashSender
   - logging_UserCrash

Change-Id: Ib50b4cc81a91f7cd75f9440005200d4027ce1f6f
Reviewed-on: https://gerrit.chromium.org/gerrit/17166
Reviewed-by: Michael Krebs <mkrebs@chromium.org>
Tested-by: Ben Chan <benchan@chromium.org>
Commit-Ready: Ben Chan <benchan@chromium.org>
diff --git a/crash_reporter/user_collector.cc b/crash_reporter/user_collector.cc
index 308fe9a..28c5ce6 100644
--- a/crash_reporter/user_collector.cc
+++ b/crash_reporter/user_collector.cc
@@ -4,6 +4,9 @@
 
 #include "crash-reporter/user_collector.h"
 
+#include <bits/wordsize.h>
+#include <elf.h>
+#include <fcntl.h>
 #include <grp.h>  // For struct group.
 #include <pcrecpp.h>
 #include <pwd.h>  // For struct passwd.
@@ -67,6 +70,25 @@
 UserCollector::~UserCollector() {
 }
 
+std::string UserCollector::GetErrorTypeSignature(ErrorType error_type) const {
+  switch (error_type) {
+    case kErrorSystemIssue:
+      return "system-issue";
+    case kErrorReadCoreData:
+      return "read-core-data";
+    case kErrorUnusableProcFiles:
+      return "unusable-proc-files";
+    case kErrorInvalidCoreFile:
+      return "invalid-core-file";
+    case kErrorUnsupported32BitCoreFile:
+      return "unsupported-32bit-core-file";
+    case kErrorCore2MinidumpConversion:
+      return "core2md-conversion";
+    default:
+      return "";
+  }
+}
+
 std::string UserCollector::GetPattern(bool enabled) const {
   if (enabled) {
     // Combine the three crash attributes into one parameter to try to reduce
@@ -210,6 +232,7 @@
 }
 
 void UserCollector::EnqueueCollectionErrorLog(pid_t pid,
+                                              ErrorType error_type,
                                               const std::string &exec) {
   FilePath crash_path;
   LOG(INFO) << "Writing conversion problems as separate crash report.";
@@ -237,6 +260,7 @@
   // might have created.
   WriteNewFile(log_path, error_log.data(), error_log.length());
   AddCrashMetaData("sig", kCollectionErrorSignature);
+  AddCrashMetaData("error_type", GetErrorTypeSignature(error_type));
   WriteCrashMetaData(meta_path, exec, log_path.value());
 }
 
@@ -268,7 +292,7 @@
   return true;
 }
 
-bool UserCollector::ValidateProcFiles(const FilePath &container_dir) {
+bool UserCollector::ValidateProcFiles(const FilePath &container_dir) const {
   // Check if the maps file is empty, which could be due to the crashed
   // process being reaped by the kernel before finishing a core dump.
   int64 file_size = 0;
@@ -283,6 +307,41 @@
   return true;
 }
 
+UserCollector::ErrorType UserCollector::ValidateCoreFile(
+    const FilePath &core_path) const {
+  int fd = HANDLE_EINTR(open(core_path.value().c_str(), O_RDONLY));
+  if (fd < 0) {
+    LOG(ERROR) << "Could not open core file " << core_path.value();
+    return kErrorInvalidCoreFile;
+  }
+
+  char e_ident[EI_NIDENT];
+  bool read_ok = file_util::ReadFromFD(fd, e_ident, sizeof(e_ident));
+  HANDLE_EINTR(close(fd));
+  if (!read_ok) {
+    LOG(ERROR) << "Could not read header of core file";
+    return kErrorInvalidCoreFile;
+  }
+
+  if (e_ident[EI_MAG0] != ELFMAG0 || e_ident[EI_MAG1] != ELFMAG1 ||
+      e_ident[EI_MAG2] != ELFMAG2 || e_ident[EI_MAG3] != ELFMAG3) {
+    LOG(ERROR) << "Invalid core file";
+    return kErrorInvalidCoreFile;
+  }
+
+#if __WORDSIZE == 64
+  // TODO(benchan, mkrebs): Remove this check once core2md can
+  // handles both 32-bit and 64-bit ELF on a 64-bit platform.
+  if (e_ident[EI_CLASS] == ELFCLASS32) {
+    LOG(ERROR) << "Conversion of 32-bit core file on 64-bit platform is "
+               << "currently not supported";
+    return kErrorUnsupported32BitCoreFile;
+  }
+#endif
+
+  return kErrorNone;
+}
+
 bool UserCollector::GetCreatedCrashDirectory(pid_t pid,
                                              FilePath *crash_file_path,
                                              bool *out_of_capacity) {
@@ -373,10 +432,11 @@
   return true;
 }
 
-bool UserCollector::ConvertCoreToMinidump(pid_t pid,
-                                          const FilePath &container_dir,
-                                          const FilePath &core_path,
-                                          const FilePath &minidump_path) {
+UserCollector::ErrorType UserCollector::ConvertCoreToMinidump(
+    pid_t pid,
+    const FilePath &container_dir,
+    const FilePath &core_path,
+    const FilePath &minidump_path) {
   // If proc files are unuable, we continue to read the core file from stdin,
   // but only skip the core-to-minidump conversion, so that we may still use
   // the core file for debugging.
@@ -384,35 +444,37 @@
       CopyOffProcFiles(pid, container_dir) && ValidateProcFiles(container_dir);
 
   if (!CopyStdinToCoreFile(core_path)) {
-    return false;
+    return kErrorReadCoreData;
   }
 
   if (!proc_files_usable) {
     LOG(INFO) << "Skipped converting core file to minidump due to "
               << "unusable proc files";
-    return false;
+    return kErrorUnusableProcFiles;
   }
 
-  bool conversion_result = RunCoreToMinidump(
-      core_path,
-      container_dir,  // procfs directory
-      minidump_path,
-      container_dir);  // temporary directory
-
-  if (conversion_result) {
-    LOG(INFO) << "Stored minidump to " << minidump_path.value();
+  ErrorType error = ValidateCoreFile(core_path);
+  if (error != kErrorNone) {
+    return error;
   }
 
-  return conversion_result;
+  if (!RunCoreToMinidump(core_path,
+                         container_dir,  // procfs directory
+                         minidump_path,
+                         container_dir)) {  // temporary directory
+    return kErrorCore2MinidumpConversion;
+  }
+
+  LOG(INFO) << "Stored minidump to " << minidump_path.value();
+  return kErrorNone;
 }
 
-bool UserCollector::ConvertAndEnqueueCrash(int pid,
-                                           const std::string &exec,
-                                           bool *out_of_capacity) {
+UserCollector::ErrorType UserCollector::ConvertAndEnqueueCrash(
+    int pid, const std::string &exec, bool *out_of_capacity) {
   FilePath crash_path;
   if (!GetCreatedCrashDirectory(pid, &crash_path, out_of_capacity)) {
     LOG(ERROR) << "Unable to find/create process-specific crash path";
-    return false;
+    return kErrorSystemIssue;
   }
 
   // Directory like /tmp/crash_reporter/1234 which contains the
@@ -431,11 +493,12 @@
   if (GetLogContents(FilePath(kDefaultLogConfig), exec, log_path))
     AddCrashMetaData("log", log_path.value());
 
-  if (!ConvertCoreToMinidump(pid, container_dir, core_path,
-                            minidump_path)) {
+  ErrorType error_type =
+      ConvertCoreToMinidump(pid, container_dir, core_path, minidump_path);
+  if (error_type != kErrorNone) {
     LOG(INFO) << "Leaving core file at " << core_path.value()
               << " due to conversion error";
-    return false;
+    return error_type;
   }
 
   // Here we commit to sending this file.  We must not return false
@@ -453,7 +516,7 @@
   }
 
   file_util::Delete(container_dir, true);
-  return true;
+  return kErrorNone;
 }
 
 bool UserCollector::ParseCrashAttributes(const std::string &crash_attributes,
@@ -546,11 +609,11 @@
 
     if (generate_diagnostics_) {
       bool out_of_capacity = false;
-      bool convert_and_enqueue_result =
+      ErrorType error_type =
           ConvertAndEnqueueCrash(pid, exec, &out_of_capacity);
-      if (!convert_and_enqueue_result) {
+      if (error_type != kErrorNone) {
         if (!out_of_capacity)
-          EnqueueCollectionErrorLog(pid, exec);
+          EnqueueCollectionErrorLog(pid, error_type, exec);
         return false;
       }
     }
diff --git a/crash_reporter/user_collector.h b/crash_reporter/user_collector.h
index 043d38c..01ca2af 100644
--- a/crash_reporter/user_collector.h
+++ b/crash_reporter/user_collector.h
@@ -69,6 +69,7 @@
   FRIEND_TEST(UserCollectorTest, ShouldDumpDeveloperImageOverridesConsent);
   FRIEND_TEST(UserCollectorTest, ShouldDumpUseConsentProductionImage);
   FRIEND_TEST(UserCollectorTest, ValidateProcFiles);
+  FRIEND_TEST(UserCollectorTest, ValidateCoreFile);
 
   // Enumeration to pass to GetIdFromStatus.  Must match the order
   // that the kernel lists IDs in the status file.
@@ -80,8 +81,23 @@
     kIdMax
   };
 
+  enum ErrorType {
+    kErrorNone,
+    kErrorSystemIssue,
+    kErrorReadCoreData,
+    kErrorUnusableProcFiles,
+    kErrorInvalidCoreFile,
+    kErrorUnsupported32BitCoreFile,
+    kErrorCore2MinidumpConversion,
+  };
+
   static const int kForkProblem = 255;
 
+  // Returns an error type signature for a given |error_type| value,
+  // which is reported to the crash server along with the
+  // crash_reporter-user-collection signature.
+  std::string GetErrorTypeSignature(ErrorType error_type) const;
+
   std::string GetPattern(bool enabled) const;
   bool SetUpInternal(bool enabled);
 
@@ -110,7 +126,8 @@
                           std::string *state);
 
   void LogCollectionError(const std::string &error_message);
-  void EnqueueCollectionErrorLog(pid_t pid, const std::string &exec_name);
+  void EnqueueCollectionErrorLog(pid_t pid, ErrorType error_type,
+                                 const std::string &exec_name);
 
   bool CopyOffProcFiles(pid_t pid, const FilePath &container_dir);
 
@@ -119,7 +136,14 @@
   // a process is reaped by the kernel before the copying of its proc files
   // takes place, some proc files like /proc/<pid>/maps may contain nothing
   // and thus become unusable.
-  bool ValidateProcFiles(const FilePath &container_dir);
+  bool ValidateProcFiles(const FilePath &container_dir) const;
+
+  // Validates the core file at |core_path| and returns kErrorNone if
+  // the file contains the ELF magic bytes and an ELF class that matches the
+  // platform (i.e. 32-bit ELF on a 32-bit platform or 64-bit ELF on a 64-bit
+  // platform), which is due to the limitation in core2md. It returns an error
+  // type otherwise.
+  ErrorType ValidateCoreFile(const FilePath &core_path) const;
 
   // Determines the crash directory for given pid based on pid's owner,
   // and creates the directory if necessary with appropriate permissions.
@@ -133,12 +157,12 @@
                          const FilePath &procfs_directory,
                          const FilePath &minidump_path,
                          const FilePath &temp_directory);
-  bool ConvertCoreToMinidump(pid_t pid,
-                             const FilePath &container_dir,
-                             const FilePath &core_path,
-                             const FilePath &minidump_path);
-  bool ConvertAndEnqueueCrash(int pid, const std::string &exec_name,
-                              bool *out_of_capacity);
+  ErrorType ConvertCoreToMinidump(pid_t pid,
+                                  const FilePath &container_dir,
+                                  const FilePath &core_path,
+                                  const FilePath &minidump_path);
+  ErrorType ConvertAndEnqueueCrash(int pid, const std::string &exec_name,
+                                   bool *out_of_capacity);
   bool ParseCrashAttributes(const std::string &crash_attributes,
                             pid_t *pid, int *signal,
                             std::string *kernel_supplied_name);
diff --git a/crash_reporter/user_collector_test.cc b/crash_reporter/user_collector_test.cc
index 4259f4e..2d3ac0c 100644
--- a/crash_reporter/user_collector_test.cc
+++ b/crash_reporter/user_collector_test.cc
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <bits/wordsize.h>
+#include <elf.h>
 #include <unistd.h>
 
 #include "base/file_util.h"
@@ -432,6 +434,53 @@
   EXPECT_TRUE(collector_.ValidateProcFiles(container_dir));
 }
 
+TEST_F(UserCollectorTest, ValidateCoreFile) {
+  ScopedTempDir temp_dir;
+  ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+  FilePath container_dir = temp_dir.path();
+  FilePath core_file = container_dir.Append("core");
+
+  // Core file does not exist
+  EXPECT_EQ(UserCollector::kErrorInvalidCoreFile,
+            collector_.ValidateCoreFile(core_file));
+  char e_ident[EI_NIDENT];
+  e_ident[EI_MAG0] = ELFMAG0;
+  e_ident[EI_MAG1] = ELFMAG1;
+  e_ident[EI_MAG2] = ELFMAG2;
+  e_ident[EI_MAG3] = ELFMAG3;
+#if __WORDSIZE == 32
+  e_ident[EI_CLASS] = ELFCLASS32;
+#elif __WORDSIZE == 64
+  e_ident[EI_CLASS] = ELFCLASS64;
+#else
+#error Unknown/unsupported value of __WORDSIZE.
+#endif
+
+  // Core file has the expected header
+  ASSERT_TRUE(file_util::WriteFile(core_file, e_ident, sizeof(e_ident)));
+  EXPECT_EQ(UserCollector::kErrorNone,
+            collector_.ValidateCoreFile(core_file));
+
+#if __WORDSIZE == 64
+  // 32-bit core file on 64-bit platform
+  e_ident[EI_CLASS] = ELFCLASS32;
+  ASSERT_TRUE(file_util::WriteFile(core_file, e_ident, sizeof(e_ident)));
+  EXPECT_EQ(UserCollector::kErrorUnsupported32BitCoreFile,
+            collector_.ValidateCoreFile(core_file));
+  e_ident[EI_CLASS] = ELFCLASS64;
+#endif
+
+  // Invalid core files
+  ASSERT_TRUE(file_util::WriteFile(core_file, e_ident, sizeof(e_ident) - 1));
+  EXPECT_EQ(UserCollector::kErrorInvalidCoreFile,
+            collector_.ValidateCoreFile(core_file));
+
+  e_ident[EI_MAG0] = 0;
+  ASSERT_TRUE(file_util::WriteFile(core_file, e_ident, sizeof(e_ident)));
+  EXPECT_EQ(UserCollector::kErrorInvalidCoreFile,
+            collector_.ValidateCoreFile(core_file));
+}
+
 int main(int argc, char **argv) {
   SetUpTests(&argc, argv, false);
   return RUN_ALL_TESTS();