Retry^2 "[llvm-profdata] Add option to ingest filepaths from a file"

Changes since the initial commit:
- Normalize file paths read from the file to prevent Windows path
  separators from escaping parts of the path.
- Since we need to store the normalized file paths in WeightedFile,
  don't do tricky things to keep the source MemoryBuffer alive.
- Don't use list-initialization for a std::string in WeightedFile.

Differential Revision: http://reviews.llvm.org/D20980

llvm-svn: 271953
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 20a1672..b5966a4 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -108,12 +108,12 @@
 }
 
 struct WeightedFile {
-  StringRef Filename;
+  std::string Filename;
   uint64_t Weight;
 
   WeightedFile() {}
 
-  WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
+  WeightedFile(std::string F, uint64_t W) : Filename(F), Weight(W) {}
 };
 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
 
@@ -208,19 +208,53 @@
   Writer->write(ProfileMap);
 }
 
+static std::string canonicalizeFilePath(StringRef Filename) {
+  SmallString<256> CanonicalFilename;
+  sys::path::native(Filename, CanonicalFilename);
+  return StringRef(CanonicalFilename).str();
+}
+
 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
-  StringRef WeightStr, FileName;
-  std::tie(WeightStr, FileName) = WeightedFilename.split(',');
+  StringRef WeightStr, FilenameStr;
+  std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
 
   uint64_t Weight;
   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
     exitWithError("Input weight must be a positive integer.");
 
-  if (!sys::fs::exists(FileName))
-    exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
-                      FileName);
+  std::string CanonicalFilename = canonicalizeFilePath(FilenameStr);
 
-  return WeightedFile(FileName, Weight);
+  if (!sys::fs::exists(CanonicalFilename))
+    exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
+                      CanonicalFilename);
+
+  return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
+}
+
+static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
+                                    WeightedFileVector &WFV) {
+  if (InputFilenamesFile == "")
+    return;
+
+  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
+  if (!BufOrError)
+    exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
+
+  auto Buffer = std::move(*BufOrError);
+  StringRef Data = Buffer->getBuffer();
+  SmallVector<StringRef, 8> Entries;
+  Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  for (const StringRef &FileWeightEntry : Entries) {
+    StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
+    // Skip comments.
+    if (SanitizedEntry.startswith("#"))
+      continue;
+    // If there's no comma, it's an unweighted profile.
+    else if (SanitizedEntry.find(',') == StringRef::npos)
+      WFV.emplace_back(canonicalizeFilePath(SanitizedEntry), 1);
+    else
+      WFV.emplace_back(parseWeightedFile(SanitizedEntry));
+  }
 }
 
 static int merge_main(int argc, const char *argv[]) {
@@ -228,6 +262,15 @@
                                        cl::desc("<filename...>"));
   cl::list<std::string> WeightedInputFilenames("weighted-input",
                                                cl::desc("<weight>,<filename>"));
+  cl::opt<std::string> InputFilenamesFile(
+      "input-files", cl::init(""),
+      cl::desc("Path to file containing newline-separated "
+               "[<weight>,]<filename> entries"));
+  cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
+                                cl::aliasopt(InputFilenamesFile));
+  cl::opt<bool> DumpInputFileList(
+      "dump-input-file-list", cl::init(false), cl::Hidden,
+      cl::desc("Dump the list of input files and their weights, then exit"));
   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                       cl::init("-"), cl::Required,
                                       cl::desc("Output file"));
@@ -249,15 +292,22 @@
 
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
-  if (InputFilenames.empty() && WeightedInputFilenames.empty())
+  WeightedFileVector WeightedInputs;
+  for (StringRef Filename : InputFilenames)
+    WeightedInputs.emplace_back(canonicalizeFilePath(Filename), 1);
+  for (StringRef WeightedFilename : WeightedInputFilenames)
+    WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
+  parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
+
+  if (WeightedInputs.empty())
     exitWithError("No input files specified. See " +
                   sys::path::filename(argv[0]) + " -help");
 
-  WeightedFileVector WeightedInputs;
-  for (StringRef Filename : InputFilenames)
-    WeightedInputs.push_back(WeightedFile(Filename, 1));
-  for (StringRef WeightedFilename : WeightedInputFilenames)
-    WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
+  if (DumpInputFileList) {
+    for (auto &WF : WeightedInputs)
+      outs() << WF.Weight << "," << WF.Filename << "\n";
+    return 0;
+  }
 
   if (ProfileKind == instr)
     mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,