Initial import of ThreadSanitizer.

https://data-race-test.googlecode.com/svn/trunk@3163

Change-Id: I8a134064048ae808e3c68b2eae10b795e15b9459
diff --git a/tsan/ts_race_verifier.cc b/tsan/ts_race_verifier.cc
new file mode 100644
index 0000000..a2e9fd6
--- /dev/null
+++ b/tsan/ts_race_verifier.cc
@@ -0,0 +1,445 @@
+/* Copyright (c) 2008-2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// This file is part of ThreadSanitizer, a dynamic data race detector.
+// Author: Evgeniy Stepanov.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+#include <set>
+#include <iterator>
+#include <algorithm>
+
+#include "ts_lock.h"
+#include "ts_util.h"
+#include "ts_race_verifier.h"
+#include "thread_sanitizer.h"
+
+struct PossibleRace {
+  PossibleRace() : pc(0), reported(false) {}
+  // racy instruction
+  uintptr_t pc;
+  // concurrent traces
+  vector<uintptr_t> traces;
+  // report text
+  string report;
+  // whether this race has already been reported
+  bool reported;
+};
+
+// pc -> race
+static map<uintptr_t, PossibleRace*>* races_map;
+
+// Data about a call site.
+struct CallSite {
+  int thread_id;
+  uintptr_t pc;
+};
+
+struct TypedCallSites {
+  vector<CallSite> reads;
+  vector<CallSite> writes;
+};
+
+// data address -> ([write callsites], [read callsites])
+typedef map<uintptr_t, TypedCallSites> AddressMap;
+
+static TSLock racecheck_lock;
+static AddressMap* racecheck_map;
+// data addresses that are ignored (they have already been reported)
+static set<uintptr_t>* ignore_addresses;
+
+// starting pc of the trace -> visit count
+// used to reduce the sleep time for hot traces
+typedef map<uintptr_t, int> VisitCountMap;
+static VisitCountMap* visit_count_map;
+
+static int n_reports;
+
+/**
+ * Given max and min pc of a trace (both inclusive!), returns whether this trace
+ * is interesting to us at all (via the return value), and whether it should be
+ * instrumented fully (*instrument_pc=0), or 1 instruction only. In the latter
+ * case, *instrument_pc contains the address of the said instruction.
+ */
+bool RaceVerifierGetAddresses(uintptr_t min_pc, uintptr_t max_pc,
+    uintptr_t* instrument_pc) {
+  uintptr_t pc = 0;
+  for (map<uintptr_t, PossibleRace*>::iterator it = races_map->begin();
+       it != races_map->end(); ++it) {
+    PossibleRace* race = it->second;
+    if (race->reported)
+      continue;
+    if (race->pc >= min_pc && race->pc <= max_pc) {
+      if (pc) {
+        // Two race candidates in one trace. Just instrument it fully.
+        *instrument_pc = 0;
+        return true;
+      }
+      pc = race->pc;
+    }
+    for (vector<uintptr_t>::iterator it2 = race->traces.begin();
+         it2 != race->traces.end(); ++it2) {
+      if (*it2 >= min_pc && *it2 <= max_pc) {
+        *instrument_pc = 0;
+        return true;
+      }
+    }
+  }
+  *instrument_pc = pc;
+  return !!pc;
+}
+
+static void UpdateSummary() {
+  if (!G_flags->summary_file.empty()) {
+    char buff[100];
+    snprintf(buff, sizeof(buff),
+	     "RaceVerifier: %d report(s) verified\n", n_reports);
+    // We overwrite the contents of this file with the new summary.
+    // We don't do that at the end because even if we crash later
+    // we will already have the summary.
+    OpenFileWriteStringAndClose(G_flags->summary_file, buff);
+  }
+}
+
+/* Build and print a race report for a data address. Does not print stack traces
+   and symbols and all the fancy stuff - we don't have that info. Used when we
+   don't have a ready report - for unexpected races and for
+   --race-verifier-extra races.
+
+   racecheck_lock must be held by the current thread.
+*/
+static void PrintRaceReportEmpty(uintptr_t addr) {
+  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+  vector<CallSite>& writes = typedCallSites->writes;
+  vector<CallSite>& reads = typedCallSites->reads;
+  for (vector<CallSite>::const_iterator it = writes.begin();
+       it != writes.end(); ++ it) {
+    Printf("  write at %p\n", it->pc);
+  }
+  for (vector<CallSite>::const_iterator it = reads.begin();
+       it != reads.end(); ++ it) {
+    Printf("  read at %p\n", it->pc);
+  }
+}
+
+/* Find a PossibleRace that matches current accesses (racecheck_map) to the
+   given data address.
+
+   racecheck_lock must be held by the current thread.
+ */
+static PossibleRace* FindRaceForAddr(uintptr_t addr) {
+  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+  vector<CallSite>& writes = typedCallSites->writes;
+  vector<CallSite>& reads = typedCallSites->reads;
+  for (vector<CallSite>::const_iterator it = writes.begin();
+       it != writes.end(); ++ it) {
+    map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
+    if (it2 != races_map->end())
+      return it2->second;
+  }
+  for (vector<CallSite>::const_iterator it = reads.begin();
+       it != reads.end(); ++ it) {
+    map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
+    if (it2 != races_map->end())
+      return it2->second;
+  }
+  return NULL;
+}
+
+/* Prints a race report for the given data address, either finding one in a
+   matching PossibleRace, or just printing pc's of the mops.
+
+   racecheck_lock must be held by the current thread.
+*/
+static void PrintRaceReport(uintptr_t addr) {
+  PossibleRace* race = FindRaceForAddr(addr);
+  if (race) {
+    ExpectedRace* expected_race = ThreadSanitizerFindExpectedRace(addr);
+    if (expected_race)
+      expected_race->count++;
+    bool is_expected = !!expected_race;
+    bool is_unverifiable = is_expected && !expected_race->is_verifiable;
+
+    if (is_expected && !is_unverifiable && !G_flags->show_expected_races)
+      return;
+
+    if (is_unverifiable)
+      Printf("WARNING: Confirmed a race that was marked as UNVERIFIABLE:\n");
+    else
+      Printf("WARNING: Confirmed a race:\n");
+    const string& report = race->report;
+    if (report.empty()) {
+      PrintRaceReportEmpty(addr);
+    } else {
+      Printf("%s", report.c_str());
+    }
+    // Suppress future reports for this race.
+    race->reported = true;
+    ignore_addresses->insert(addr);
+
+    n_reports++;
+  } else {
+    Printf("Warning: unexpected race found!\n");
+    PrintRaceReportEmpty(addr);
+
+    n_reports ++;
+  }
+  UpdateSummary();
+}
+
+/**
+ * This function is called before the mop delay.
+ * @param thread_id Thread id.
+ * @param addr Data address.
+ * @param pc Instruction pc.
+ * @param is_w Whether this is a write (true) or a read (false).
+ * @return True if this access is interesting to us at all. If true, the caller
+ *     should delay and then call RaceVerifierEndAccess. If false, it should do
+ *     nothing more for this mop.
+ */
+bool RaceVerifierStartAccess(int thread_id, uintptr_t addr, uintptr_t pc,
+    bool is_w) {
+  CallSite callSite;
+  callSite.thread_id = thread_id;
+  callSite.pc = pc;
+  racecheck_lock.Lock();
+
+  if (debug_race_verifier)
+    Printf("[%d] pc %p %s addr %p start\n", thread_id, pc,
+        is_w ? "write" : "read", addr);
+
+  if (ignore_addresses->count(addr)) {
+    racecheck_lock.Unlock();
+    return false;
+  }
+
+  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+  vector<CallSite>& writes = typedCallSites->writes;
+  vector<CallSite>& reads = typedCallSites->reads;
+  (is_w ? writes : reads).push_back(callSite);
+  if (writes.size() > 0 && writes.size() + reads.size() > 1) {
+    bool is_race = false;
+    for (size_t i = 0; !is_race && i < writes.size(); ++i) {
+      for (size_t j = 0; !is_race && j < writes.size(); ++j)
+        if (writes[i].thread_id != writes[j].thread_id)
+          is_race = true;
+      for (size_t j = 0; !is_race && j < reads.size(); ++j)
+        if (writes[i].thread_id != reads[j].thread_id)
+          is_race = true;
+    }
+    if (is_race)
+      PrintRaceReport(addr);
+  }
+  racecheck_lock.Unlock();
+  return true;
+}
+
+/* This function is called after the mop delay, only if RaceVerifierStartAccess
+   returned true. The arguments are exactly the same. */
+void RaceVerifierEndAccess(int thread_id, uintptr_t addr, uintptr_t pc,
+    bool is_w) {
+  racecheck_lock.Lock();
+
+  if (debug_race_verifier)
+    Printf("[%d] pc %p %s addr %p end\n", thread_id, pc,
+        is_w ? "write" : "read", addr);
+  if (ignore_addresses->count(addr)) {
+    racecheck_lock.Unlock();
+    return;
+  }
+
+  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+  vector<CallSite>& vec =
+      is_w ? typedCallSites->writes : typedCallSites->reads;
+  for (int i = vec.size() - 1; i >= 0; --i) {
+    if (vec[i].thread_id == thread_id) {
+      vec.erase(vec.begin() + i);
+      break;
+    }
+  }
+  racecheck_lock.Unlock();
+}
+
+/* Parse a race description that appears in TSan logs after the words
+   "Race verifier data: ", not including the said words. It looks like
+   "pc,trace[,trace]...", without spaces. */
+static PossibleRace* ParseRaceInfo(const string& raceInfo) {
+  PossibleRace* race = new PossibleRace();
+  const char* p = raceInfo.c_str();
+  while (true) {
+    char* end;
+    uintptr_t addr = my_strtol(p, &end, 16);
+    if (p == end) {
+      Printf("Parse error: %s\n", p);
+      exit(1);
+    }
+    if (!race->pc)
+      race->pc = addr;
+    else
+      race->traces.push_back(addr);
+    while (*end == '\n' || *end == '\r')
+      ++end;
+    if (*end == '\0') {
+      // raceInfo already ends with \n
+      Printf("Possible race: %s", raceInfo.c_str());
+      return race;
+    }
+    if (*end != ',') {
+      Printf("Parse error: comma expected: %s\n", end);
+      delete race;
+      return NULL;
+    }
+    p = end + 1;
+  }
+}
+
+/* Parse a race description and add it to races_map. */
+static void RaceVerifierParseRaceInfo(const string& raceInfo) {
+  PossibleRace* race = ParseRaceInfo(raceInfo);
+  if (race)
+    (*races_map)[race->pc] = race;
+  else
+    Printf("Bad raceInfo: %s\n", raceInfo.c_str());
+}
+
+
+class StringStream {
+ public:
+  StringStream(const string &s) : s_(s), data_(s.c_str()), p_(data_) {}
+
+  bool Eof() {
+    return !*p_;
+  }
+
+  string NextLine() {
+    const char* first = p_;
+    while (*p_ && *p_ != '\n') {
+      ++p_;
+    }
+    if (*p_)
+      ++p_;
+    return string(first, p_ - first);
+  }
+
+ private:
+  const string& s_;
+  const char* data_;
+  const char* p_;
+};
+
+/* Parse a TSan log and add all race verifier info's from it to our storage of
+   possible races. */
+static void RaceVerifierParseFile(const string& fileName) {
+  Printf("Reading race data from %s\n", fileName.c_str());
+  const string RACEINFO_MARKER = "Race verifier data: ";
+  string log = ReadFileToString(fileName, true /* die_if_failed */);
+  StringStream ss(log);
+  string* desc = NULL;
+  int count = 0;
+  while (!ss.Eof()) {
+    string line = ss.NextLine();
+    size_t pos;
+    if ((line.find("WARNING: Possible data race during") !=
+            string::npos) ||
+        (line.find("WARNING: Expected data race during") !=
+            string::npos)) {
+      desc = new string();
+      (*desc) += line;
+    } else if ((pos = line.find(RACEINFO_MARKER)) != string::npos) {
+      pos += RACEINFO_MARKER.size();
+      string raceInfo = line.substr(pos);
+      PossibleRace* race = ParseRaceInfo(raceInfo);
+      (*desc) += "}}}\n";
+      race->report = *desc;
+      (*races_map)[race->pc] = race;
+      count ++;
+      delete desc;
+      desc = NULL;
+    } else if (desc) {
+      (*desc) += line;
+    }
+  }
+  Printf("Got %d possible races\n", count);
+}
+
+/**
+ * Return the time to sleep for the given trace.
+ * @param trace_pc The starting pc of the trace.
+ * @return Time to sleep in ms, or 0 if this trace should be ignored.
+ */
+int RaceVerifierGetSleepTime(uintptr_t trace_pc) {
+  racecheck_lock.Lock();
+  int visit_count = ++(*visit_count_map)[trace_pc];
+  int tm;
+  if (visit_count < 20) {
+    tm = G_flags->race_verifier_sleep_ms;
+  } else if (visit_count < 200) {
+    tm = G_flags->race_verifier_sleep_ms / 10;
+  } else {
+    tm = 0;
+  }
+  if (debug_race_verifier) {
+    if (visit_count == 20) {
+      Printf("RaceVerifier: Trace %x: sleep time reduced.\n", trace_pc);
+    } else if (visit_count == 200) {
+      Printf("RaceVerifier: Trace %x: ignored.\n", trace_pc);
+    }
+  }
+  racecheck_lock.Unlock();
+  return tm;
+}
+
+/**
+ * Init the race verifier. Should be called exactly once before any other
+ * functions in this file.
+ * @param fileNames Names of TSan log to parse.
+ * @param raceInfos Additional race description strings.
+ */
+void RaceVerifierInit(const vector<string>& fileNames,
+    const vector<string>& raceInfos) {
+  races_map = new map<uintptr_t, PossibleRace*>();
+  racecheck_map = new AddressMap();
+  visit_count_map = new VisitCountMap();
+  ignore_addresses = new set<uintptr_t>();
+
+  for (vector<string>::const_iterator it = fileNames.begin();
+       it != fileNames.end(); ++it) {
+    RaceVerifierParseFile(*it);
+  }
+  for (vector<string>::const_iterator it = raceInfos.begin();
+       it != raceInfos.end(); ++it) {
+    RaceVerifierParseRaceInfo(*it);
+  }
+}
+
+void RaceVerifierFini() {
+  Report("RaceVerifier summary: verified %d race(s)\n", n_reports);
+  int n_errors = GetNumberOfFoundErrors();
+  SetNumberOfFoundErrors(n_errors + n_reports);
+}