Initial import of ThreadSanitizer.
https://data-race-test.googlecode.com/svn/trunk@3163
Change-Id: I8a134064048ae808e3c68b2eae10b795e15b9459
diff --git a/tsan/ts_race_verifier.cc b/tsan/ts_race_verifier.cc
new file mode 100644
index 0000000..a2e9fd6
--- /dev/null
+++ b/tsan/ts_race_verifier.cc
@@ -0,0 +1,445 @@
+/* Copyright (c) 2008-2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// This file is part of ThreadSanitizer, a dynamic data race detector.
+// Author: Evgeniy Stepanov.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+#include <set>
+#include <iterator>
+#include <algorithm>
+
+#include "ts_lock.h"
+#include "ts_util.h"
+#include "ts_race_verifier.h"
+#include "thread_sanitizer.h"
+
+struct PossibleRace {
+ PossibleRace() : pc(0), reported(false) {}
+ // racy instruction
+ uintptr_t pc;
+ // concurrent traces
+ vector<uintptr_t> traces;
+ // report text
+ string report;
+ // whether this race has already been reported
+ bool reported;
+};
+
+// pc -> race
+static map<uintptr_t, PossibleRace*>* races_map;
+
+// Data about a call site.
+struct CallSite {
+ int thread_id;
+ uintptr_t pc;
+};
+
+struct TypedCallSites {
+ vector<CallSite> reads;
+ vector<CallSite> writes;
+};
+
+// data address -> ([write callsites], [read callsites])
+typedef map<uintptr_t, TypedCallSites> AddressMap;
+
+static TSLock racecheck_lock;
+static AddressMap* racecheck_map;
+// data addresses that are ignored (they have already been reported)
+static set<uintptr_t>* ignore_addresses;
+
+// starting pc of the trace -> visit count
+// used to reduce the sleep time for hot traces
+typedef map<uintptr_t, int> VisitCountMap;
+static VisitCountMap* visit_count_map;
+
+static int n_reports;
+
+/**
+ * Given max and min pc of a trace (both inclusive!), returns whether this trace
+ * is interesting to us at all (via the return value), and whether it should be
+ * instrumented fully (*instrument_pc=0), or 1 instruction only. In the latter
+ * case, *instrument_pc contains the address of the said instruction.
+ */
+bool RaceVerifierGetAddresses(uintptr_t min_pc, uintptr_t max_pc,
+ uintptr_t* instrument_pc) {
+ uintptr_t pc = 0;
+ for (map<uintptr_t, PossibleRace*>::iterator it = races_map->begin();
+ it != races_map->end(); ++it) {
+ PossibleRace* race = it->second;
+ if (race->reported)
+ continue;
+ if (race->pc >= min_pc && race->pc <= max_pc) {
+ if (pc) {
+ // Two race candidates in one trace. Just instrument it fully.
+ *instrument_pc = 0;
+ return true;
+ }
+ pc = race->pc;
+ }
+ for (vector<uintptr_t>::iterator it2 = race->traces.begin();
+ it2 != race->traces.end(); ++it2) {
+ if (*it2 >= min_pc && *it2 <= max_pc) {
+ *instrument_pc = 0;
+ return true;
+ }
+ }
+ }
+ *instrument_pc = pc;
+ return !!pc;
+}
+
+static void UpdateSummary() {
+ if (!G_flags->summary_file.empty()) {
+ char buff[100];
+ snprintf(buff, sizeof(buff),
+ "RaceVerifier: %d report(s) verified\n", n_reports);
+ // We overwrite the contents of this file with the new summary.
+ // We don't do that at the end because even if we crash later
+ // we will already have the summary.
+ OpenFileWriteStringAndClose(G_flags->summary_file, buff);
+ }
+}
+
+/* Build and print a race report for a data address. Does not print stack traces
+ and symbols and all the fancy stuff - we don't have that info. Used when we
+ don't have a ready report - for unexpected races and for
+ --race-verifier-extra races.
+
+ racecheck_lock must be held by the current thread.
+*/
+static void PrintRaceReportEmpty(uintptr_t addr) {
+ TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+ vector<CallSite>& writes = typedCallSites->writes;
+ vector<CallSite>& reads = typedCallSites->reads;
+ for (vector<CallSite>::const_iterator it = writes.begin();
+ it != writes.end(); ++ it) {
+ Printf(" write at %p\n", it->pc);
+ }
+ for (vector<CallSite>::const_iterator it = reads.begin();
+ it != reads.end(); ++ it) {
+ Printf(" read at %p\n", it->pc);
+ }
+}
+
+/* Find a PossibleRace that matches current accesses (racecheck_map) to the
+ given data address.
+
+ racecheck_lock must be held by the current thread.
+ */
+static PossibleRace* FindRaceForAddr(uintptr_t addr) {
+ TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+ vector<CallSite>& writes = typedCallSites->writes;
+ vector<CallSite>& reads = typedCallSites->reads;
+ for (vector<CallSite>::const_iterator it = writes.begin();
+ it != writes.end(); ++ it) {
+ map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
+ if (it2 != races_map->end())
+ return it2->second;
+ }
+ for (vector<CallSite>::const_iterator it = reads.begin();
+ it != reads.end(); ++ it) {
+ map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
+ if (it2 != races_map->end())
+ return it2->second;
+ }
+ return NULL;
+}
+
+/* Prints a race report for the given data address, either finding one in a
+ matching PossibleRace, or just printing pc's of the mops.
+
+ racecheck_lock must be held by the current thread.
+*/
+static void PrintRaceReport(uintptr_t addr) {
+ PossibleRace* race = FindRaceForAddr(addr);
+ if (race) {
+ ExpectedRace* expected_race = ThreadSanitizerFindExpectedRace(addr);
+ if (expected_race)
+ expected_race->count++;
+ bool is_expected = !!expected_race;
+ bool is_unverifiable = is_expected && !expected_race->is_verifiable;
+
+ if (is_expected && !is_unverifiable && !G_flags->show_expected_races)
+ return;
+
+ if (is_unverifiable)
+ Printf("WARNING: Confirmed a race that was marked as UNVERIFIABLE:\n");
+ else
+ Printf("WARNING: Confirmed a race:\n");
+ const string& report = race->report;
+ if (report.empty()) {
+ PrintRaceReportEmpty(addr);
+ } else {
+ Printf("%s", report.c_str());
+ }
+ // Suppress future reports for this race.
+ race->reported = true;
+ ignore_addresses->insert(addr);
+
+ n_reports++;
+ } else {
+ Printf("Warning: unexpected race found!\n");
+ PrintRaceReportEmpty(addr);
+
+ n_reports ++;
+ }
+ UpdateSummary();
+}
+
+/**
+ * This function is called before the mop delay.
+ * @param thread_id Thread id.
+ * @param addr Data address.
+ * @param pc Instruction pc.
+ * @param is_w Whether this is a write (true) or a read (false).
+ * @return True if this access is interesting to us at all. If true, the caller
+ * should delay and then call RaceVerifierEndAccess. If false, it should do
+ * nothing more for this mop.
+ */
+bool RaceVerifierStartAccess(int thread_id, uintptr_t addr, uintptr_t pc,
+ bool is_w) {
+ CallSite callSite;
+ callSite.thread_id = thread_id;
+ callSite.pc = pc;
+ racecheck_lock.Lock();
+
+ if (debug_race_verifier)
+ Printf("[%d] pc %p %s addr %p start\n", thread_id, pc,
+ is_w ? "write" : "read", addr);
+
+ if (ignore_addresses->count(addr)) {
+ racecheck_lock.Unlock();
+ return false;
+ }
+
+ TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+ vector<CallSite>& writes = typedCallSites->writes;
+ vector<CallSite>& reads = typedCallSites->reads;
+ (is_w ? writes : reads).push_back(callSite);
+ if (writes.size() > 0 && writes.size() + reads.size() > 1) {
+ bool is_race = false;
+ for (size_t i = 0; !is_race && i < writes.size(); ++i) {
+ for (size_t j = 0; !is_race && j < writes.size(); ++j)
+ if (writes[i].thread_id != writes[j].thread_id)
+ is_race = true;
+ for (size_t j = 0; !is_race && j < reads.size(); ++j)
+ if (writes[i].thread_id != reads[j].thread_id)
+ is_race = true;
+ }
+ if (is_race)
+ PrintRaceReport(addr);
+ }
+ racecheck_lock.Unlock();
+ return true;
+}
+
+/* This function is called after the mop delay, only if RaceVerifierStartAccess
+ returned true. The arguments are exactly the same. */
+void RaceVerifierEndAccess(int thread_id, uintptr_t addr, uintptr_t pc,
+ bool is_w) {
+ racecheck_lock.Lock();
+
+ if (debug_race_verifier)
+ Printf("[%d] pc %p %s addr %p end\n", thread_id, pc,
+ is_w ? "write" : "read", addr);
+ if (ignore_addresses->count(addr)) {
+ racecheck_lock.Unlock();
+ return;
+ }
+
+ TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
+ vector<CallSite>& vec =
+ is_w ? typedCallSites->writes : typedCallSites->reads;
+ for (int i = vec.size() - 1; i >= 0; --i) {
+ if (vec[i].thread_id == thread_id) {
+ vec.erase(vec.begin() + i);
+ break;
+ }
+ }
+ racecheck_lock.Unlock();
+}
+
+/* Parse a race description that appears in TSan logs after the words
+ "Race verifier data: ", not including the said words. It looks like
+ "pc,trace[,trace]...", without spaces. */
+static PossibleRace* ParseRaceInfo(const string& raceInfo) {
+ PossibleRace* race = new PossibleRace();
+ const char* p = raceInfo.c_str();
+ while (true) {
+ char* end;
+ uintptr_t addr = my_strtol(p, &end, 16);
+ if (p == end) {
+ Printf("Parse error: %s\n", p);
+ exit(1);
+ }
+ if (!race->pc)
+ race->pc = addr;
+ else
+ race->traces.push_back(addr);
+ while (*end == '\n' || *end == '\r')
+ ++end;
+ if (*end == '\0') {
+ // raceInfo already ends with \n
+ Printf("Possible race: %s", raceInfo.c_str());
+ return race;
+ }
+ if (*end != ',') {
+ Printf("Parse error: comma expected: %s\n", end);
+ delete race;
+ return NULL;
+ }
+ p = end + 1;
+ }
+}
+
+/* Parse a race description and add it to races_map. */
+static void RaceVerifierParseRaceInfo(const string& raceInfo) {
+ PossibleRace* race = ParseRaceInfo(raceInfo);
+ if (race)
+ (*races_map)[race->pc] = race;
+ else
+ Printf("Bad raceInfo: %s\n", raceInfo.c_str());
+}
+
+
+class StringStream {
+ public:
+ StringStream(const string &s) : s_(s), data_(s.c_str()), p_(data_) {}
+
+ bool Eof() {
+ return !*p_;
+ }
+
+ string NextLine() {
+ const char* first = p_;
+ while (*p_ && *p_ != '\n') {
+ ++p_;
+ }
+ if (*p_)
+ ++p_;
+ return string(first, p_ - first);
+ }
+
+ private:
+ const string& s_;
+ const char* data_;
+ const char* p_;
+};
+
+/* Parse a TSan log and add all race verifier info's from it to our storage of
+ possible races. */
+static void RaceVerifierParseFile(const string& fileName) {
+ Printf("Reading race data from %s\n", fileName.c_str());
+ const string RACEINFO_MARKER = "Race verifier data: ";
+ string log = ReadFileToString(fileName, true /* die_if_failed */);
+ StringStream ss(log);
+ string* desc = NULL;
+ int count = 0;
+ while (!ss.Eof()) {
+ string line = ss.NextLine();
+ size_t pos;
+ if ((line.find("WARNING: Possible data race during") !=
+ string::npos) ||
+ (line.find("WARNING: Expected data race during") !=
+ string::npos)) {
+ desc = new string();
+ (*desc) += line;
+ } else if ((pos = line.find(RACEINFO_MARKER)) != string::npos) {
+ pos += RACEINFO_MARKER.size();
+ string raceInfo = line.substr(pos);
+ PossibleRace* race = ParseRaceInfo(raceInfo);
+ (*desc) += "}}}\n";
+ race->report = *desc;
+ (*races_map)[race->pc] = race;
+ count ++;
+ delete desc;
+ desc = NULL;
+ } else if (desc) {
+ (*desc) += line;
+ }
+ }
+ Printf("Got %d possible races\n", count);
+}
+
+/**
+ * Return the time to sleep for the given trace.
+ * @param trace_pc The starting pc of the trace.
+ * @return Time to sleep in ms, or 0 if this trace should be ignored.
+ */
+int RaceVerifierGetSleepTime(uintptr_t trace_pc) {
+ racecheck_lock.Lock();
+ int visit_count = ++(*visit_count_map)[trace_pc];
+ int tm;
+ if (visit_count < 20) {
+ tm = G_flags->race_verifier_sleep_ms;
+ } else if (visit_count < 200) {
+ tm = G_flags->race_verifier_sleep_ms / 10;
+ } else {
+ tm = 0;
+ }
+ if (debug_race_verifier) {
+ if (visit_count == 20) {
+ Printf("RaceVerifier: Trace %x: sleep time reduced.\n", trace_pc);
+ } else if (visit_count == 200) {
+ Printf("RaceVerifier: Trace %x: ignored.\n", trace_pc);
+ }
+ }
+ racecheck_lock.Unlock();
+ return tm;
+}
+
+/**
+ * Init the race verifier. Should be called exactly once before any other
+ * functions in this file.
+ * @param fileNames Names of TSan log to parse.
+ * @param raceInfos Additional race description strings.
+ */
+void RaceVerifierInit(const vector<string>& fileNames,
+ const vector<string>& raceInfos) {
+ races_map = new map<uintptr_t, PossibleRace*>();
+ racecheck_map = new AddressMap();
+ visit_count_map = new VisitCountMap();
+ ignore_addresses = new set<uintptr_t>();
+
+ for (vector<string>::const_iterator it = fileNames.begin();
+ it != fileNames.end(); ++it) {
+ RaceVerifierParseFile(*it);
+ }
+ for (vector<string>::const_iterator it = raceInfos.begin();
+ it != raceInfos.end(); ++it) {
+ RaceVerifierParseRaceInfo(*it);
+ }
+}
+
+void RaceVerifierFini() {
+ Report("RaceVerifier summary: verified %d race(s)\n", n_reports);
+ int n_errors = GetNumberOfFoundErrors();
+ SetNumberOfFoundErrors(n_errors + n_reports);
+}