Update Google Benchmark library

llvm-svn: 322812
diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt
index 7707773..e22620a 100644
--- a/libcxx/utils/google-benchmark/src/CMakeLists.txt
+++ b/libcxx/utils/google-benchmark/src/CMakeLists.txt
@@ -18,6 +18,9 @@
   VERSION ${GENERIC_LIB_VERSION}
   SOVERSION ${GENERIC_LIB_SOVERSION}
 )
+target_include_directories(benchmark PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+    )
 
 # Link threads.
 target_link_libraries(benchmark  ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
@@ -35,11 +38,13 @@
 set(lib_install_dir "lib/")
 set(bin_install_dir "bin/")
 set(config_install_dir "lib/cmake/${PROJECT_NAME}")
+set(pkgconfig_install_dir "lib/pkgconfig")
 
 set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
 
 set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
 set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
+set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
 set(targets_export_name "${PROJECT_NAME}Targets")
 
 set(namespace "${PROJECT_NAME}::")
@@ -50,26 +55,33 @@
 )
 
 configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
+configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
 
-# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
-install(
-  TARGETS benchmark
-  EXPORT ${targets_export_name}
-  ARCHIVE DESTINATION ${lib_install_dir}
-  LIBRARY DESTINATION ${lib_install_dir}
-  RUNTIME DESTINATION ${bin_install_dir}
-  INCLUDES DESTINATION ${include_install_dir})
+if (BENCHMARK_ENABLE_INSTALL)
+  # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
+  install(
+    TARGETS benchmark
+    EXPORT ${targets_export_name}
+    ARCHIVE DESTINATION ${lib_install_dir}
+    LIBRARY DESTINATION ${lib_install_dir}
+    RUNTIME DESTINATION ${bin_install_dir}
+    INCLUDES DESTINATION ${include_install_dir})
 
-install(
-  DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
-  DESTINATION ${include_install_dir}
-  FILES_MATCHING PATTERN "*.*h")
+  install(
+    DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
+    DESTINATION ${include_install_dir}
+    FILES_MATCHING PATTERN "*.*h")
 
-install(
-    FILES "${project_config}" "${version_config}"
-    DESTINATION "${config_install_dir}")
+  install(
+      FILES "${project_config}" "${version_config}"
+      DESTINATION "${config_install_dir}")
 
-install(
-    EXPORT "${targets_export_name}"
-    NAMESPACE "${namespace}"
-    DESTINATION "${config_install_dir}")
+  install(
+      FILES "${pkg_config}"
+      DESTINATION "${pkgconfig_install_dir}")
+
+  install(
+      EXPORT "${targets_export_name}"
+      NAMESPACE "${namespace}"
+      DESTINATION "${config_install_dir}")
+endif()
diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc
index 00ffa07..1a7d218 100644
--- a/libcxx/utils/google-benchmark/src/benchmark.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark.cc
@@ -38,12 +38,12 @@
 #include "commandlineflags.h"
 #include "complexity.h"
 #include "counter.h"
+#include "internal_macros.h"
 #include "log.h"
 #include "mutex.h"
 #include "re.h"
-#include "stat.h"
+#include "statistics.h"
 #include "string_util.h"
-#include "sysinfo.h"
 #include "timers.h"
 
 DEFINE_bool(benchmark_list_tests, false,
@@ -91,23 +91,23 @@
               "environment variable is set to a terminal type that supports "
               "colors.");
 
+DEFINE_bool(benchmark_counters_tabular, false,
+            "Whether to use tabular format when printing user counters to "
+            "the console.  Valid values: 'true'/'yes'/1, 'false'/'no'/0.  "
+            "Defaults to false.");
+
 DEFINE_int32(v, 0, "The level of verbose logging to output");
 
 namespace benchmark {
-namespace internal {
-
-void UseCharPointer(char const volatile*) {}
-
-}  // end namespace internal
 
 namespace {
-
 static const size_t kMaxIterations = 1000000000;
-
 }  // end namespace
 
 namespace internal {
 
+void UseCharPointer(char const volatile*) {}
+
 class ThreadManager {
  public:
   ThreadManager(int num_threads)
@@ -175,7 +175,9 @@
     CHECK(running_);
     running_ = false;
     real_time_used_ += ChronoClockNow() - start_real_time_;
-    cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
+    // Floating point error can result in the subtraction producing a negative
+    // time. Guard against that.
+    cpu_time_used_ += std::max<double>(ThreadCPUUsage() - start_cpu_time_, 0);
   }
 
   // Called by each thread
@@ -251,7 +253,9 @@
     report.complexity_n = results.complexity_n;
     report.complexity = b.complexity;
     report.complexity_lambda = b.complexity_lambda;
+    report.statistics = b.statistics;
     report.counters = results.counters;
+    internal::Finish(&report.counters, seconds, b.threads);
   }
   return report;
 }
@@ -395,7 +399,7 @@
              internal::ThreadManager* manager)
     : started_(false),
       finished_(false),
-      total_iterations_(0),
+      total_iterations_(max_iters + 1),
       range_(ranges),
       bytes_processed_(0),
       items_processed_(0),
@@ -408,6 +412,7 @@
       timer_(timer),
       manager_(manager) {
   CHECK(max_iterations != 0) << "At least one iteration must be run";
+  CHECK(total_iterations_ != 0) << "max iterations wrapped around";
   CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
 }
 
@@ -432,7 +437,7 @@
       manager_->results.has_error_ = true;
     }
   }
-  total_iterations_ = max_iterations;
+  total_iterations_ = 1;
   if (timer_->running()) timer_->StopTimer();
 }
 
@@ -457,8 +462,8 @@
   if (!error_occurred_) {
     PauseTiming();
   }
-  // Total iterations now is one greater than max iterations. Fix this.
-  total_iterations_ = max_iterations;
+  // Total iterations has now wrapped around zero. Fix this.
+  total_iterations_ = 1;
   finished_ = true;
   manager_->StartStopBarrier();
 }
@@ -475,19 +480,19 @@
   // Determine the width of the name field using a minimum width of 10.
   bool has_repetitions = FLAGS_benchmark_repetitions > 1;
   size_t name_field_width = 10;
+  size_t stat_field_width = 0;
   for (const Benchmark::Instance& benchmark : benchmarks) {
     name_field_width =
         std::max<size_t>(name_field_width, benchmark.name.size());
     has_repetitions |= benchmark.repetitions > 1;
+
+    for(const auto& Stat : *benchmark.statistics)
+      stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
   }
-  if (has_repetitions) name_field_width += std::strlen("_stddev");
+  if (has_repetitions) name_field_width += 1 + stat_field_width;
 
   // Print header here
   BenchmarkReporter::Context context;
-  context.num_cpus = NumCPUs();
-  context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
-
-  context.cpu_scaling_enabled = CpuScalingEnabled();
   context.name_field_width = name_field_width;
 
   // Keep track of runing times of all instances of current benchmark
@@ -521,10 +526,10 @@
 }
 
 std::unique_ptr<BenchmarkReporter> CreateReporter(
-    std::string const& name, ConsoleReporter::OutputOptions allow_color) {
+    std::string const& name, ConsoleReporter::OutputOptions output_opts) {
   typedef std::unique_ptr<BenchmarkReporter> PtrType;
   if (name == "console") {
-    return PtrType(new ConsoleReporter(allow_color));
+    return PtrType(new ConsoleReporter(output_opts));
   } else if (name == "json") {
     return PtrType(new JSONReporter);
   } else if (name == "csv") {
@@ -536,6 +541,30 @@
 }
 
 }  // end namespace
+
+bool IsZero(double n) {
+  return std::abs(n) < std::numeric_limits<double>::epsilon();
+}
+
+ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
+  int output_opts = ConsoleReporter::OO_Defaults;
+  if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) ||
+      IsTruthyFlagValue(FLAGS_benchmark_color)) {
+    output_opts |= ConsoleReporter::OO_Color;
+  } else {
+    output_opts &= ~ConsoleReporter::OO_Color;
+  }
+  if(force_no_color) {
+    output_opts &= ~ConsoleReporter::OO_Color;
+  }
+  if(FLAGS_benchmark_counters_tabular) {
+    output_opts |= ConsoleReporter::OO_Tabular;
+  } else {
+    output_opts &= ~ConsoleReporter::OO_Tabular;
+  }
+  return static_cast< ConsoleReporter::OutputOptions >(output_opts);
+}
+
 }  // end namespace internal
 
 size_t RunSpecifiedBenchmarks() {
@@ -557,29 +586,21 @@
   std::unique_ptr<BenchmarkReporter> default_console_reporter;
   std::unique_ptr<BenchmarkReporter> default_file_reporter;
   if (!console_reporter) {
-    auto output_opts = ConsoleReporter::OO_None;
-    if (FLAGS_benchmark_color == "auto")
-      output_opts = IsColorTerminal() ? ConsoleReporter::OO_Color
-                                      : ConsoleReporter::OO_None;
-    else
-      output_opts = IsTruthyFlagValue(FLAGS_benchmark_color)
-                        ? ConsoleReporter::OO_Color
-                        : ConsoleReporter::OO_None;
-    default_console_reporter =
-        internal::CreateReporter(FLAGS_benchmark_format, output_opts);
+    default_console_reporter = internal::CreateReporter(
+          FLAGS_benchmark_format, internal::GetOutputOptions());
     console_reporter = default_console_reporter.get();
   }
   auto& Out = console_reporter->GetOutputStream();
   auto& Err = console_reporter->GetErrorStream();
 
   std::string const& fname = FLAGS_benchmark_out;
-  if (fname == "" && file_reporter) {
+  if (fname.empty() && file_reporter) {
     Err << "A custom file reporter was provided but "
            "--benchmark_out=<file> was not specified."
         << std::endl;
     std::exit(1);
   }
-  if (fname != "") {
+  if (!fname.empty()) {
     output_file.open(fname);
     if (!output_file.is_open()) {
       Err << "invalid file name: '" << fname << std::endl;
@@ -625,6 +646,7 @@
           "          [--benchmark_out=<filename>]\n"
           "          [--benchmark_out_format=<json|console|csv>]\n"
           "          [--benchmark_color={auto|true|false}]\n"
+          "          [--benchmark_counters_tabular={true|false}]\n"
           "          [--v=<verbosity>]\n");
   exit(0);
 }
@@ -649,6 +671,8 @@
         // "color_print" is the deprecated name for "benchmark_color".
         // TODO: Remove this.
         ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
+        ParseBoolFlag(argv[i], "benchmark_counters_tabular",
+                        &FLAGS_benchmark_counters_tabular) ||
         ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
       for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
 
diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
index 828ed12..d481dc5 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
+++ b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
@@ -1,7 +1,7 @@
 #ifndef BENCHMARK_API_INTERNAL_H
 #define BENCHMARK_API_INTERNAL_H
 
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
 
 #include <cmath>
 #include <iosfwd>
@@ -25,6 +25,7 @@
   BigO complexity;
   BigOFunc* complexity_lambda;
   UserCounters counters;
+  const std::vector<Statistics>* statistics;
   bool last_benchmark_instance;
   int repetitions;
   double min_time;
@@ -36,13 +37,10 @@
                             std::vector<Benchmark::Instance>* benchmarks,
                             std::ostream* Err);
 
-namespace {
+bool IsZero(double n);
 
-bool IsZero(double n) {
-  return std::abs(n) < std::numeric_limits<double>::epsilon();
-}
+ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
 
-}  // end namespace
 }  // end namespace internal
 }  // end namespace benchmark
 
diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.cc b/libcxx/utils/google-benchmark/src/benchmark_register.cc
index fe37320..d5746a3 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_register.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark_register.cc
@@ -31,17 +31,17 @@
 #include <fstream>
 #include <iostream>
 #include <memory>
+#include <sstream>
 #include <thread>
 
 #include "check.h"
 #include "commandlineflags.h"
 #include "complexity.h"
+#include "statistics.h"
 #include "log.h"
 #include "mutex.h"
 #include "re.h"
-#include "stat.h"
 #include "string_util.h"
-#include "sysinfo.h"
 #include "timers.h"
 
 namespace benchmark {
@@ -69,6 +69,9 @@
   // Registers a benchmark family and returns the index assigned to it.
   size_t AddBenchmark(std::unique_ptr<Benchmark> family);
 
+  // Clear all registered benchmark families.
+  void ClearBenchmarks();
+
   // Extract the list of benchmark instances that match the specified
   // regular expression.
   bool FindBenchmarks(const std::string& re,
@@ -94,6 +97,12 @@
   return index;
 }
 
+void BenchmarkFamilies::ClearBenchmarks() {
+  MutexLock l(mutex_);
+  families_.clear();
+  families_.shrink_to_fit();
+}
+
 bool BenchmarkFamilies::FindBenchmarks(
     const std::string& spec, std::vector<Benchmark::Instance>* benchmarks,
     std::ostream* ErrStream) {
@@ -149,6 +158,7 @@
         instance.use_manual_time = family->use_manual_time_;
         instance.complexity = family->complexity_;
         instance.complexity_lambda = family->complexity_lambda_;
+        instance.statistics = &family->statistics_;
         instance.threads = num_threads;
 
         // Add arguments to instance name
@@ -163,8 +173,8 @@
                   StringPrintF("%s:", family->arg_names_[arg_i].c_str());
             }
           }
-
-          instance.name += std::to_string(arg);
+          
+          instance.name += StringPrintF("%d", arg);
           ++arg_i;
         }
 
@@ -226,7 +236,11 @@
       use_real_time_(false),
       use_manual_time_(false),
       complexity_(oNone),
-      complexity_lambda_(nullptr) {}
+      complexity_lambda_(nullptr) {
+  ComputeStatistics("mean", StatisticsMean);
+  ComputeStatistics("median", StatisticsMedian);
+  ComputeStatistics("stddev", StatisticsStdDev);
+}
 
 Benchmark::~Benchmark() {}
 
@@ -399,6 +413,12 @@
   return this;
 }
 
+Benchmark* Benchmark::ComputeStatistics(std::string name,
+                                        StatisticsFunc* statistics) {
+  statistics_.emplace_back(name, statistics);
+  return this;
+}
+
 Benchmark* Benchmark::Threads(int t) {
   CHECK_GT(t, 0);
   thread_counts_.push_back(t);
@@ -427,8 +447,7 @@
 }
 
 Benchmark* Benchmark::ThreadPerCpu() {
-  static int num_cpus = NumCPUs();
-  thread_counts_.push_back(num_cpus);
+  thread_counts_.push_back(CPUInfo::Get().num_cpus);
   return this;
 }
 
@@ -449,4 +468,9 @@
 void FunctionBenchmark::Run(State& st) { func_(st); }
 
 }  // end namespace internal
+
+void ClearRegisteredBenchmarks() {
+  internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
+}
+
 }  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/check.h b/libcxx/utils/google-benchmark/src/check.h
index 6f1fe0c..73bead2 100644
--- a/libcxx/utils/google-benchmark/src/check.h
+++ b/libcxx/utils/google-benchmark/src/check.h
@@ -3,6 +3,7 @@
 
 #include <cstdlib>
 #include <ostream>
+#include <cmath>
 
 #include "internal_macros.h"
 #include "log.h"
@@ -68,4 +69,11 @@
 #define CHECK_GT(a, b) CHECK((a) > (b))
 #define CHECK_LT(a, b) CHECK((a) < (b))
 
+#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) <  (eps))
+#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
+#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
+#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
+#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) >  (eps))
+#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) >  (eps))
+
 #endif  // CHECK_H_
diff --git a/libcxx/utils/google-benchmark/src/colorprint.cc b/libcxx/utils/google-benchmark/src/colorprint.cc
index 513376b..2dec4a8 100644
--- a/libcxx/utils/google-benchmark/src/colorprint.cc
+++ b/libcxx/utils/google-benchmark/src/colorprint.cc
@@ -89,7 +89,7 @@
 
   std::size_t size = 256;
   char local_buff[256];
-  auto ret = std::vsnprintf(local_buff, size, msg, args_cp);
+  auto ret = vsnprintf(local_buff, size, msg, args_cp);
 
   va_end(args_cp);
 
@@ -104,7 +104,7 @@
     // we did not provide a long enough buffer on our first attempt.
     size = (size_t)ret + 1;  // + 1 for the null byte
     std::unique_ptr<char[]> buff(new char[size]);
-    ret = std::vsnprintf(buff.get(), size, msg, args);
+    ret = vsnprintf(buff.get(), size, msg, args);
     CHECK(ret > 0 && ((size_t)ret) < size);
     return buff.get();
   }
diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.cc b/libcxx/utils/google-benchmark/src/commandlineflags.cc
index 72534e0..2fc92517 100644
--- a/libcxx/utils/google-benchmark/src/commandlineflags.cc
+++ b/libcxx/utils/google-benchmark/src/commandlineflags.cc
@@ -209,9 +209,9 @@
   return (ParseFlagValue(str, flag, true) != nullptr);
 }
 
-bool IsTruthyFlagValue(const std::string& str) {
-  if (str.empty()) return true;
-  char ch = str[0];
+bool IsTruthyFlagValue(const std::string& value) {
+  if (value.empty()) return true;
+  char ch = value[0];
   return isalnum(ch) &&
          !(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N');
 }
diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc
index 02adbef..8883269 100644
--- a/libcxx/utils/google-benchmark/src/complexity.cc
+++ b/libcxx/utils/google-benchmark/src/complexity.cc
@@ -15,13 +15,12 @@
 // Source project : https://github.com/ismaelJimenez/cpp.leastsq
 // Adapted to be used with google benchmark
 
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
 
 #include <algorithm>
 #include <cmath>
 #include "check.h"
 #include "complexity.h"
-#include "stat.h"
 
 namespace benchmark {
 
@@ -35,9 +34,9 @@
     case oNCubed:
       return [](int n) -> double { return std::pow(n, 3); };
     case oLogN:
-      return [](int n) { return std::log2(n); };
+      return [](int n) { return log2(n); };
     case oNLogN:
-      return [](int n) { return n * std::log2(n); };
+      return [](int n) { return n * log2(n); };
     case o1:
     default:
       return [](int) { return 1.0; };
@@ -150,109 +149,6 @@
   return best_fit;
 }
 
-std::vector<BenchmarkReporter::Run> ComputeStats(
-    const std::vector<BenchmarkReporter::Run>& reports) {
-  typedef BenchmarkReporter::Run Run;
-  std::vector<Run> results;
-
-  auto error_count =
-      std::count_if(reports.begin(), reports.end(),
-                    [](Run const& run) { return run.error_occurred; });
-
-  if (reports.size() - error_count < 2) {
-    // We don't report aggregated data if there was a single run.
-    return results;
-  }
-  // Accumulators.
-  Stat1_d real_accumulated_time_stat;
-  Stat1_d cpu_accumulated_time_stat;
-  Stat1_d bytes_per_second_stat;
-  Stat1_d items_per_second_stat;
-  // All repetitions should be run with the same number of iterations so we
-  // can take this information from the first benchmark.
-  int64_t const run_iterations = reports.front().iterations;
-  // create stats for user counters
-  struct CounterStat {
-    Counter c;
-    Stat1_d s;
-  };
-  std::map< std::string, CounterStat > counter_stats;
-  for(Run const& r : reports) {
-    for(auto const& cnt : r.counters) {
-      auto it = counter_stats.find(cnt.first);
-      if(it == counter_stats.end()) {
-        counter_stats.insert({cnt.first, {cnt.second, Stat1_d{}}});
-      } else {
-        CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
-      }
-    }
-  }
-
-  // Populate the accumulators.
-  for (Run const& run : reports) {
-    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
-    CHECK_EQ(run_iterations, run.iterations);
-    if (run.error_occurred) continue;
-    real_accumulated_time_stat +=
-        Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
-    cpu_accumulated_time_stat +=
-        Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
-    items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
-    bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
-    // user counters
-    for(auto const& cnt : run.counters) {
-      auto it = counter_stats.find(cnt.first);
-      CHECK_NE(it, counter_stats.end());
-      it->second.s += Stat1_d(cnt.second, run.iterations);
-    }
-  }
-
-  // Get the data from the accumulator to BenchmarkReporter::Run's.
-  Run mean_data;
-  mean_data.benchmark_name = reports[0].benchmark_name + "_mean";
-  mean_data.iterations = run_iterations;
-  mean_data.real_accumulated_time =
-      real_accumulated_time_stat.Mean() * run_iterations;
-  mean_data.cpu_accumulated_time =
-      cpu_accumulated_time_stat.Mean() * run_iterations;
-  mean_data.bytes_per_second = bytes_per_second_stat.Mean();
-  mean_data.items_per_second = items_per_second_stat.Mean();
-  mean_data.time_unit = reports[0].time_unit;
-  // user counters
-  for(auto const& kv : counter_stats) {
-    auto c = Counter(kv.second.s.Mean(), counter_stats[kv.first].c.flags);
-    mean_data.counters[kv.first] = c;
-  }
-
-  // Only add label to mean/stddev if it is same for all runs
-  mean_data.report_label = reports[0].report_label;
-  for (std::size_t i = 1; i < reports.size(); i++) {
-    if (reports[i].report_label != reports[0].report_label) {
-      mean_data.report_label = "";
-      break;
-    }
-  }
-
-  Run stddev_data;
-  stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev";
-  stddev_data.report_label = mean_data.report_label;
-  stddev_data.iterations = 0;
-  stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev();
-  stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev();
-  stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
-  stddev_data.items_per_second = items_per_second_stat.StdDev();
-  stddev_data.time_unit = reports[0].time_unit;
-  // user counters
-  for(auto const& kv : counter_stats) {
-    auto c = Counter(kv.second.s.StdDev(), counter_stats[kv.first].c.flags);
-    stddev_data.counters[kv.first] = c;
-  }
-
-  results.push_back(mean_data);
-  results.push_back(stddev_data);
-  return results;
-}
-
 std::vector<BenchmarkReporter::Run> ComputeBigO(
     const std::vector<BenchmarkReporter::Run>& reports) {
   typedef BenchmarkReporter::Run Run;
diff --git a/libcxx/utils/google-benchmark/src/complexity.h b/libcxx/utils/google-benchmark/src/complexity.h
index 23cd9bb..df29b48 100644
--- a/libcxx/utils/google-benchmark/src/complexity.h
+++ b/libcxx/utils/google-benchmark/src/complexity.h
@@ -21,17 +21,10 @@
 #include <string>
 #include <vector>
 
-#include "benchmark/benchmark_api.h"
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
 
 namespace benchmark {
 
-// Return a vector containing the mean and standard devation information for
-// the specified list of reports. If 'reports' contains less than two
-// non-errored runs an empty vector is returned
-std::vector<BenchmarkReporter::Run> ComputeStats(
-    const std::vector<BenchmarkReporter::Run>& reports);
-
 // Return a vector containing the bigO and RMS information for the specified
 // list of reports. If 'reports.size() < 2' an empty vector is returned.
 std::vector<BenchmarkReporter::Run> ComputeBigO(
@@ -58,4 +51,5 @@
 std::string GetBigOString(BigO complexity);
 
 }  // end namespace benchmark
+
 #endif  // COMPLEXITY_H_
diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc
index 3f3de02..48920ca 100644
--- a/libcxx/utils/google-benchmark/src/console_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/console_reporter.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
 #include "complexity.h"
 #include "counter.h"
 
@@ -36,15 +36,16 @@
 bool ConsoleReporter::ReportContext(const Context& context) {
   name_field_width_ = context.name_field_width;
   printed_header_ = false;
+  prev_counters_.clear();
 
   PrintBasicContext(&GetErrorStream(), context);
 
 #ifdef BENCHMARK_OS_WINDOWS
-  if (color_output_ && &std::cout != &GetOutputStream()) {
+  if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) {
     GetErrorStream()
         << "Color printing is only supported for stdout on windows."
            " Disabling color printing\n";
-    color_output_ = false;
+    output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
   }
 #endif
 
@@ -52,25 +53,39 @@
 }
 
 void ConsoleReporter::PrintHeader(const Run& run) {
-  std::string str =
-      FormatString("%-*s %13s %13s %10s\n", static_cast<int>(name_field_width_),
-                   "Benchmark", "Time", "CPU", "Iterations");
+  std::string str = FormatString("%-*s %13s %13s %10s", static_cast<int>(name_field_width_),
+                                 "Benchmark", "Time", "CPU", "Iterations");
   if(!run.counters.empty()) {
-    str += " UserCounters...";
+    if(output_options_ & OO_Tabular) {
+      for(auto const& c : run.counters) {
+        str += FormatString(" %10s", c.first.c_str());
+      }
+    } else {
+      str += " UserCounters...";
+    }
   }
+  str += "\n";
   std::string line = std::string(str.length(), '-');
   GetOutputStream() << line << "\n" << str << line << "\n";
 }
 
 void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
   for (const auto& run : reports) {
-    // print the header if none was printed yet
-    if (!printed_header_) {
+    // print the header:
+    // --- if none was printed yet
+    bool print_header = !printed_header_;
+    // --- or if the format is tabular and this run
+    //     has different fields from the prev header
+    print_header |= (output_options_ & OO_Tabular) &&
+                    (!internal::SameNames(run.counters, prev_counters_));
+    if (print_header) {
       printed_header_ = true;
+      prev_counters_ = run.counters;
       PrintHeader(run);
     }
     // As an alternative to printing the headers like this, we could sort
-    // the benchmarks by header and then print like that.
+    // the benchmarks by header and then print. But this would require
+    // waiting for the full results before printing, or printing twice.
     PrintRunData(run);
   }
 }
@@ -86,8 +101,8 @@
 void ConsoleReporter::PrintRunData(const Run& result) {
   typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
   auto& Out = GetOutputStream();
-  PrinterFn* printer =
-      color_output_ ? (PrinterFn*)ColorPrintf : IgnoreColorPrint;
+  PrinterFn* printer = (output_options_ & OO_Color) ?
+                         (PrinterFn*)ColorPrintf : IgnoreColorPrint;
   auto name_color =
       (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
   printer(Out, name_color, "%-*s ", name_field_width_,
@@ -133,8 +148,20 @@
   }
 
   for (auto& c : result.counters) {
-    auto const& s = HumanReadableNumber(c.second.value);
-    printer(Out, COLOR_DEFAULT, " %s=%s", c.first.c_str(), s.c_str());
+    // '%*s' reads its width as int; a size_t passed via varargs is undefined.
+    const int cNameLen = static_cast<int>(std::max<std::size_t>(10, c.first.length()));
+    auto const& s = HumanReadableNumber(c.second.value, 1000);
+    if (output_options_ & OO_Tabular) {
+      if (c.second.flags & Counter::kIsRate) {
+        printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str());
+      } else {
+        printer(Out, COLOR_DEFAULT, " %*s", cNameLen, s.c_str());
+      }
+    } else {
+      const char* unit = (c.second.flags & Counter::kIsRate) ? "/s" : "";
+      printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(),
+              unit);
+    }
   }
 
   if (!rate.empty()) {
diff --git a/libcxx/utils/google-benchmark/src/counter.cc b/libcxx/utils/google-benchmark/src/counter.cc
index 307863d..ed1aa04 100644
--- a/libcxx/utils/google-benchmark/src/counter.cc
+++ b/libcxx/utils/google-benchmark/src/counter.cc
@@ -30,7 +30,7 @@
 
 void Finish(UserCounters *l, double cpu_time, double num_threads) {
   for (auto &c : *l) {
-    c.second = Finish(c.second, cpu_time, num_threads);
+    c.second.value = Finish(c.second, cpu_time, num_threads);
   }
 }
 
@@ -39,7 +39,7 @@
   for (auto &c : *l) {
     auto it = r.find(c.first);
     if (it != r.end()) {
-      c.second = c.second + it->second;
+      c.second.value = c.second + it->second;
     }
   }
   // add counters present in r, but not in *l
@@ -57,7 +57,7 @@
     return false;
   }
   for (auto const& c : l) {
-    if ( r.find(c.first) == r.end()) {
+    if (r.find(c.first) == r.end()) {
       return false;
     }
   }
diff --git a/libcxx/utils/google-benchmark/src/counter.h b/libcxx/utils/google-benchmark/src/counter.h
index bbb92d9..dd6865a 100644
--- a/libcxx/utils/google-benchmark/src/counter.h
+++ b/libcxx/utils/google-benchmark/src/counter.h
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
 
 namespace benchmark {
 
diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc
index 6779815..3551064 100644
--- a/libcxx/utils/google-benchmark/src/csv_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/csv_reporter.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
 #include "complexity.h"
 
 #include <algorithm>
@@ -35,7 +35,7 @@
     "name",           "iterations",       "real_time",        "cpu_time",
     "time_unit",      "bytes_per_second", "items_per_second", "label",
     "error_occurred", "error_message"};
-}
+}  // namespace
 
 bool CSVReporter::ReportContext(const Context& context) {
   PrintBasicContext(&GetErrorStream(), context);
@@ -137,8 +137,11 @@
   // Print user counters
   for (const auto &ucn : user_counter_names_) {
     auto it = run.counters.find(ucn);
-    CHECK(it != run.counters.end());
-    Out << "," << it->second;
+    if(it == run.counters.end()) {
+      Out << ",";
+    } else {
+      Out << "," << it->second;
+    }
   }
   Out << '\n';
 }
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
index e0f9b01..4251fe4 100644
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -23,7 +23,7 @@
 
 #include <cstdint>
 
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
 #include "internal_macros.h"
 
 #if defined(BENCHMARK_OS_MACOSX)
diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h
index ab9dd85..c34f571 100644
--- a/libcxx/utils/google-benchmark/src/internal_macros.h
+++ b/libcxx/utils/google-benchmark/src/internal_macros.h
@@ -1,36 +1,45 @@
 #ifndef BENCHMARK_INTERNAL_MACROS_H_
 #define BENCHMARK_INTERNAL_MACROS_H_
 
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
 
 #ifndef __has_feature
 #define __has_feature(x) 0
 #endif
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
 
 #if defined(__clang__)
-#define COMPILER_CLANG
+  #if !defined(COMPILER_CLANG)
+    #define COMPILER_CLANG
+  #endif
 #elif defined(_MSC_VER)
-#define COMPILER_MSVC
+  #if !defined(COMPILER_MSVC)
+    #define COMPILER_MSVC
+  #endif
 #elif defined(__GNUC__)
-#define COMPILER_GCC
+  #if !defined(COMPILER_GCC)
+    #define COMPILER_GCC
+  #endif
 #endif
 
 #if __has_feature(cxx_attributes)
-#define BENCHMARK_NORETURN [[noreturn]]
+  #define BENCHMARK_NORETURN [[noreturn]]
 #elif defined(__GNUC__)
-#define BENCHMARK_NORETURN __attribute__((noreturn))
+  #define BENCHMARK_NORETURN __attribute__((noreturn))
 #elif defined(COMPILER_MSVC)
-#define BENCHMARK_NORETURN __declspec(noreturn)
+  #define BENCHMARK_NORETURN __declspec(noreturn)
 #else
-#define BENCHMARK_NORETURN
+  #define BENCHMARK_NORETURN
 #endif
 
 #if defined(__CYGWIN__)
-#define BENCHMARK_OS_CYGWIN 1
+  #define BENCHMARK_OS_CYGWIN 1
 #elif defined(_WIN32)
-#define BENCHMARK_OS_WINDOWS 1
+  #define BENCHMARK_OS_WINDOWS 1
 #elif defined(__APPLE__)
-#include "TargetConditionals.h"
+  #include "TargetConditionals.h"
   #if defined(TARGET_OS_MAC)
     #define BENCHMARK_OS_MACOSX 1
     #if defined(TARGET_OS_IPHONE)
@@ -38,18 +47,36 @@
     #endif
   #endif
 #elif defined(__FreeBSD__)
-#define BENCHMARK_OS_FREEBSD 1
+  #define BENCHMARK_OS_FREEBSD 1
+#elif defined(__NetBSD__)
+  #define BENCHMARK_OS_NETBSD 1
 #elif defined(__linux__)
-#define BENCHMARK_OS_LINUX 1
+  #define BENCHMARK_OS_LINUX 1
 #elif defined(__native_client__)
-#define BENCHMARK_OS_NACL 1
+  #define BENCHMARK_OS_NACL 1
 #elif defined(EMSCRIPTEN)
-#define BENCHMARK_OS_EMSCRIPTEN 1
+  #define BENCHMARK_OS_EMSCRIPTEN 1
+#elif defined(__rtems__)
+  #define BENCHMARK_OS_RTEMS 1
 #endif
 
 #if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
      && !defined(__EXCEPTIONS)
-#define BENCHMARK_HAS_NO_EXCEPTIONS
+  #define BENCHMARK_HAS_NO_EXCEPTIONS
+#endif
+
+#if defined(COMPILER_CLANG) || defined(COMPILER_GCC)
+  #define BENCHMARK_MAYBE_UNUSED __attribute__((unused))
+#else
+  #define BENCHMARK_MAYBE_UNUSED
+#endif
+
+#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable)
+  #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
+#elif defined(COMPILER_MSVC)
+  #define BENCHMARK_UNREACHABLE() __assume(false)
+#else
+  #define BENCHMARK_UNREACHABLE() ((void)0)
 #endif
 
 #endif  // BENCHMARK_INTERNAL_MACROS_H_
diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc
index 5a65308..b5ae302 100644
--- a/libcxx/utils/google-benchmark/src/json_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/json_reporter.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
 #include "complexity.h"
 
 #include <algorithm>
@@ -21,6 +21,8 @@
 #include <string>
 #include <tuple>
 #include <vector>
+#include <iomanip> // for setprecision
+#include <limits>
 
 #include "string_util.h"
 #include "timers.h"
@@ -48,7 +50,14 @@
 }
 
 std::string FormatKV(std::string const& key, double value) {
-  return StringPrintF("\"%s\": %.2f", key.c_str(), value);
+  std::stringstream ss;
+  ss << '"' << key << "\": ";
+
+  const auto max_digits10 = std::numeric_limits<decltype (value)>::max_digits10;
+  const auto max_fractional_digits10 = max_digits10 - 1;
+
+  ss << std::scientific << std::setprecision(max_fractional_digits10) << value;
+  return ss.str();
 }
 
 int64_t RoundDouble(double v) { return static_cast<int64_t>(v + 0.5); }
@@ -68,13 +77,37 @@
   std::string walltime_value = LocalDateTimeString();
   out << indent << FormatKV("date", walltime_value) << ",\n";
 
-  out << indent << FormatKV("num_cpus", static_cast<int64_t>(context.num_cpus))
+  CPUInfo const& info = context.cpu_info;
+  out << indent << FormatKV("num_cpus", static_cast<int64_t>(info.num_cpus))
       << ",\n";
-  out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu))
+  out << indent
+      << FormatKV("mhz_per_cpu",
+                  RoundDouble(info.cycles_per_second / 1000000.0))
       << ",\n";
-  out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled)
+  out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
       << ",\n";
 
+  out << indent << "\"caches\": [\n";
+  indent = std::string(6, ' ');
+  std::string cache_indent(8, ' ');
+  for (size_t i = 0; i < info.caches.size(); ++i) {
+    auto& CI = info.caches[i];
+    out << indent << "{\n";
+    out << cache_indent << FormatKV("type", CI.type) << ",\n";
+    out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
+        << ",\n";
+    out << cache_indent
+        << FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n";
+    out << cache_indent
+        << FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
+        << "\n";
+    out << indent << "}";
+    if (i != info.caches.size() - 1) out << ",";
+    out << "\n";
+  }
+  indent = std::string(4, ' ');
+  out << indent << "],\n";
+
 #if defined(NDEBUG)
   const char build_type[] = "release";
 #else
@@ -125,18 +158,18 @@
   if (!run.report_big_o && !run.report_rms) {
     out << indent << FormatKV("iterations", run.iterations) << ",\n";
     out << indent
-        << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime()))
+        << FormatKV("real_time", run.GetAdjustedRealTime())
         << ",\n";
     out << indent
-        << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime()));
+        << FormatKV("cpu_time", run.GetAdjustedCPUTime());
     out << ",\n"
         << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
   } else if (run.report_big_o) {
     out << indent
-        << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime()))
+        << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime())
         << ",\n";
     out << indent
-        << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime()))
+        << FormatKV("real_coefficient", run.GetAdjustedRealTime())
         << ",\n";
     out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n";
     out << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
@@ -147,17 +180,17 @@
   if (run.bytes_per_second > 0.0) {
     out << ",\n"
         << indent
-        << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
+        << FormatKV("bytes_per_second", run.bytes_per_second);
   }
   if (run.items_per_second > 0.0) {
     out << ",\n"
         << indent
-        << FormatKV("items_per_second", RoundDouble(run.items_per_second));
+        << FormatKV("items_per_second", run.items_per_second);
   }
   for(auto &c : run.counters) {
     out << ",\n"
         << indent
-        << FormatKV(c.first, RoundDouble(c.second));
+        << FormatKV(c.first, c.second);
   }
   if (!run.report_label.empty()) {
     out << ",\n" << indent << FormatKV("label", run.report_label);
diff --git a/libcxx/utils/google-benchmark/src/log.h b/libcxx/utils/google-benchmark/src/log.h
index 978cb0b..d06e103 100644
--- a/libcxx/utils/google-benchmark/src/log.h
+++ b/libcxx/utils/google-benchmark/src/log.h
@@ -4,7 +4,7 @@
 #include <iostream>
 #include <ostream>
 
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
 
 namespace benchmark {
 namespace internal {
@@ -70,4 +70,4 @@
   (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
                                                                          " ")
 
-#endif
\ No newline at end of file
+#endif
diff --git a/libcxx/utils/google-benchmark/src/reporter.cc b/libcxx/utils/google-benchmark/src/reporter.cc
index 6474242..5d2fa05 100644
--- a/libcxx/utils/google-benchmark/src/reporter.cc
+++ b/libcxx/utils/google-benchmark/src/reporter.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
 #include "timers.h"
 
 #include <cstdlib>
@@ -22,7 +22,6 @@
 #include <vector>
 
 #include "check.h"
-#include "stat.h"
 
 namespace benchmark {
 
@@ -31,17 +30,29 @@
 
 BenchmarkReporter::~BenchmarkReporter() {}
 
-void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
+void BenchmarkReporter::PrintBasicContext(std::ostream *out,
                                           Context const &context) {
-  CHECK(out_ptr) << "cannot be null";
-  auto &Out = *out_ptr;
-
-  Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
-      << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
+  CHECK(out) << "cannot be null";
+  auto &Out = *out;
 
   Out << LocalDateTimeString() << "\n";
 
-  if (context.cpu_scaling_enabled) {
+  const CPUInfo &info = context.cpu_info;
+  Out << "Run on (" << info.num_cpus << " X "
+      << (info.cycles_per_second / 1000000.0) << " MHz CPU "
+      << ((info.num_cpus > 1) ? "s" : "") << ")\n";
+  if (info.caches.size() != 0) {
+    Out << "CPU Caches:\n";
+    for (auto &CInfo : info.caches) {
+      Out << "  L" << CInfo.level << " " << CInfo.type << " "
+          << (CInfo.size / 1000) << "K";
+      if (CInfo.num_sharing != 0)
+        Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
+      Out << "\n";
+    }
+  }
+
+  if (info.scaling_enabled) {
     Out << "***WARNING*** CPU scaling is enabled, the benchmark "
            "real time measurements may be noisy and will incur extra "
            "overhead.\n";
@@ -53,6 +64,8 @@
 #endif
 }
 
+BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}
+
 double BenchmarkReporter::Run::GetAdjustedRealTime() const {
   double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
   if (iterations != 0) new_time /= static_cast<double>(iterations);
diff --git a/libcxx/utils/google-benchmark/src/stat.h b/libcxx/utils/google-benchmark/src/stat.h
deleted file mode 100644
index 136c3aa..0000000
--- a/libcxx/utils/google-benchmark/src/stat.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#ifndef BENCHMARK_STAT_H_
-#define BENCHMARK_STAT_H_
-
-#include <cmath>
-#include <limits>
-#include <ostream>
-#include <type_traits>
-
-namespace benchmark {
-
-template <typename VType, typename NumType>
-class Stat1;
-
-template <typename VType, typename NumType>
-class Stat1MinMax;
-
-typedef Stat1<float, int64_t> Stat1_f;
-typedef Stat1<double, int64_t> Stat1_d;
-typedef Stat1MinMax<float, int64_t> Stat1MinMax_f;
-typedef Stat1MinMax<double, int64_t> Stat1MinMax_d;
-
-template <typename VType>
-class Vector2;
-template <typename VType>
-class Vector3;
-template <typename VType>
-class Vector4;
-
-template <typename VType, typename NumType>
-class Stat1 {
- public:
-  typedef Stat1<VType, NumType> Self;
-
-  Stat1() { Clear(); }
-  // Create a sample of value dat and weight 1
-  explicit Stat1(const VType &dat) {
-    sum_ = dat;
-    sum_squares_ = Sqr(dat);
-    numsamples_ = 1;
-  }
-  // Create statistics for all the samples between begin (included)
-  // and end(excluded)
-  explicit Stat1(const VType *begin, const VType *end) {
-    Clear();
-    for (const VType *item = begin; item < end; ++item) {
-      (*this) += Stat1(*item);
-    }
-  }
-  // Create a sample of value dat and weight w
-  Stat1(const VType &dat, const NumType &w) {
-    sum_ = w * dat;
-    sum_squares_ = w * Sqr(dat);
-    numsamples_ = w;
-  }
-  // Copy operator
-  Stat1(const Self &stat) {
-    sum_ = stat.sum_;
-    sum_squares_ = stat.sum_squares_;
-    numsamples_ = stat.numsamples_;
-  }
-
-  void Clear() {
-    numsamples_ = NumType();
-    sum_squares_ = sum_ = VType();
-  }
-
-  Self &operator=(const Self &stat) {
-    sum_ = stat.sum_;
-    sum_squares_ = stat.sum_squares_;
-    numsamples_ = stat.numsamples_;
-    return (*this);
-  }
-  // Merge statistics from two sample sets.
-  Self &operator+=(const Self &stat) {
-    sum_ += stat.sum_;
-    sum_squares_ += stat.sum_squares_;
-    numsamples_ += stat.numsamples_;
-    return (*this);
-  }
-  // The operation opposite to +=
-  Self &operator-=(const Self &stat) {
-    sum_ -= stat.sum_;
-    sum_squares_ -= stat.sum_squares_;
-    numsamples_ -= stat.numsamples_;
-    return (*this);
-  }
-  // Multiply the weight of the set of samples by a factor k
-  Self &operator*=(const VType &k) {
-    sum_ *= k;
-    sum_squares_ *= k;
-    numsamples_ *= k;
-    return (*this);
-  }
-
-  // Merge statistics from two sample sets.
-  Self operator+(const Self &stat) const { return Self(*this) += stat; }
-
-  // The operation opposite to +
-  Self operator-(const Self &stat) const { return Self(*this) -= stat; }
-
-  // Multiply the weight of the set of samples by a factor k
-  Self operator*(const VType &k) const { return Self(*this) *= k; }
-
-  // Return the total weight of this sample set
-  NumType numSamples() const { return numsamples_; }
-
-  // Return the sum of this sample set
-  VType Sum() const { return sum_; }
-
-  // Return the mean of this sample set
-  VType Mean() const {
-    if (numsamples_ == 0) return VType();
-    return sum_ * (1.0 / numsamples_);
-  }
-
-  // Return the mean of this sample set and compute the standard deviation at
-  // the same time.
-  VType Mean(VType *stddev) const {
-    if (numsamples_ == 0) return VType();
-    VType mean = sum_ * (1.0 / numsamples_);
-    if (stddev) {
-      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
-      *stddev = Sqrt(avg_squares - Sqr(mean));
-    }
-    return mean;
-  }
-
-  // Return the standard deviation of the sample set
-  VType StdDev() const {
-    if (numsamples_ == 0) return VType();
-    VType mean = Mean();
-    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
-    return Sqrt(avg_squares - Sqr(mean));
-  }
-
- private:
-  static_assert(std::is_integral<NumType>::value &&
-                    !std::is_same<NumType, bool>::value,
-                "NumType must be an integral type that is not bool.");
-  // Let i be the index of the samples provided (using +=)
-  // and weight[i],value[i] be the data of sample #i
-  // then the variables have the following meaning:
-  NumType numsamples_;  // sum of weight[i];
-  VType sum_;           // sum of weight[i]*value[i];
-  VType sum_squares_;   // sum of weight[i]*value[i]^2;
-
-  // Template function used to square a number.
-  // For a vector we square all components
-  template <typename SType>
-  static inline SType Sqr(const SType &dat) {
-    return dat * dat;
-  }
-
-  template <typename SType>
-  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
-    return dat.MulComponents(dat);
-  }
-
-  template <typename SType>
-  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
-    return dat.MulComponents(dat);
-  }
-
-  template <typename SType>
-  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
-    return dat.MulComponents(dat);
-  }
-
-  // Template function used to take the square root of a number.
-  // For a vector we square all components
-  template <typename SType>
-  static inline SType Sqrt(const SType &dat) {
-    // Avoid NaN due to imprecision in the calculations
-    if (dat < 0) return 0;
-    return sqrt(dat);
-  }
-
-  template <typename SType>
-  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
-    // Avoid NaN due to imprecision in the calculations
-    return Max(dat, Vector2<SType>()).Sqrt();
-  }
-
-  template <typename SType>
-  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
-    // Avoid NaN due to imprecision in the calculations
-    return Max(dat, Vector3<SType>()).Sqrt();
-  }
-
-  template <typename SType>
-  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
-    // Avoid NaN due to imprecision in the calculations
-    return Max(dat, Vector4<SType>()).Sqrt();
-  }
-};
-
-// Useful printing function
-template <typename VType, typename NumType>
-std::ostream &operator<<(std::ostream &out, const Stat1<VType, NumType> &s) {
-  out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
-      << " nsamples = " << s.NumSamples() << "}";
-  return out;
-}
-
-// Stat1MinMax: same as Stat1, but it also
-// keeps the Min and Max values; the "-"
-// operator is disabled because it cannot be implemented
-// efficiently
-template <typename VType, typename NumType>
-class Stat1MinMax : public Stat1<VType, NumType> {
- public:
-  typedef Stat1MinMax<VType, NumType> Self;
-
-  Stat1MinMax() { Clear(); }
-  // Create a sample of value dat and weight 1
-  explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
-    max_ = dat;
-    min_ = dat;
-  }
-  // Create statistics for all the samples between begin (included)
-  // and end(excluded)
-  explicit Stat1MinMax(const VType *begin, const VType *end) {
-    Clear();
-    for (const VType *item = begin; item < end; ++item) {
-      (*this) += Stat1MinMax(*item);
-    }
-  }
-  // Create a sample of value dat and weight w
-  Stat1MinMax(const VType &dat, const NumType &w)
-      : Stat1<VType, NumType>(dat, w) {
-    max_ = dat;
-    min_ = dat;
-  }
-  // Copy operator
-  Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
-    max_ = stat.max_;
-    min_ = stat.min_;
-  }
-
-  void Clear() {
-    Stat1<VType, NumType>::Clear();
-    if (std::numeric_limits<VType>::has_infinity) {
-      min_ = std::numeric_limits<VType>::infinity();
-      max_ = -std::numeric_limits<VType>::infinity();
-    } else {
-      min_ = std::numeric_limits<VType>::max();
-      max_ = std::numeric_limits<VType>::min();
-    }
-  }
-
-  Self &operator=(const Self &stat) {
-    this->Stat1<VType, NumType>::operator=(stat);
-    max_ = stat.max_;
-    min_ = stat.min_;
-    return (*this);
-  }
-  // Merge statistics from two sample sets.
-  Self &operator+=(const Self &stat) {
-    this->Stat1<VType, NumType>::operator+=(stat);
-    if (stat.max_ > max_) max_ = stat.max_;
-    if (stat.min_ < min_) min_ = stat.min_;
-    return (*this);
-  }
-  // Multiply the weight of the set of samples by a factor k
-  Self &operator*=(const VType &stat) {
-    this->Stat1<VType, NumType>::operator*=(stat);
-    return (*this);
-  }
-  // Merge statistics from two sample sets.
-  Self operator+(const Self &stat) const { return Self(*this) += stat; }
-  // Multiply the weight of the set of samples by a factor k
-  Self operator*(const VType &k) const { return Self(*this) *= k; }
-
-  // Return the maximal value in this sample set
-  VType Max() const { return max_; }
-  // Return the minimal value in this sample set
-  VType Min() const { return min_; }
-
- private:
-  // The - operation makes no sense with Min/Max
-  // unless we keep the full list of values (but we don't)
-  // make it private, and let it undefined so nobody can call it
-  Self &operator-=(const Self &stat);  // senseless. let it undefined.
-
-  // The operation opposite to -
-  Self operator-(const Self &stat) const;  // senseless. let it undefined.
-
-  // Let i be the index of the samples provided (using +=)
-  // and weight[i],value[i] be the data of sample #i
-  // then the variables have the following meaning:
-  VType max_;  // max of value[i]
-  VType min_;  // min of value[i]
-};
-
-// Useful printing function
-template <typename VType, typename NumType>
-std::ostream &operator<<(std::ostream &out,
-                         const Stat1MinMax<VType, NumType> &s) {
-  out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
-      << " nsamples = " << s.NumSamples() << " min = " << s.Min()
-      << " max = " << s.Max() << "}";
-  return out;
-}
-}  // end namespace benchmark
-
-#endif  // BENCHMARK_STAT_H_
diff --git a/libcxx/utils/google-benchmark/src/statistics.cc b/libcxx/utils/google-benchmark/src/statistics.cc
new file mode 100644
index 0000000..5932ad4
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/statistics.cc
@@ -0,0 +1,175 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+// Copyright 2017 Roman Lebedev. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <numeric>
+#include "check.h"
+#include "statistics.h"
+
+namespace benchmark {
+
+auto StatisticsSum = [](const std::vector<double>& v) {
+  return std::accumulate(v.begin(), v.end(), 0.0);
+};
+
+double StatisticsMean(const std::vector<double>& v) {
+  if (v.size() == 0) return 0.0;
+  return StatisticsSum(v) * (1.0 / v.size());
+}
+
+double StatisticsMedian(const std::vector<double>& v) {
+  if (v.size() < 3) return StatisticsMean(v);
+  std::vector<double> partial;
+  // we need roundDown(count/2)+1 slots
+  partial.resize(1 + (v.size() / 2));
+  std::partial_sort_copy(v.begin(), v.end(), partial.begin(), partial.end());
+  // did we have an odd number of samples?
+  // if yes, then the last element of partially-sorted vector is the median
+  // if no, then the average of the last two elements is the median
+  if(v.size() % 2 == 1)
+    return partial.back();
+  return (partial[partial.size() - 2] + partial[partial.size() - 1]) / 2.0;
+}
+
+// Return the sum of the squares of this sample set
+auto SumSquares = [](const std::vector<double>& v) {
+  return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
+};
+
+auto Sqr = [](const double dat) { return dat * dat; };
+auto Sqrt = [](const double dat) {
+  // Avoid NaN due to imprecision in the calculations
+  if (dat < 0.0) return 0.0;
+  return std::sqrt(dat);
+};
+
+double StatisticsStdDev(const std::vector<double>& v) {
+  const auto mean = StatisticsMean(v);
+  if (v.size() == 0) return mean;
+
+  // Sample standard deviation is undefined for n = 1
+  if (v.size() == 1)
+    return 0.0;
+
+  const double avg_squares = SumSquares(v) * (1.0 / v.size());
+  return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
+}
+
+std::vector<BenchmarkReporter::Run> ComputeStats(
+    const std::vector<BenchmarkReporter::Run>& reports) {
+  typedef BenchmarkReporter::Run Run;
+  std::vector<Run> results;
+
+  auto error_count =
+      std::count_if(reports.begin(), reports.end(),
+                    [](Run const& run) { return run.error_occurred; });
+
+  if (reports.size() - error_count < 2) {
+    // We don't report aggregated data for fewer than two non-errored runs.
+    return results;
+  }
+
+  // Accumulators.
+  std::vector<double> real_accumulated_time_stat;
+  std::vector<double> cpu_accumulated_time_stat;
+  std::vector<double> bytes_per_second_stat;
+  std::vector<double> items_per_second_stat;
+
+  real_accumulated_time_stat.reserve(reports.size());
+  cpu_accumulated_time_stat.reserve(reports.size());
+  bytes_per_second_stat.reserve(reports.size());
+  items_per_second_stat.reserve(reports.size());
+
+  // All repetitions should be run with the same number of iterations so we
+  // can take this information from the first benchmark.
+  int64_t const run_iterations = reports.front().iterations;
+  // create stats for user counters
+  struct CounterStat {
+    Counter c;
+    std::vector<double> s;
+  };
+  std::map< std::string, CounterStat > counter_stats;
+  for(Run const& r : reports) {
+    for(auto const& cnt : r.counters) {
+      auto it = counter_stats.find(cnt.first);
+      if(it == counter_stats.end()) {
+        counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
+        it = counter_stats.find(cnt.first);
+        it->second.s.reserve(reports.size());
+      } else {
+        CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
+      }
+    }
+  }
+
+  // Populate the accumulators.
+  for (Run const& run : reports) {
+    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
+    CHECK_EQ(run_iterations, run.iterations);
+    if (run.error_occurred) continue;
+    real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
+    cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
+    items_per_second_stat.emplace_back(run.items_per_second);
+    bytes_per_second_stat.emplace_back(run.bytes_per_second);
+    // user counters
+    for(auto const& cnt : run.counters) {
+      auto it = counter_stats.find(cnt.first);
+      CHECK_NE(it, counter_stats.end());
+      it->second.s.emplace_back(cnt.second);
+    }
+  }
+
+  // Only add label if it is the same for all runs
+  std::string report_label = reports[0].report_label;
+  for (std::size_t i = 1; i < reports.size(); i++) {
+    if (reports[i].report_label != report_label) {
+      report_label = "";
+      break;
+    }
+  }
+
+  for(const auto& Stat : *reports[0].statistics) {
+    // Get the data from the accumulator to BenchmarkReporter::Run's.
+    Run data;
+    data.benchmark_name = reports[0].benchmark_name + "_" + Stat.name_;
+    data.report_label = report_label;
+    data.iterations = run_iterations;
+
+    data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
+    data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
+    data.bytes_per_second = Stat.compute_(bytes_per_second_stat);
+    data.items_per_second = Stat.compute_(items_per_second_stat);
+
+    data.time_unit = reports[0].time_unit;
+
+    // user counters
+    for(auto const& kv : counter_stats) {
+      const auto uc_stat = Stat.compute_(kv.second.s);
+      auto c = Counter(uc_stat, counter_stats[kv.first].c.flags);
+      data.counters[kv.first] = c;
+    }
+
+    results.push_back(data);
+  }
+
+  return results;
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/statistics.h b/libcxx/utils/google-benchmark/src/statistics.h
new file mode 100644
index 0000000..7eccc85
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/statistics.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+// Copyright 2017 Roman Lebedev. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STATISTICS_H_
+#define STATISTICS_H_
+
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+namespace benchmark {
+
+// Return a vector containing the mean, median and standard deviation
+// information (and any user-specified info) for the specified list of reports.
+// If fewer than two runs are non-errored, an empty vector is returned.
+std::vector<BenchmarkReporter::Run> ComputeStats(
+    const std::vector<BenchmarkReporter::Run>& reports);
+
+double StatisticsMean(const std::vector<double>& v);
+double StatisticsMedian(const std::vector<double>& v);
+double StatisticsStdDev(const std::vector<double>& v);
+
+}  // end namespace benchmark
+
+#endif  // STATISTICS_H_
diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc
index cd4e7cf..29edb2a 100644
--- a/libcxx/utils/google-benchmark/src/string_util.cc
+++ b/libcxx/utils/google-benchmark/src/string_util.cc
@@ -27,8 +27,6 @@
 
 static const int64_t kUnitsSize = arraysize(kBigSIUnits);
 
-}  // end anonymous namespace
-
 void ToExponentAndMantissa(double val, double thresh, int precision,
                            double one_k, std::string* mantissa,
                            int64_t* exponent) {
@@ -100,14 +98,16 @@
 }
 
 std::string ToBinaryStringFullySpecified(double value, double threshold,
-                                         int precision) {
+                                         int precision, double one_k = 1024.0) {
   std::string mantissa;
   int64_t exponent;
-  ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa,
+  ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa,
                         &exponent);
   return mantissa + ExponentToPrefix(exponent, false);
 }
 
+}  // end namespace
+
 void AppendHumanReadable(int n, std::string* str) {
   std::stringstream ss;
   // Round down to the nearest SI prefix.
@@ -115,11 +115,11 @@
   *str += ss.str();
 }
 
-std::string HumanReadableNumber(double n) {
+std::string HumanReadableNumber(double n, double one_k) {
   // 1.1 means that figures up to 1.1k should be shown with the next unit down;
   // this softens edge effects.
   // 1 means that we should show one decimal place of precision.
-  return ToBinaryStringFullySpecified(n, 1.1, 1);
+  return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
 }
 
 std::string StringPrintFImp(const char* msg, va_list args) {
diff --git a/libcxx/utils/google-benchmark/src/string_util.h b/libcxx/utils/google-benchmark/src/string_util.h
index 0b190b9..c3d53bf 100644
--- a/libcxx/utils/google-benchmark/src/string_util.h
+++ b/libcxx/utils/google-benchmark/src/string_util.h
@@ -10,7 +10,7 @@
 
 void AppendHumanReadable(int n, std::string* str);
 
-std::string HumanReadableNumber(double n);
+std::string HumanReadableNumber(double n, double one_k = 1024.0);
 
 std::string StringPrintF(const char* format, ...);
 
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc
index 7feb79e..2520ad5 100644
--- a/libcxx/utils/google-benchmark/src/sysinfo.cc
+++ b/libcxx/utils/google-benchmark/src/sysinfo.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "sysinfo.h"
 #include "internal_macros.h"
 
 #ifdef BENCHMARK_OS_WINDOWS
@@ -25,21 +24,29 @@
 #include <sys/time.h>
 #include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
 #include <unistd.h>
-#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
+#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
+    defined BENCHMARK_OS_NETBSD
+#define BENCHMARK_HAS_SYSCTL
 #include <sys/sysctl.h>
 #endif
 #endif
 
+#include <algorithm>
+#include <array>
+#include <bitset>
 #include <cerrno>
+#include <climits>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <fstream>
 #include <iostream>
+#include <iterator>
 #include <limits>
-#include <mutex>
+#include <memory>
+#include <sstream>
 
-#include "arraysize.h"
 #include "check.h"
 #include "cycleclock.h"
 #include "internal_macros.h"
@@ -49,214 +56,431 @@
 
 namespace benchmark {
 namespace {
-std::once_flag cpuinfo_init;
-double cpuinfo_cycles_per_second = 1.0;
-int cpuinfo_num_cpus = 1;  // Conservative guess
 
-#if !defined BENCHMARK_OS_MACOSX
-const int64_t estimate_time_ms = 1000;
+void PrintImp(std::ostream& out) { out << std::endl; }
 
-// Helper function estimates cycles/sec by observing cycles elapsed during
-// sleep(). Using small sleep time decreases accuracy significantly.
-int64_t EstimateCyclesPerSecond() {
-  const int64_t start_ticks = cycleclock::Now();
-  SleepForMilliseconds(estimate_time_ms);
-  return cycleclock::Now() - start_ticks;
+template <class First, class... Rest>
+void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
+  out << std::forward<First>(f);
+  PrintImp(out, std::forward<Rest>(rest)...);
+}
+
+template <class... Args>
+BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
+  PrintImp(std::cerr, std::forward<Args>(args)...);
+  std::exit(EXIT_FAILURE);
+}
+
+#ifdef BENCHMARK_HAS_SYSCTL
+
+/// ValueUnion - A type used to correctly alias the byte-for-byte output of
+/// `sysctl` with the result type it's to be interpreted as.
+struct ValueUnion {
+  union DataT {
+    uint32_t uint32_value;
+    uint64_t uint64_value;
+    // For correct aliasing of union members from bytes.
+    char bytes[8];
+  };
+  using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
+
+  // The size of the data union member + its trailing array size.
+  size_t Size;
+  DataPtr Buff;
+
+ public:
+  ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
+
+  explicit ValueUnion(size_t BuffSize)
+      : Size(sizeof(DataT) + BuffSize),
+        Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
+
+  ValueUnion(ValueUnion&& other) = default;
+
+  explicit operator bool() const { return bool(Buff); }
+
+  char* data() const { return Buff->bytes; }
+
+  std::string GetAsString() const { return std::string(data()); }
+
+  int64_t GetAsInteger() const {
+    if (Size == sizeof(Buff->uint32_value))
+      return static_cast<int32_t>(Buff->uint32_value);
+    else if (Size == sizeof(Buff->uint64_value))
+      return static_cast<int64_t>(Buff->uint64_value);
+    BENCHMARK_UNREACHABLE();
+  }
+
+  uint64_t GetAsUnsigned() const {
+    if (Size == sizeof(Buff->uint32_value))
+      return Buff->uint32_value;
+    else if (Size == sizeof(Buff->uint64_value))
+      return Buff->uint64_value;
+    BENCHMARK_UNREACHABLE();
+  }
+
+  template <class T, int N>
+  std::array<T, N> GetAsArray() {
+    const int ArrSize = sizeof(T) * N;
+    CHECK_LE(ArrSize, Size);
+    std::array<T, N> Arr;
+    std::memcpy(Arr.data(), data(), ArrSize);
+    return Arr;
+  }
+};
+
+ValueUnion GetSysctlImp(std::string const& Name) {
+  size_t CurBuffSize = 0;
+  if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
+    return ValueUnion();
+
+  ValueUnion buff(CurBuffSize);
+  if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
+    return buff;
+  return ValueUnion();
+}
+
+BENCHMARK_MAYBE_UNUSED
+bool GetSysctl(std::string const& Name, std::string* Out) {
+  Out->clear();
+  auto Buff = GetSysctlImp(Name);
+  if (!Buff) return false;
+  Out->assign(Buff.data());
+  return true;
+}
+
+template <class Tp,
+          class = typename std::enable_if<std::is_integral<Tp>::value>::type>
+bool GetSysctl(std::string const& Name, Tp* Out) {
+  *Out = 0;
+  auto Buff = GetSysctlImp(Name);
+  if (!Buff) return false;
+  *Out = static_cast<Tp>(Buff.GetAsUnsigned());
+  return true;
+}
+
+template <class Tp, size_t N>
+bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
+  auto Buff = GetSysctlImp(Name);
+  if (!Buff) return false;
+  *Out = Buff.GetAsArray<Tp, N>();
+  return true;
 }
 #endif
 
-#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
-// Helper function for reading an int from a file. Returns true if successful
-// and the memory location pointed to by value is set to the value read.
-bool ReadIntFromFile(const char* file, long* value) {
-  bool ret = false;
-  int fd = open(file, O_RDONLY);
-  if (fd != -1) {
-    char line[1024];
-    char* err;
-    memset(line, '\0', sizeof(line));
-    ssize_t read_err = read(fd, line, sizeof(line) - 1);
-    ((void)read_err); // prevent unused warning
-    CHECK(read_err >= 0);
-    const long temp_value = strtol(line, &err, 10);
-    if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
-      *value = temp_value;
-      ret = true;
+template <class ArgT>  // Extracts one `>>`-formatted value from a file.
+bool ReadFromFile(std::string const& fname, ArgT* arg) {
+  *arg = ArgT();  // Reset so a failed read never leaves a stale value.
+  std::ifstream f(fname.c_str());
+  if (!f.is_open()) return false;
+  f >> *arg;
+  return !f.fail();  // eofbit alone (no trailing newline) is still success.
+}
+
+bool CpuScalingEnabled(int num_cpus) {
+  // We don't have a valid CPU count, so don't even bother.
+  if (num_cpus <= 0) return false;
+#ifndef BENCHMARK_OS_WINDOWS
+  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
+  // local file system. If reading the exported files fails, then we may not be
+  // running on Linux, so we silently ignore all the read errors.
+  std::string res;
+  for (int cpu = 0; cpu < num_cpus; ++cpu) {
+    std::string governor_file =
+        StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
+    if (ReadFromFile(governor_file, &res) && res != "performance") return true;
+  }
+#endif
+  return false;
+}
+
+int CountSetBitsInCPUMap(std::string Val) {
+  auto CountBits = [](std::string Part) {
+    using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
+    Part = "0x" + Part;
+    CPUMask Mask(std::stoul(Part, nullptr, 16));
+    return static_cast<int>(Mask.count());
+  };
+  size_t Pos;
+  int total = 0;
+  while ((Pos = Val.find(',')) != std::string::npos) {
+    total += CountBits(Val.substr(0, Pos));
+    Val = Val.substr(Pos + 1);
+  }
+  if (!Val.empty()) {
+    total += CountBits(Val);
+  }
+  return total;
+}
+
+BENCHMARK_MAYBE_UNUSED
+std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
+  std::vector<CPUInfo::CacheInfo> res;
+  std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
+  int Idx = 0;
+  while (true) {
+    CPUInfo::CacheInfo info;
+    std::string FPath = StrCat(dir, "index", Idx++, "/");
+    std::ifstream f(StrCat(FPath, "size").c_str());
+    if (!f.is_open()) break;
+    std::string suffix;
+    f >> info.size;
+    if (f.fail())
+      PrintErrorAndDie("Failed while reading file '", FPath, "size'");
+    if (f.good()) {
+      f >> suffix;
+      if (f.bad())
+        PrintErrorAndDie(
+            "Invalid cache size format: failed to read size suffix");
+      else if (f && suffix != "K")
+        PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
+      else if (suffix == "K")
+        info.size *= 1024;  // sysfs reports cache sizes in KiB, not kB.
     }
-    close(fd);
+    if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
+      PrintErrorAndDie("Failed to read from file ", FPath, "type");
+    if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
+      PrintErrorAndDie("Failed to read from file ", FPath, "level");
+    std::string map_str;
+    if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
+      PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
+    info.num_sharing = CountSetBitsInCPUMap(map_str);
+    res.push_back(info);
   }
-  return ret;
+
+  return res;
+}
+
+#ifdef BENCHMARK_OS_MACOSX
+std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
+  std::vector<CPUInfo::CacheInfo> res;
+  std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
+  GetSysctl("hw.cacheconfig", &CacheCounts);
+
+  struct {
+    std::string name;
+    std::string type;
+    int level;
+    size_t num_sharing;
+  } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
+               {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
+               {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
+               {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
+  for (auto& C : Cases) {
+    int val;
+    if (!GetSysctl(C.name, &val)) continue;
+    CPUInfo::CacheInfo info;
+    info.type = C.type;
+    info.level = C.level;
+    info.size = val;
+    info.num_sharing = static_cast<int>(C.num_sharing);
+    res.push_back(std::move(info));
+  }
+  return res;
+}
+#elif defined(BENCHMARK_OS_WINDOWS)
+std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
+  std::vector<CPUInfo::CacheInfo> res;
+  DWORD buffer_size = 0;
+  using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
+  using CInfo = CACHE_DESCRIPTOR;
+
+  using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
+  GetLogicalProcessorInformation(nullptr, &buffer_size);
+  UPtr buff((PInfo*)malloc(buffer_size), &std::free);
+  if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
+    PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
+                     GetLastError());
+
+  PInfo* it = buff.get();
+  PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
+
+  for (; it != end; ++it) {
+    if (it->Relationship != RelationCache) continue;
+    using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
+    BitSet B(it->ProcessorMask);
+    // To prevent duplicates, only consider caches where CPU 0 is specified
+    if (!B.test(0)) continue;
+    CInfo* Cache = &it->Cache;
+    CPUInfo::CacheInfo C;
+    C.num_sharing = B.count();
+    C.level = Cache->Level;
+    C.size = Cache->Size;
+    switch (Cache->Type) {
+      case CacheUnified:
+        C.type = "Unified";
+        break;
+      case CacheInstruction:
+        C.type = "Instruction";
+        break;
+      case CacheData:
+        C.type = "Data";
+        break;
+      case CacheTrace:
+        C.type = "Trace";
+        break;
+      default:
+        C.type = "Unknown";
+        break;
+    }
+    res.push_back(C);
+  }
+  return res;
 }
 #endif
 
-#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
-static std::string convertToLowerCase(std::string s) {
-  for (auto& ch : s)
-    ch = std::tolower(ch);
-  return s;
-}
-static bool startsWithKey(std::string Value, std::string Key,
-                          bool IgnoreCase = true) {
-  if (IgnoreCase) {
-    Key = convertToLowerCase(std::move(Key));
-    Value = convertToLowerCase(std::move(Value));
-  }
-  return Value.compare(0, Key.size(), Key) == 0;
-}
+std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
+#ifdef BENCHMARK_OS_MACOSX
+  return GetCacheSizesMacOSX();
+#elif defined(BENCHMARK_OS_WINDOWS)
+  return GetCacheSizesWindows();
+#else
+  return GetCacheSizesFromKVFS();
 #endif
+}
 
-void InitializeSystemInfo() {
+int GetNumCPUs() {
+#ifdef BENCHMARK_HAS_SYSCTL
+  int NumCPU = -1;
+  if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
+  fprintf(stderr, "Err: %s\n", strerror(errno));
+  std::exit(EXIT_FAILURE);
+#elif defined(BENCHMARK_OS_WINDOWS)
+  SYSTEM_INFO sysinfo;
+  // Use memset as opposed to = {} to avoid GCC missing initializer false
+  // positives.
+  std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
+  GetSystemInfo(&sysinfo);
+  return sysinfo.dwNumberOfProcessors;  // number of logical
+                                        // processors in the current
+                                        // group
+#else
+  int NumCPUs = 0;
+  int MaxID = -1;
+  std::ifstream f("/proc/cpuinfo");
+  if (!f.is_open()) {
+    std::cerr << "failed to open /proc/cpuinfo\n";
+    return -1;
+  }
+  const std::string Key = "processor";
+  std::string ln;
+  while (std::getline(f, ln)) {
+    if (ln.empty()) continue;
+    size_t SplitIdx = ln.find(':');
+    std::string value;
+    if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+    if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
+      NumCPUs++;
+      if (!value.empty()) {
+        int CurID = std::stoi(value);
+        MaxID = std::max(CurID, MaxID);
+      }
+    }
+  }
+  if (f.bad()) {
+    std::cerr << "Failure reading /proc/cpuinfo\n";
+    return -1;
+  }
+  if (!f.eof()) {
+    std::cerr << "Failed to read to end of /proc/cpuinfo\n";
+    return -1;
+  }
+  f.close();
+
+  if ((MaxID + 1) != NumCPUs) {
+    fprintf(stderr,
+            "CPU ID assignments in /proc/cpuinfo seem messed up."
+            " This is usually caused by a bad BIOS.\n");
+  }
+  return NumCPUs;
+#endif
+  BENCHMARK_UNREACHABLE();
+}
+
+double GetCPUCyclesPerSecond() {
 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
-  char line[1024];
-  char* err;
   long freq;
 
-  bool saw_mhz = false;
-
   // If the kernel is exporting the tsc frequency use that. There are issues
   // where cpuinfo_max_freq cannot be relied on because the BIOS may be
   // exporintg an invalid p-state (on x86) or p-states may be used to put the
   // processor in a new mode (turbo mode). Essentially, those frequencies
   // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
   // well.
-  if (!saw_mhz &&
-      ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+  if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
+      // If CPU scaling is in effect, we want to use the *maximum* frequency,
+      // not whatever CPU speed some random processor happens to be using now.
+      || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+                      &freq)) {
     // The value is in kHz (as the file name suggests).  For example, on a
     // 2GHz warpstation, the file contains the value "2000000".
-    cpuinfo_cycles_per_second = freq * 1000.0;
-    saw_mhz = true;
+    return freq * 1000.0;
   }
 
-  // If CPU scaling is in effect, we want to use the *maximum* frequency,
-  // not whatever CPU speed some random processor happens to be using now.
-  if (!saw_mhz &&
-      ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
-                      &freq)) {
-    // The value is in kHz.  For example, on a 2GHz warpstation, the file
-    // contains the value "2000000".
-    cpuinfo_cycles_per_second = freq * 1000.0;
-    saw_mhz = true;
+  const double error_value = -1;
+  double bogo_clock = error_value;
+
+  std::ifstream f("/proc/cpuinfo");
+  if (!f.is_open()) {
+    std::cerr << "failed to open /proc/cpuinfo\n";
+    return error_value;
   }
 
-  // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
-  const char* pname = "/proc/cpuinfo";
-  int fd = open(pname, O_RDONLY);
-  if (fd == -1) {
-    perror(pname);
-    if (!saw_mhz) {
-      cpuinfo_cycles_per_second =
-          static_cast<double>(EstimateCyclesPerSecond());
-    }
-    return;
-  }
+  auto startsWithKey = [](std::string const& Value, std::string const& Key) {
+    if (Key.size() > Value.size()) return false;  // Key cannot be a prefix.
+    auto Cmp = [](unsigned char X, unsigned char Y) {  // tolower(<0) is UB
+      return std::tolower(X) == std::tolower(Y);
+    };
+    return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
+  };
 
-  double bogo_clock = 1.0;
-  bool saw_bogo = false;
-  long max_cpu_id = 0;
-  int num_cpus = 0;
-  line[0] = line[1] = '\0';
-  size_t chars_read = 0;
-  do {  // we'll exit when the last read didn't read anything
-    // Move the next line to the beginning of the buffer
-    const size_t oldlinelen = strlen(line);
-    if (sizeof(line) == oldlinelen + 1)  // oldlinelen took up entire line
-      line[0] = '\0';
-    else  // still other lines left to save
-      memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
-    // Terminate the new line, reading more if we can't find the newline
-    char* newline = strchr(line, '\n');
-    if (newline == nullptr) {
-      const size_t linelen = strlen(line);
-      const size_t bytes_to_read = sizeof(line) - 1 - linelen;
-      CHECK(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
-      chars_read = read(fd, line + linelen, bytes_to_read);
-      line[linelen + chars_read] = '\0';
-      newline = strchr(line, '\n');
-    }
-    if (newline != nullptr) *newline = '\0';
-
+  std::string ln;
+  while (std::getline(f, ln)) {
+    if (ln.empty()) continue;
+    size_t SplitIdx = ln.find(':');
+    std::string value;
+    if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
     // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
     // accept postive values. Some environments (virtual machines) report zero,
     // which would cause infinite looping in WallTime_Init.
-    if (!saw_mhz && startsWithKey(line, "cpu MHz")) {
-      const char* freqstr = strchr(line, ':');
-      if (freqstr) {
-        cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
-        if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
-          saw_mhz = true;
+    if (startsWithKey(ln, "cpu MHz")) {
+      if (!value.empty()) {
+        double cycles_per_second = std::stod(value) * 1000000.0;
+        if (cycles_per_second > 0) return cycles_per_second;
       }
-    } else if (startsWithKey(line, "bogomips")) {
-      const char* freqstr = strchr(line, ':');
-      if (freqstr) {
-        bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
-        if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
-          saw_bogo = true;
+    } else if (startsWithKey(ln, "bogomips")) {
+      if (!value.empty()) {
+        bogo_clock = std::stod(value) * 1000000.0;
+        if (bogo_clock <= 0.0) bogo_clock = error_value;  // zero is invalid
       }
-    } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) {
-      // The above comparison is case-sensitive because ARM kernels often
-      // include a "Processor" line that tells you about the CPU, distinct
-      // from the usual "processor" lines that give you CPU ids. No current
-      // Linux architecture is using "Processor" for CPU ids.
-      num_cpus++;  // count up every time we see an "processor :" entry
-      const char* id_str = strchr(line, ':');
-      if (id_str) {
-        const long cpu_id = strtol(id_str + 1, &err, 10);
-        if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
-          max_cpu_id = cpu_id;
-      }
-    }
-  } while (chars_read > 0);
-  close(fd);
-
-  if (!saw_mhz) {
-    if (saw_bogo) {
-      // If we didn't find anything better, we'll use bogomips, but
-      // we're not happy about it.
-      cpuinfo_cycles_per_second = bogo_clock;
-    } else {
-      // If we don't even have bogomips, we'll use the slow estimation.
-      cpuinfo_cycles_per_second =
-          static_cast<double>(EstimateCyclesPerSecond());
     }
   }
-  if (num_cpus == 0) {
-    fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
-  } else {
-    if ((max_cpu_id + 1) != num_cpus) {
-      fprintf(stderr,
-              "CPU ID assignments in /proc/cpuinfo seem messed up."
-              " This is usually caused by a bad BIOS.\n");
-    }
-    cpuinfo_num_cpus = num_cpus;
+  if (f.bad()) {
+    std::cerr << "Failure reading /proc/cpuinfo\n";
+    return error_value;
   }
+  if (!f.eof()) {
+    std::cerr << "Failed to read to end of /proc/cpuinfo\n";
+    return error_value;
+  }
+  f.close();
+  // If we found the bogomips clock, but nothing better, we'll use it (but
+  // we're not happy about it); otherwise, fallback to the rough estimation
+  // below.
+  if (bogo_clock >= 0.0) return bogo_clock;
 
-#elif defined BENCHMARK_OS_FREEBSD
-// For this sysctl to work, the machine must be configured without
-// SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
-// and later.  Before that, it's a 32-bit quantity (and gives the
-// wrong answer on machines faster than 2^32 Hz).  See
-//  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
-// But also compare FreeBSD 7.0:
-//  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
-//  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
-// To FreeBSD 6.3 (it's the same in 6-STABLE):
-//  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
-//  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
-#if __FreeBSD__ >= 7
-  uint64_t hz = 0;
+#elif defined BENCHMARK_HAS_SYSCTL
+  constexpr auto* FreqStr =
+#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
+      "machdep.tsc_freq";
 #else
-  unsigned int hz = 0;
+      "hw.cpufrequency";
 #endif
-  size_t sz = sizeof(hz);
-  const char* sysctl_path = "machdep.tsc_freq";
-  if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
-    fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
-            sysctl_path, strerror(errno));
-    cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
-  } else {
-    cpuinfo_cycles_per_second = hz;
-  }
-// TODO: also figure out cpuinfo_num_cpus
+  unsigned long long hz = 0;
+  if (GetSysctl(FreqStr, &hz)) return hz;
+
+  fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
+          FreqStr, strerror(errno));
 
 #elif defined BENCHMARK_OS_WINDOWS
   // In NT, read MHz from the registry. If we fail to do so or we're in win9x
@@ -267,89 +491,27 @@
           SHGetValueA(HKEY_LOCAL_MACHINE,
                       "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
                       "~MHz", nullptr, &data, &data_size)))
-    cpuinfo_cycles_per_second =
-        static_cast<double>((int64_t)data * (int64_t)(1000 * 1000));  // was mhz
-  else
-    cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
-
-  SYSTEM_INFO sysinfo;
-  // Use memset as opposed to = {} to avoid GCC missing initializer false
-  // positives.
-  std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
-  GetSystemInfo(&sysinfo);
-  cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors;  // number of logical
-                                                    // processors in the current
-                                                    // group
-
-#elif defined BENCHMARK_OS_MACOSX
-  int32_t num_cpus = 0;
-  size_t size = sizeof(num_cpus);
-  if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 &&
-      (size == sizeof(num_cpus))) {
-    cpuinfo_num_cpus = num_cpus;
-  } else {
-    fprintf(stderr, "%s\n", strerror(errno));
-    std::exit(EXIT_FAILURE);
-  }
-  int64_t cpu_freq = 0;
-  size = sizeof(cpu_freq);
-  if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 &&
-      (size == sizeof(cpu_freq))) {
-    cpuinfo_cycles_per_second = cpu_freq;
-  } else {
-    #if defined BENCHMARK_OS_IOS
-    fprintf(stderr, "CPU frequency cannot be detected. \n");
-    cpuinfo_cycles_per_second = 0;
-    #else
-    fprintf(stderr, "%s\n", strerror(errno));
-    std::exit(EXIT_FAILURE);
-    #endif
-  }
-#else
-  // Generic cycles per second counter
-  cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+    return static_cast<double>((int64_t)data *
+                               (int64_t)(1000 * 1000));  // was mhz
 #endif
+  // If we've fallen through, attempt to roughly estimate the CPU clock rate.
+  const int estimate_time_ms = 1000;
+  const auto start_ticks = cycleclock::Now();
+  SleepForMilliseconds(estimate_time_ms);
+  return static_cast<double>(cycleclock::Now() - start_ticks);
 }
 
 }  // end namespace
 
-double CyclesPerSecond(void) {
-  std::call_once(cpuinfo_init, InitializeSystemInfo);
-  return cpuinfo_cycles_per_second;
+const CPUInfo& CPUInfo::Get() {
+  static const CPUInfo* info = new CPUInfo();
+  return *info;
 }
 
-int NumCPUs(void) {
-  std::call_once(cpuinfo_init, InitializeSystemInfo);
-  return cpuinfo_num_cpus;
-}
-
-// The ""'s catch people who don't pass in a literal for "str"
-#define strliterallen(str) (sizeof("" str "") - 1)
-
-// Must use a string literal for prefix.
-#define memprefix(str, len, prefix)                       \
-  ((((len) >= strliterallen(prefix)) &&                   \
-    std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
-       ? str + strliterallen(prefix)                      \
-       : nullptr)
-
-bool CpuScalingEnabled() {
-#ifndef BENCHMARK_OS_WINDOWS
-  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
-  // local file system. If reading the exported files fails, then we may not be
-  // running on Linux, so we silently ignore all the read errors.
-  for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
-    std::string governor_file =
-        StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
-    FILE* file = fopen(governor_file.c_str(), "r");
-    if (!file) break;
-    char buff[16];
-    size_t bytes_read = fread(buff, 1, sizeof(buff), file);
-    fclose(file);
-    if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
-  }
-#endif
-  return false;
-}
+CPUInfo::CPUInfo()
+    : num_cpus(GetNumCPUs()),
+      cycles_per_second(GetCPUCyclesPerSecond()),
+      caches(GetCacheSizes()),
+      scaling_enabled(CpuScalingEnabled(num_cpus)) {}
 
 }  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.h b/libcxx/utils/google-benchmark/src/sysinfo.h
deleted file mode 100644
index c5d9916..0000000
--- a/libcxx/utils/google-benchmark/src/sysinfo.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef BENCHMARK_SYSINFO_H_
-#define BENCHMARK_SYSINFO_H_
-
-namespace benchmark {
-int NumCPUs();
-double CyclesPerSecond();
-bool CpuScalingEnabled();
-}  // end namespace benchmark
-
-#endif  // BENCHMARK_SYSINFO_H_
diff --git a/libcxx/utils/google-benchmark/src/timers.cc b/libcxx/utils/google-benchmark/src/timers.cc
index 8d56e8a..817272d 100644
--- a/libcxx/utils/google-benchmark/src/timers.cc
+++ b/libcxx/utils/google-benchmark/src/timers.cc
@@ -158,6 +158,10 @@
 #elif defined(BENCHMARK_OS_EMSCRIPTEN)
   // Emscripten doesn't support traditional threads
   return ProcessCPUUsage();
+#elif defined(BENCHMARK_OS_RTEMS)
+  // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See
+  // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c
+  return ProcessCPUUsage();
 #elif defined(CLOCK_THREAD_CPUTIME_ID)
   struct timespec ts;
   if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);