[libcxx] Add support for benchmark tests using Google Benchmark.

Summary:
This patch does the following:

1. Checks in a copy of the Google Benchmark library into the libc++ repo under `utils/google-benchmark`.
2. Teaches libc++ how to build Google Benchmark against both (A) in-tree libc++ and (B) the platforms native STL.
3. Allows performance benchmarks to be built as part of the libc++ build.

Building the benchmarks (and Google Benchmark) is off by default. It must be enabled using the CMake option `-DLIBCXX_INCLUDE_BENCHMARKS=ON`. When this option is enabled the tests under `libcxx/benchmarks`  can be built using the `libcxx-benchmarks` target.

On Linux platforms where libstdc++ is the default STL the CMake option `-DLIBCXX_BUILD_BENCHMARKS_NATIVE_STDLIB=ON` can be used to build each benchmark test against libstdc++ as well. This is useful for comparing performance between standard libraries.

Support for benchmarks is currently very minimal. They must be manually run by the user and there is no mechanism for detecting performance regressions.

Known Issues:

* `-DLIBCXX_INCLUDE_BENCHMARKS=ON` is only supported for Clang, and not GCC, since the `-stdlib=libc++` option is needed to build Google Benchmark.








Reviewers: danalbert, dberlin, chandlerc, mclow.lists, jroelofs

Subscribers: chandlerc, dberlin, tberghammer, danalbert, srhines, hfinkel

Differential Revision: https://reviews.llvm.org/D22240

llvm-svn: 276049
diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt
new file mode 100644
index 0000000..6dab64b
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Allow the source files to find headers in src/
+include_directories(${PROJECT_SOURCE_DIR}/src)
+
+# Define the source files
+set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
+                 "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
+                 "log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
+                 "sysinfo.cc" "walltime.cc" "complexity.cc")
+# Determine the correct regular expression engine to use
+if(HAVE_STD_REGEX)
+  set(RE_FILES "re_std.cc")
+elseif(HAVE_GNU_POSIX_REGEX)
+  set(RE_FILES "re_posix.cc")
+elseif(HAVE_POSIX_REGEX)
+  set(RE_FILES "re_posix.cc")
+else()
+  message(FATAL_ERROR "Failed to determine the source files for the regular expression backend")
+endif()
+
+add_library(benchmark ${SOURCE_FILES} ${RE_FILES})
+
+
+set_target_properties(benchmark PROPERTIES
+  OUTPUT_NAME "benchmark"
+  VERSION ${GENERIC_LIB_VERSION}
+  SOVERSION ${GENERIC_LIB_SOVERSION}
+)
+
+# Link threads.
+target_link_libraries(benchmark ${CMAKE_THREAD_LIBS_INIT})
+
+# We need extra libraries on Windows
+if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+  target_link_libraries(benchmark Shlwapi)
+endif()
+
+# Expose public API
+target_include_directories(benchmark PUBLIC ${PROJECT_SOURCE_DIR}/include)
+
+# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
+install(
+  TARGETS benchmark
+  ARCHIVE DESTINATION lib
+  LIBRARY DESTINATION lib
+  RUNTIME DESTINATION bin
+  COMPONENT library)
+
+install(
+  DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
+  DESTINATION include
+  FILES_MATCHING PATTERN "*.*h")
diff --git a/libcxx/utils/google-benchmark/src/arraysize.h b/libcxx/utils/google-benchmark/src/arraysize.h
new file mode 100644
index 0000000..638a52a
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/arraysize.h
@@ -0,0 +1,34 @@
+#ifndef BENCHMARK_ARRAYSIZE_H_
+#define BENCHMARK_ARRAYSIZE_H_
+
+#include "internal_macros.h"
+
+namespace benchmark {
+namespace internal {
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example.  If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+
+// That gcc wants both of these prototypes seems mysterious. VC, for
+// its part, can't decide which to use (another mystery). Matching of
+// template overloads: the final frontier.
+#ifndef COMPILER_MSVC
+template <typename T, size_t N>
+char (&ArraySizeHelper(const T (&array)[N]))[N];
+#endif
+
+#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array)))
+
+} // end namespace internal
+} // end namespace benchmark
+
+#endif // BENCHMARK_ARRAYSIZE_H_
diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc
new file mode 100644
index 0000000..cb8e132
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/benchmark.cc
@@ -0,0 +1,1123 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+#include "internal_macros.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#endif
+
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <iostream>
+#include <memory>
+#include <thread>
+
+#include "check.h"
+#include "commandlineflags.h"
+#include "complexity.h"
+#include "log.h"
+#include "mutex.h"
+#include "re.h"
+#include "stat.h"
+#include "string_util.h"
+#include "sysinfo.h"
+#include "walltime.h"
+
+DEFINE_bool(benchmark_list_tests, false,
+            "Print a list of benchmarks. This option overrides all other "
+            "options.");
+
+DEFINE_string(benchmark_filter, ".",
+              "A regular expression that specifies the set of benchmarks "
+              "to execute.  If this flag is empty, no benchmarks are run.  "
+              "If this flag is the string \"all\", all benchmarks linked "
+              "into the process are run.");
+
+DEFINE_double(benchmark_min_time, 0.5,
+              "Minimum number of seconds we should run benchmark before "
+              "results are considered significant.  For cpu-time based "
+              "tests, this is the lower bound on the total cpu time "
+              "used by all threads that make up the test.  For real-time "
+              "based tests, this is the lower bound on the elapsed time "
+              "of the benchmark execution, regardless of number of "
+              "threads.");
+
+DEFINE_int32(benchmark_repetitions, 1,
+             "The number of runs of each benchmark. If greater than 1, the "
+             "mean and standard deviation of the runs will be reported.");
+
+DEFINE_string(benchmark_format, "console",
+              "The format to use for console output. Valid values are "
+              "'console', 'json', or 'csv'.");
+
+DEFINE_bool(color_print, true, "Enables colorized logging.");
+
+DEFINE_int32(v, 0, "The level of verbose logging to output");
+
+
+namespace benchmark {
+
+namespace internal {
+
+void UseCharPointer(char const volatile*) {}
+
+// NOTE: This is a dummy "mutex" type used to denote the actual mutex
+// returned by GetBenchmarkLock(). This is only used to placate the thread
+// safety warnings by giving the return of GetBenchmarkLock() a name.
+struct CAPABILITY("mutex") BenchmarkLockType {};
+BenchmarkLockType BenchmarkLockVar;
+
+} // end namespace internal
+
+inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)
+GetBenchmarkLock()
+{
+  static Mutex lock;
+  return lock;
+}
+
+namespace {
+
+bool IsZero(double n) {
+    return std::abs(n) < std::numeric_limits<double>::epsilon();
+}
+
+// For non-dense Range, intermediate values are powers of kRangeMultiplier.
+static const int kRangeMultiplier = 8;
+static const size_t kMaxIterations = 1000000000;
+
+bool running_benchmark = false;
+
+// Global variable so that a benchmark can cause a little extra printing
+std::string* GetReportLabel() {
+    static std::string label GUARDED_BY(GetBenchmarkLock());
+    return &label;
+}
+
+// Global variable so that a benchmark can report an error as a human readable
+// string. If error_message is null no error occurred.
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+typedef char* error_message_type;
+#else
+typedef const char* error_message_type;
+#endif
+
+static std::atomic<error_message_type> error_message = ATOMIC_VAR_INIT(nullptr);
+
+// TODO(ericwf): support MallocCounter.
+//static benchmark::MallocCounter *benchmark_mc;
+
+struct ThreadStats {
+    ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {}
+    int64_t bytes_processed;
+    int64_t items_processed;
+    int  complexity_n;
+};
+
+// Timer management class
+class TimerManager {
+ public:
+  TimerManager(int num_threads, Notification* done)
+      : num_threads_(num_threads),
+        running_threads_(num_threads),
+        done_(done),
+        running_(false),
+        real_time_used_(0),
+        cpu_time_used_(0),
+        manual_time_used_(0),
+        num_finalized_(0),
+        phase_number_(0),
+        entered_(0)
+  {
+  }
+
+  // Called by each thread
+  void StartTimer() EXCLUDES(lock_) {
+    bool last_thread = false;
+    {
+      MutexLock ml(lock_);
+      last_thread = Barrier(ml);
+      if (last_thread) {
+        CHECK(!running_) << "Called StartTimer when timer is already running";
+        running_ = true;
+        start_real_time_ = walltime::Now();
+        start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage();
+       }
+     }
+     if (last_thread) {
+       phase_condition_.notify_all();
+     }
+  }
+
+  // Called by each thread
+  void StopTimer() EXCLUDES(lock_) {
+    bool last_thread = false;
+    {
+      MutexLock ml(lock_);
+      last_thread = Barrier(ml);
+      if (last_thread) {
+        CHECK(running_) << "Called StopTimer when timer is already stopped";
+        InternalStop();
+      }
+    }
+    if (last_thread) {
+      phase_condition_.notify_all();
+    }
+  }
+
+  // Called by each thread
+  void SetIterationTime(double seconds) EXCLUDES(lock_) {
+    bool last_thread = false;
+    {
+      MutexLock ml(lock_);
+      last_thread = Barrier(ml);
+      if (last_thread) {
+        manual_time_used_ += seconds;
+      }
+    }
+    if (last_thread) {
+      phase_condition_.notify_all();
+    }
+  }
+
+  // Called by each thread
+  void Finalize() EXCLUDES(lock_) {
+    MutexLock l(lock_);
+    num_finalized_++;
+    if (num_finalized_ == num_threads_) {
+      CHECK(!running_) <<
+        "The timer should be stopped before the timer is finalized";
+      done_->Notify();
+    }
+  }
+
+  void RemoveErroredThread() EXCLUDES(lock_) {
+    MutexLock ml(lock_);
+    int last_thread = --running_threads_ == 0;
+    if (last_thread && running_)
+      InternalStop();
+    else if (!last_thread)
+      phase_condition_.notify_all();
+  }
+
+  // REQUIRES: timer is not running
+  double real_time_used() EXCLUDES(lock_) {
+    MutexLock l(lock_);
+    CHECK(!running_);
+    return real_time_used_;
+  }
+
+  // REQUIRES: timer is not running
+  double cpu_time_used() EXCLUDES(lock_) {
+    MutexLock l(lock_);
+    CHECK(!running_);
+    return cpu_time_used_;
+  }
+
+  // REQUIRES: timer is not running
+  double manual_time_used() EXCLUDES(lock_) {
+    MutexLock l(lock_);
+    CHECK(!running_);
+    return manual_time_used_;
+  }
+
+ private:
+  Mutex lock_;
+  Condition phase_condition_;
+  int num_threads_;
+  int running_threads_;
+  Notification* done_;
+
+  bool running_;                // Is the timer running
+  double start_real_time_;      // If running_
+  double start_cpu_time_;       // If running_
+
+  // Accumulated time so far (does not contain current slice if running_)
+  double real_time_used_;
+  double cpu_time_used_;
+  // Manually set iteration time. User sets this with SetIterationTime(seconds).
+  double manual_time_used_;
+
+  // How many threads have called Finalize()
+  int num_finalized_;
+
+  // State for barrier management
+  int phase_number_;
+  int entered_;         // Number of threads that have entered this barrier
+
+  void InternalStop() REQUIRES(lock_) {
+    CHECK(running_);
+    running_ = false;
+    real_time_used_ += walltime::Now() - start_real_time_;
+    cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage())
+                       - start_cpu_time_);
+  }
+
+  // Enter the barrier and wait until all other threads have also
+  // entered the barrier.  Returns iff this is the last thread to
+  // enter the barrier.
+  bool Barrier(MutexLock& ml) REQUIRES(lock_) {
+    CHECK_LT(entered_, running_threads_);
+    entered_++;
+    if (entered_ < running_threads_) {
+      // Wait for all threads to enter
+      int phase_number_cp = phase_number_;
+      auto cb = [this, phase_number_cp]() {
+        return this->phase_number_ > phase_number_cp ||
+               entered_ == running_threads_; // A thread has aborted in error
+      };
+      phase_condition_.wait(ml.native_handle(), cb);
+      if (phase_number_ > phase_number_cp)
+        return false;
+      // else (running_threads_ == entered_) and we are the last thread.
+    }
+    // Last thread has reached the barrier
+    phase_number_++;
+    entered_ = 0;
+    return true;
+  }
+};
+
+// TimerManager for current run.
+static std::unique_ptr<TimerManager> timer_manager = nullptr;
+
+} // end namespace
+
+namespace internal {
+
+// Information kept per benchmark we may want to run
+struct Benchmark::Instance {
+  std::string    name;
+  Benchmark*     benchmark;
+  bool           has_arg1;
+  int            arg1;
+  bool           has_arg2;
+  int            arg2;
+  TimeUnit       time_unit;
+  int            range_multiplier;
+  bool           use_real_time;
+  bool           use_manual_time;
+  BigO           complexity;
+  BigOFunc*      complexity_lambda;
+  bool           last_benchmark_instance;
+  int            repetitions;
+  double         min_time;
+  int            threads;    // Number of concurrent threads to use
+  bool           multithreaded;  // Is benchmark multi-threaded?
+};
+
+// Class for managing registered benchmarks.  Note that each registered
+// benchmark identifies a family of related benchmarks to run.
+class BenchmarkFamilies {
+ public:
+  static BenchmarkFamilies* GetInstance();
+
+  // Registers a benchmark family and returns the index assigned to it.
+  size_t AddBenchmark(std::unique_ptr<Benchmark> family);
+
+  // Extract the list of benchmark instances that match the specified
+  // regular expression.
+  bool FindBenchmarks(const std::string& re,
+                      std::vector<Benchmark::Instance>* benchmarks);
+ private:
+  BenchmarkFamilies() {}
+
+  std::vector<std::unique_ptr<Benchmark>> families_;
+  Mutex mutex_;
+};
+
+
+class BenchmarkImp {
+public:
+  explicit BenchmarkImp(const char* name);
+  ~BenchmarkImp();
+
+  void Arg(int x);
+  void Unit(TimeUnit unit);
+  void Range(int start, int limit);
+  void DenseRange(int start, int limit);
+  void ArgPair(int start, int limit);
+  void RangePair(int lo1, int hi1, int lo2, int hi2);
+  void RangeMultiplier(int multiplier);
+  void MinTime(double n);
+  void Repetitions(int n);
+  void UseRealTime();
+  void UseManualTime();
+  void Complexity(BigO complexity);
+  void ComplexityLambda(BigOFunc* complexity);
+  void Threads(int t);
+  void ThreadRange(int min_threads, int max_threads);
+  void ThreadPerCpu();
+  void SetName(const char* name);
+
+  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
+
+private:
+  friend class BenchmarkFamilies;
+
+  std::string name_;
+  int arg_count_;
+  std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
+  TimeUnit time_unit_;
+  int range_multiplier_;
+  double min_time_;
+  int repetitions_;
+  bool use_real_time_;
+  bool use_manual_time_;
+  BigO complexity_;
+  BigOFunc* complexity_lambda_;
+  std::vector<int> thread_counts_;
+
+  BenchmarkImp& operator=(BenchmarkImp const&);
+};
+
+BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
+  static BenchmarkFamilies instance;
+  return &instance;
+}
+
+
+size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
+  MutexLock l(mutex_);
+  size_t index = families_.size();
+  families_.push_back(std::move(family));
+  return index;
+}
+
+bool BenchmarkFamilies::FindBenchmarks(
+    const std::string& spec,
+    std::vector<Benchmark::Instance>* benchmarks) {
+  // Make regular expression out of command-line flag
+  std::string error_msg;
+  Regex re;
+  if (!re.Init(spec, &error_msg)) {
+    std::cerr << "Could not compile benchmark re: " << error_msg << std::endl;
+    return false;
+  }
+
+  // Special list of thread counts to use when none are specified
+  std::vector<int> one_thread;
+  one_thread.push_back(1);
+
+  MutexLock l(mutex_);
+  for (std::unique_ptr<Benchmark>& bench_family : families_) {
+    // Family was deleted or benchmark doesn't match
+    if (!bench_family) continue;
+    BenchmarkImp* family = bench_family->imp_;
+
+    if (family->arg_count_ == -1) {
+      family->arg_count_ = 0;
+      family->args_.emplace_back(-1, -1);
+    }
+    for (auto const& args : family->args_) {
+      const std::vector<int>* thread_counts =
+        (family->thread_counts_.empty()
+         ? &one_thread
+         : &family->thread_counts_);
+      for (int num_threads : *thread_counts) {
+
+        Benchmark::Instance instance;
+        instance.name = family->name_;
+        instance.benchmark = bench_family.get();
+        instance.has_arg1 = family->arg_count_ >= 1;
+        instance.arg1 = args.first;
+        instance.has_arg2 = family->arg_count_ == 2;
+        instance.arg2 = args.second;
+        instance.time_unit = family->time_unit_;
+        instance.range_multiplier = family->range_multiplier_;
+        instance.min_time = family->min_time_;
+        instance.repetitions = family->repetitions_;
+        instance.use_real_time = family->use_real_time_;
+        instance.use_manual_time = family->use_manual_time_;
+        instance.complexity = family->complexity_;
+        instance.complexity_lambda = family->complexity_lambda_;
+        instance.threads = num_threads;
+        instance.multithreaded = !(family->thread_counts_.empty());
+
+        // Add arguments to instance name
+        if (family->arg_count_ >= 1) {
+          AppendHumanReadable(instance.arg1, &instance.name);
+        }
+        if (family->arg_count_ >= 2) {
+          AppendHumanReadable(instance.arg2, &instance.name);
+        }
+        if (!IsZero(family->min_time_)) {
+          instance.name +=  StringPrintF("/min_time:%0.3f",  family->min_time_);
+        }
+        if (family->repetitions_ != 0) {
+          instance.name +=  StringPrintF("/repeats:%d",  family->repetitions_);
+        }
+        if (family->use_manual_time_) {
+          instance.name +=  "/manual_time";
+        } else if (family->use_real_time_) {
+          instance.name +=  "/real_time";
+        }
+
+        // Add the number of threads used to the name
+        if (!family->thread_counts_.empty()) {
+          instance.name += StringPrintF("/threads:%d", instance.threads);
+        }
+
+        if (re.Match(instance.name)) {
+          instance.last_benchmark_instance = (args == family->args_.back());
+          benchmarks->push_back(instance);
+        }
+      }
+    }
+  }
+  return true;
+}
+
+BenchmarkImp::BenchmarkImp(const char* name)
+    : name_(name), arg_count_(-1), time_unit_(kNanosecond),
+      range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0),
+      use_real_time_(false), use_manual_time_(false),
+      complexity_(oNone) {
+}
+
+BenchmarkImp::~BenchmarkImp() {
+}
+
+void BenchmarkImp::Arg(int x) {
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
+  args_.emplace_back(x, -1);
+}
+
+void BenchmarkImp::Unit(TimeUnit unit) {
+  time_unit_ = unit;
+}
+
+void BenchmarkImp::Range(int start, int limit) {
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
+  std::vector<int> arglist;
+  AddRange(&arglist, start, limit, range_multiplier_);
+
+  for (int i : arglist) {
+    args_.emplace_back(i, -1);
+  }
+}
+
+void BenchmarkImp::DenseRange(int start, int limit) {
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
+  CHECK_GE(start, 0);
+  CHECK_LE(start, limit);
+  for (int arg = start; arg <= limit; arg++) {
+    args_.emplace_back(arg, -1);
+  }
+}
+
+void BenchmarkImp::ArgPair(int x, int y) {
+  CHECK(arg_count_ == -1 || arg_count_ == 2);
+  arg_count_ = 2;
+  args_.emplace_back(x, y);
+}
+
+void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) {
+  CHECK(arg_count_ == -1 || arg_count_ == 2);
+  arg_count_ = 2;
+  std::vector<int> arglist1, arglist2;
+  AddRange(&arglist1, lo1, hi1, range_multiplier_);
+  AddRange(&arglist2, lo2, hi2, range_multiplier_);
+
+  for (int i : arglist1) {
+    for (int j : arglist2) {
+      args_.emplace_back(i, j);
+    }
+  }
+}
+
+void BenchmarkImp::RangeMultiplier(int multiplier) {
+  CHECK(multiplier > 1);
+  range_multiplier_ = multiplier;
+}
+
+void BenchmarkImp::MinTime(double t) {
+  CHECK(t > 0.0);
+  min_time_ = t;
+}
+
+
+void BenchmarkImp::Repetitions(int n) {
+  CHECK(n > 0);
+  repetitions_ = n;
+}
+
+void BenchmarkImp::UseRealTime() {
+  CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
+  use_real_time_ = true;
+}
+
+void BenchmarkImp::UseManualTime() {
+  CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
+  use_manual_time_ = true;
+}
+
+void BenchmarkImp::Complexity(BigO complexity){
+  complexity_ = complexity;
+}
+
+void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) {
+  complexity_lambda_ = complexity;
+}
+
+void BenchmarkImp::Threads(int t) {
+  CHECK_GT(t, 0);
+  thread_counts_.push_back(t);
+}
+
+void BenchmarkImp::ThreadRange(int min_threads, int max_threads) {
+  CHECK_GT(min_threads, 0);
+  CHECK_GE(max_threads, min_threads);
+
+  AddRange(&thread_counts_, min_threads, max_threads, 2);
+}
+
+void BenchmarkImp::ThreadPerCpu() {
+  static int num_cpus = NumCPUs();
+  thread_counts_.push_back(num_cpus);
+}
+
+void BenchmarkImp::SetName(const char* name) {
+  name_ = name;
+}
+
+void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
+  CHECK_GE(lo, 0);
+  CHECK_GE(hi, lo);
+  CHECK_GE(mult, 2);
+
+  // Add "lo"
+  dst->push_back(lo);
+
+  static const int kint32max = std::numeric_limits<int32_t>::max();
+
+  // Now space out the benchmarks in multiples of "mult"
+  for (int32_t i = 1; i < kint32max/mult; i *= mult) {
+    if (i >= hi) break;
+    if (i > lo) {
+      dst->push_back(i);
+    }
+  }
+  // Add "hi" (if different from "lo")
+  if (hi != lo) {
+    dst->push_back(hi);
+  }
+}
+
+Benchmark::Benchmark(const char* name)
+    : imp_(new BenchmarkImp(name))
+{
+}
+
+Benchmark::~Benchmark()  {
+  delete imp_;
+}
+
+Benchmark::Benchmark(Benchmark const& other)
+  : imp_(new BenchmarkImp(*other.imp_))
+{
+}
+
+Benchmark* Benchmark::Arg(int x) {
+  imp_->Arg(x);
+  return this;
+}
+
+Benchmark* Benchmark::Unit(TimeUnit unit) {
+  imp_->Unit(unit);
+  return this;
+}
+
+Benchmark* Benchmark::Range(int start, int limit) {
+  imp_->Range(start, limit);
+  return this;
+}
+
+Benchmark* Benchmark::DenseRange(int start, int limit) {
+  imp_->DenseRange(start, limit);
+  return this;
+}
+
+Benchmark* Benchmark::ArgPair(int x, int y) {
+  imp_->ArgPair(x, y);
+  return this;
+}
+
+Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
+  imp_->RangePair(lo1, hi1, lo2, hi2);
+  return this;
+}
+
+Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
+  custom_arguments(this);
+  return this;
+}
+
+Benchmark* Benchmark::RangeMultiplier(int multiplier) {
+  imp_->RangeMultiplier(multiplier);
+  return this;
+}
+
+
+Benchmark* Benchmark::Repetitions(int t) {
+  imp_->Repetitions(t);
+  return this;
+}
+
+Benchmark* Benchmark::MinTime(double t) {
+  imp_->MinTime(t);
+  return this;
+}
+
+Benchmark* Benchmark::UseRealTime() {
+  imp_->UseRealTime();
+  return this;
+}
+
+Benchmark* Benchmark::UseManualTime() {
+  imp_->UseManualTime();
+  return this;
+}
+
+Benchmark* Benchmark::Complexity(BigO complexity) {
+  imp_->Complexity(complexity);
+  return this;
+}
+
+Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
+  imp_->Complexity(oLambda);
+  imp_->ComplexityLambda(complexity);
+  return this;
+}
+
+Benchmark* Benchmark::Threads(int t) {
+  imp_->Threads(t);
+  return this;
+}
+
+Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
+  imp_->ThreadRange(min_threads, max_threads);
+  return this;
+}
+
+Benchmark* Benchmark::ThreadPerCpu() {
+  imp_->ThreadPerCpu();
+  return this;
+}
+
+void Benchmark::SetName(const char* name) {
+  imp_->SetName(name);
+}
+
+void FunctionBenchmark::Run(State& st) {
+  func_(st);
+}
+
+} // end namespace internal
+
+namespace {
+
+// Execute one thread of benchmark b for the specified number of iterations.
+// Adds the stats collected for the thread into *total.
+void RunInThread(const benchmark::internal::Benchmark::Instance* b,
+                 size_t iters, int thread_id,
+                 ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
+  State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
+  b->benchmark->Run(st);
+  CHECK(st.iterations() == st.max_iterations) <<
+    "Benchmark returned before State::KeepRunning() returned false!";
+  {
+    MutexLock l(GetBenchmarkLock());
+    total->bytes_processed += st.bytes_processed();
+    total->items_processed += st.items_processed();
+    total->complexity_n += st.complexity_length_n();
+  }
+
+  timer_manager->Finalize();
+}
+
+void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
+                  BenchmarkReporter* br,
+                  std::vector<BenchmarkReporter::Run>& complexity_reports)
+  EXCLUDES(GetBenchmarkLock()) {
+  size_t iters = 1;
+
+  std::vector<BenchmarkReporter::Run> reports;
+
+  std::vector<std::thread> pool;
+  if (b.multithreaded)
+    pool.resize(b.threads);
+
+  const int repeats = b.repetitions != 0 ? b.repetitions
+                                         : FLAGS_benchmark_repetitions;
+  for (int i = 0; i < repeats; i++) {
+    std::string mem;
+    for (;;) {
+      // Try benchmark
+      VLOG(2) << "Running " << b.name << " for " << iters << "\n";
+
+      {
+        MutexLock l(GetBenchmarkLock());
+        GetReportLabel()->clear();
+      }
+      error_message = nullptr;
+
+      Notification done;
+      timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done));
+
+      ThreadStats total;
+      running_benchmark = true;
+      if (b.multithreaded) {
+        // If this is out first iteration of the while(true) loop then the
+        // threads haven't been started and can't be joined. Otherwise we need
+        // to join the thread before replacing them.
+        for (std::thread& thread : pool) {
+          if (thread.joinable())
+            thread.join();
+        }
+        for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+            pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti), &total);
+        }
+      } else {
+        // Run directly in this thread
+        RunInThread(&b, iters, 0, &total);
+      }
+      done.WaitForNotification();
+      running_benchmark = false;
+
+      const double cpu_accumulated_time = timer_manager->cpu_time_used();
+      const double real_accumulated_time = timer_manager->real_time_used();
+      const double manual_accumulated_time = timer_manager->manual_time_used();
+      timer_manager.reset();
+
+      VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
+              << real_accumulated_time << "\n";
+
+      // Base decisions off of real time if requested by this benchmark.
+      double seconds = cpu_accumulated_time;
+      if (b.use_manual_time) {
+          seconds = manual_accumulated_time;
+      } else if (b.use_real_time) {
+          seconds = real_accumulated_time;
+      }
+
+      std::string label;
+      {
+        MutexLock l(GetBenchmarkLock());
+        label = *GetReportLabel();
+      }
+      error_message_type error_msg = error_message;
+
+      const double min_time = !IsZero(b.min_time) ? b.min_time
+                                                  : FLAGS_benchmark_min_time;
+
+      // If this was the first run, was elapsed time or cpu time large enough?
+      // If this is not the first run, go with the current value of iter.
+      if ((i > 0) || (error_msg != nullptr) ||
+          (iters >= kMaxIterations) ||
+          (seconds >= min_time) ||
+          (real_accumulated_time >= 5*min_time)) {
+
+        // Create report about this benchmark run.
+        BenchmarkReporter::Run report;
+        report.benchmark_name = b.name;
+        report.error_occurred = error_msg != nullptr;
+        report.error_message = error_msg != nullptr ? error_msg : "";
+        report.report_label = label;
+        // Report the total iterations across all threads.
+        report.iterations = static_cast<int64_t>(iters) * b.threads;
+        report.time_unit = b.time_unit;
+
+        if (!report.error_occurred) {
+          double bytes_per_second = 0;
+          if (total.bytes_processed > 0 && seconds > 0.0) {
+            bytes_per_second = (total.bytes_processed / seconds);
+          }
+          double items_per_second = 0;
+          if (total.items_processed > 0 && seconds > 0.0) {
+            items_per_second = (total.items_processed / seconds);
+          }
+
+          if (b.use_manual_time) {
+            report.real_accumulated_time = manual_accumulated_time;
+          } else {
+            report.real_accumulated_time = real_accumulated_time;
+          }
+          report.cpu_accumulated_time = cpu_accumulated_time;
+          report.bytes_per_second = bytes_per_second;
+          report.items_per_second = items_per_second;
+          report.complexity_n = total.complexity_n;
+          report.complexity = b.complexity;
+          report.complexity_lambda = b.complexity_lambda;
+          if(report.complexity != oNone)
+            complexity_reports.push_back(report);
+        }
+
+        reports.push_back(report);
+        break;
+      }
+
+      // See how much iterations should be increased by
+      // Note: Avoid division by zero with max(seconds, 1ns).
+      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
+      // If our last run was at least 10% of FLAGS_benchmark_min_time then we
+      // use the multiplier directly. Otherwise we use at most 10 times
+      // expansion.
+      // NOTE: When the last run was at least 10% of the min time the max
+      // expansion should be 14x.
+      bool is_significant = (seconds / min_time) > 0.1;
+      multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
+      if (multiplier <= 1.0) multiplier = 2.0;
+      double next_iters = std::max(multiplier * iters, iters + 1.0);
+      if (next_iters > kMaxIterations) {
+        next_iters = kMaxIterations;
+      }
+      VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+      iters = static_cast<int>(next_iters + 0.5);
+    }
+  }
+  std::vector<BenchmarkReporter::Run> additional_run_stats = ComputeStats(reports);
+  reports.insert(reports.end(), additional_run_stats.begin(),
+                 additional_run_stats.end());
+
+  if((b.complexity != oNone) && b.last_benchmark_instance) {
+    additional_run_stats = ComputeBigO(complexity_reports);
+    reports.insert(reports.end(), additional_run_stats.begin(),
+                   additional_run_stats.end());
+    complexity_reports.clear();
+  }
+
+  br->ReportRuns(reports);
+
+  if (b.multithreaded) {
+    for (std::thread& thread : pool)
+      thread.join();
+  }
+}
+
+}  // namespace
+
+State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
+             int thread_i, int n_threads)
+    : started_(false), finished_(false), total_iterations_(0),
+      has_range_x_(has_x), range_x_(x),
+      has_range_y_(has_y), range_y_(y),
+      bytes_processed_(0), items_processed_(0),
+      complexity_n_(0),
+      error_occurred_(false),
+      thread_index(thread_i),
+      threads(n_threads),
+      max_iterations(max_iters)
+{
+    CHECK(max_iterations != 0) << "At least one iteration must be run";
+    CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
+}
+
+void State::PauseTiming() {
+  // Add in time accumulated so far
+  CHECK(running_benchmark);
+  CHECK(started_ && !finished_ && !error_occurred_);
+  timer_manager->StopTimer();
+}
+
+void State::ResumeTiming() {
+  CHECK(running_benchmark);
+  CHECK(started_ && !finished_ && !error_occurred_);
+  timer_manager->StartTimer();
+}
+
+void State::SkipWithError(const char* msg) {
+  CHECK(msg);
+  error_occurred_ = true;
+  error_message_type expected_no_error_msg = nullptr;
+  error_message.compare_exchange_weak(expected_no_error_msg,
+    const_cast<error_message_type>(msg));
+  started_ = finished_ = true;
+  total_iterations_ = max_iterations;
+  timer_manager->RemoveErroredThread();
+}
+
+void State::SetIterationTime(double seconds)
+{
+  CHECK(running_benchmark);
+  timer_manager->SetIterationTime(seconds);
+}
+
+void State::SetLabel(const char* label) {
+  CHECK(running_benchmark);
+  MutexLock l(GetBenchmarkLock());
+  *GetReportLabel() = label;
+}
+
+namespace internal {
+namespace {
+
+void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
+                           BenchmarkReporter* reporter) {
+  CHECK(reporter != nullptr);
+
+  // Determine the width of the name field using a minimum width of 10.
+  bool has_repetitions = FLAGS_benchmark_repetitions > 1;
+  size_t name_field_width = 10;
+  for (const Benchmark::Instance& benchmark : benchmarks) {
+    name_field_width =
+        std::max<size_t>(name_field_width, benchmark.name.size());
+    has_repetitions |= benchmark.repetitions > 1;
+  }
+  if (has_repetitions)
+    name_field_width += std::strlen("_stddev");
+
+  // Print header here
+  BenchmarkReporter::Context context;
+  context.num_cpus = NumCPUs();
+  context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
+
+  context.cpu_scaling_enabled = CpuScalingEnabled();
+  context.name_field_width = name_field_width;
+
+  // Keep track of runing times of all instances of current benchmark
+  std::vector<BenchmarkReporter::Run> complexity_reports;
+
+  if (reporter->ReportContext(context)) {
+    for (const auto& benchmark : benchmarks) {
+      RunBenchmark(benchmark, reporter, complexity_reports);
+    }
+  }
+}
+
+std::unique_ptr<BenchmarkReporter> GetDefaultReporter() {
+  typedef std::unique_ptr<BenchmarkReporter> PtrType;
+  if (FLAGS_benchmark_format == "console") {
+    return PtrType(new ConsoleReporter);
+  } else if (FLAGS_benchmark_format == "json") {
+    return PtrType(new JSONReporter);
+  } else if (FLAGS_benchmark_format == "csv") {
+    return PtrType(new CSVReporter);
+  } else {
+    std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n";
+    std::exit(1);
+  }
+}
+
+} // end namespace
+} // end namespace internal
+
+size_t RunSpecifiedBenchmarks() {
+  return RunSpecifiedBenchmarks(nullptr);
+}
+
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter) {
+  std::string spec = FLAGS_benchmark_filter;
+  if (spec.empty() || spec == "all")
+    spec = ".";  // Regexp that matches all benchmarks
+
+  std::vector<internal::Benchmark::Instance> benchmarks;
+  auto families = internal::BenchmarkFamilies::GetInstance();
+  if (!families->FindBenchmarks(spec, &benchmarks)) return 0;
+
+  if (FLAGS_benchmark_list_tests) {
+    for (auto const& benchmark : benchmarks)
+      std::cout <<  benchmark.name << "\n";
+  } else {
+    std::unique_ptr<BenchmarkReporter> default_reporter;
+    if (!reporter) {
+      default_reporter = internal::GetDefaultReporter();
+      reporter = default_reporter.get();
+    }
+    internal::RunMatchingBenchmarks(benchmarks, reporter);
+    reporter->Finalize();
+  }
+  return benchmarks.size();
+}
+
+namespace internal {
+
+void PrintUsageAndExit() {
+  fprintf(stdout,
+          "benchmark"
+          " [--benchmark_list_tests={true|false}]\n"
+          "          [--benchmark_filter=<regex>]\n"
+          "          [--benchmark_min_time=<min_time>]\n"
+          "          [--benchmark_repetitions=<num_repetitions>]\n"
+          "          [--benchmark_format=<console|json|csv>]\n"
+          "          [--color_print={true|false}]\n"
+          "          [--v=<verbosity>]\n");
+  exit(0);
+}
+
+void ParseCommandLineFlags(int* argc, char** argv) {
+  using namespace benchmark;
+  for (int i = 1; i < *argc; ++i) {
+    if (
+        ParseBoolFlag(argv[i], "benchmark_list_tests",
+                      &FLAGS_benchmark_list_tests) ||
+        ParseStringFlag(argv[i], "benchmark_filter",
+                        &FLAGS_benchmark_filter) ||
+        ParseDoubleFlag(argv[i], "benchmark_min_time",
+                        &FLAGS_benchmark_min_time) ||
+        ParseInt32Flag(argv[i], "benchmark_repetitions",
+                       &FLAGS_benchmark_repetitions) ||
+        ParseStringFlag(argv[i], "benchmark_format",
+                        &FLAGS_benchmark_format) ||
+        ParseBoolFlag(argv[i], "color_print",
+                       &FLAGS_color_print) ||
+        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
+      for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
+
+      --(*argc);
+      --i;
+    } else if (IsFlag(argv[i], "help")) {
+      PrintUsageAndExit();
+    }
+  }
+
+  if (FLAGS_benchmark_format != "console" &&
+      FLAGS_benchmark_format != "json" &&
+      FLAGS_benchmark_format != "csv") {
+    PrintUsageAndExit();
+  }
+}
+
+Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
+    std::unique_ptr<Benchmark> bench_ptr(bench);
+    BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
+    families->AddBenchmark(std::move(bench_ptr));
+    return bench;
+}
+
+} // end namespace internal
+
+void Initialize(int* argc, char** argv) {
+  internal::ParseCommandLineFlags(argc, argv);
+  internal::SetLogLevel(FLAGS_v);
+  // TODO remove this. It prints some output the first time it is called.
+  // We don't want to have this ouput printed during benchmarking.
+  MyCPUUsage();
+  // The first call to walltime::Now initialized it. Call it once to
+  // prevent the initialization from happening in a benchmark.
+  walltime::Now();
+}
+
+} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/check.h b/libcxx/utils/google-benchmark/src/check.h
new file mode 100644
index 0000000..4572bab
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/check.h
@@ -0,0 +1,72 @@
+#ifndef CHECK_H_
+#define CHECK_H_
+
+#include <cstdlib>
+#include <ostream>
+
+#include "internal_macros.h"
+#include "log.h"
+
+namespace benchmark {
+namespace internal {
+
+typedef void(AbortHandlerT)();
+
+inline AbortHandlerT*& GetAbortHandler() {
+    static AbortHandlerT* handler = &std::abort;
+    return handler;
+}
+
+BENCHMARK_NORETURN inline void CallAbortHandler() {
+    GetAbortHandler()();
+    std::abort(); // fallback to enforce noreturn
+}
+
+// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
+// will log information about the failures and abort when it is destructed.
+class CheckHandler {
+public:
+  CheckHandler(const char* check, const char* file, const char* func, int line)
+    : log_(GetErrorLogInstance())
+  {
+    log_ << file << ":" << line << ": " << func << ": Check `"
+          << check << "' failed. ";
+  }
+
+  std::ostream& GetLog() {
+    return log_;
+  }
+
+  BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
+      log_ << std::endl;
+      CallAbortHandler();
+  }
+
+  CheckHandler & operator=(const CheckHandler&) = delete;
+  CheckHandler(const CheckHandler&) = delete;
+  CheckHandler() = delete;
+private:
+  std::ostream& log_;
+};
+
+} // end namespace internal
+} // end namespace benchmark
+
+// The CHECK macro returns a std::ostream object that can have extra information
+// written to it.
+#ifndef NDEBUG
+# define CHECK(b)  (b ? ::benchmark::internal::GetNullLogInstance()        \
+                      : ::benchmark::internal::CheckHandler(               \
+                          #b, __FILE__, __func__, __LINE__).GetLog())
+#else
+# define CHECK(b) ::benchmark::internal::GetNullLogInstance()
+#endif
+
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+#define CHECK_NE(a, b) CHECK((a) != (b))
+#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
+#define CHECK_GT(a, b) CHECK((a) > (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+
+#endif  // CHECK_H_
diff --git a/libcxx/utils/google-benchmark/src/colorprint.cc b/libcxx/utils/google-benchmark/src/colorprint.cc
new file mode 100644
index 0000000..efb8626
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/colorprint.cc
@@ -0,0 +1,158 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "colorprint.h"
+
+#include <cstdarg>
+#include <cstdio>
+#include <cstdarg>
+#include <string>
+#include <memory>
+
+#include "commandlineflags.h"
+#include "check.h"
+#include "internal_macros.h"
+
+#ifdef BENCHMARK_OS_WINDOWS
+#include <Windows.h>
+#endif
+
+DECLARE_bool(color_print);
+
+namespace benchmark {
+namespace {
+#ifdef BENCHMARK_OS_WINDOWS
+typedef WORD PlatformColorCode;
+#else
+typedef const char* PlatformColorCode;
+#endif
+
+PlatformColorCode GetPlatformColorCode(LogColor color) {
+#ifdef BENCHMARK_OS_WINDOWS
+  switch (color) {
+    case COLOR_RED:
+      return FOREGROUND_RED;
+    case COLOR_GREEN:
+      return FOREGROUND_GREEN;
+    case COLOR_YELLOW:
+      return FOREGROUND_RED | FOREGROUND_GREEN;
+    case COLOR_BLUE:
+      return FOREGROUND_BLUE;
+    case COLOR_MAGENTA:
+      return FOREGROUND_BLUE | FOREGROUND_RED;
+    case COLOR_CYAN:
+      return FOREGROUND_BLUE | FOREGROUND_GREEN;
+    case COLOR_WHITE:  // fall through to default
+    default:
+      return 0;
+  }
+#else
+  switch (color) {
+    case COLOR_RED:
+      return "1";
+    case COLOR_GREEN:
+      return "2";
+    case COLOR_YELLOW:
+      return "3";
+    case COLOR_BLUE:
+      return "4";
+    case COLOR_MAGENTA:
+      return "5";
+    case COLOR_CYAN:
+      return "6";
+    case COLOR_WHITE:
+      return "7";
+    default:
+      return nullptr;
+  };
+#endif
+}
+
+}  // end namespace
+
+std::string FormatString(const char *msg, va_list args) {
+  // we might need a second shot at this, so pre-emptivly make a copy
+  va_list args_cp;
+  va_copy(args_cp, args);
+
+  std::size_t size = 256;
+  char local_buff[256];
+  auto ret = std::vsnprintf(local_buff, size, msg, args_cp);
+
+  va_end(args_cp);
+
+  // currently there is no error handling for failure, so this is hack.
+  CHECK(ret >= 0);
+
+  if (ret == 0) // handle empty expansion
+    return {};
+  else if (static_cast<size_t>(ret) < size)
+    return local_buff;
+  else {
+    // we did not provide a long enough buffer on our first attempt.
+    size = (size_t)ret + 1; // + 1 for the null byte
+    std::unique_ptr<char[]> buff(new char[size]);
+    ret = std::vsnprintf(buff.get(), size, msg, args);
+    CHECK(ret > 0 && ((size_t)ret) < size);
+    return buff.get();
+  }
+}
+
+std::string FormatString(const char *msg, ...) {
+  va_list args;
+  va_start(args, msg);
+  auto tmp = FormatString(msg, args);
+  va_end(args);
+  return tmp;
+}
+
+void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+
+  if (!FLAGS_color_print) {
+    out << FormatString(fmt, args);
+    va_end(args);
+    return;
+  }
+
+#ifdef BENCHMARK_OS_WINDOWS
+  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+
+  // Gets the current text color.
+  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+  const WORD old_color_attrs = buffer_info.wAttributes;
+
+  // We need to flush the stream buffers into the console before each
+  // SetConsoleTextAttribute call lest it affect the text that is already
+  // printed but has not yet reached the console.
+  fflush(stdout);
+  SetConsoleTextAttribute(stdout_handle,
+                          GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
+  vprintf(fmt, args);
+
+  fflush(stdout);
+  // Restores the text color.
+  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+#else
+  const char* color_code = GetPlatformColorCode(color);
+  if (color_code) out << FormatString("\033[0;3%sm", color_code);
+  out << FormatString(fmt, args) << "\033[m";
+#endif
+
+  va_end(args);
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/colorprint.h b/libcxx/utils/google-benchmark/src/colorprint.h
new file mode 100644
index 0000000..2b3c082
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/colorprint.h
@@ -0,0 +1,27 @@
+#ifndef BENCHMARK_COLORPRINT_H_
+#define BENCHMARK_COLORPRINT_H_
+
+#include <cstdarg>
+#include <string>
+#include <iostream>
+
+namespace benchmark {
+enum LogColor {
+  COLOR_DEFAULT,
+  COLOR_RED,
+  COLOR_GREEN,
+  COLOR_YELLOW,
+  COLOR_BLUE,
+  COLOR_MAGENTA,
+  COLOR_CYAN,
+  COLOR_WHITE
+};
+
+std::string FormatString(const char* msg, va_list args);
+std::string FormatString(const char* msg, ...);
+
+void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);
+
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_COLORPRINT_H_
diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.cc b/libcxx/utils/google-benchmark/src/commandlineflags.cc
new file mode 100644
index 0000000..3e9a37a
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/commandlineflags.cc
@@ -0,0 +1,220 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "commandlineflags.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+
+namespace benchmark {
+// Parses 'str' for a 32-bit signed integer.  If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
+  // Parses the environment variable as a decimal integer.
+  char* end = nullptr;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const int32_t result = static_cast<int32_t>(long_value);
+  if (long_value == std::numeric_limits<long>::max() ||
+      long_value == std::numeric_limits<long>::min() ||
+      // The parsed value overflows as a long.  (strtol() returns
+      // LONG_MAX or LONG_MIN when the input overflows.)
+      result != long_value
+          // The parsed value overflows as an Int32.
+      ) {
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\", "
+              << "which overflows.\n";
+    return false;
+  }
+
+  *value = result;
+  return true;
+}
+
+// Parses 'str' for a double.  If successful, writes the result to *value and
+// returns true; otherwise leaves *value unchanged and returns false.
+bool ParseDouble(const std::string& src_text, const char* str, double* value) {
+  // Parses the environment variable as a decimal integer.
+  char* end = nullptr;
+  const double double_value = strtod(str, &end);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    std::cerr << src_text << " is expected to be a double, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+
+  *value = double_value;
+  return true;
+}
+
+inline const char* GetEnv(const char* name) {
+#if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+  // Environment variables which we programmatically clear will be set to the
+  // empty string rather than unset (nullptr).  Handle that case.
+  const char* const env = getenv(name);
+  return (env != nullptr && env[0] != '\0') ? env : nullptr;
+#else
+  return getenv(name);
+#endif
+}
+
+// Returns the name of the environment variable corresponding to the
+// given flag.  For example, FlagToEnvVar("foo") will return
+// "BENCHMARK_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string flag_str(flag);
+
+  std::string env_var;
+  for (size_t i = 0; i != flag_str.length(); ++i)
+    env_var += static_cast<char>(::toupper(flag_str.c_str()[i]));
+
+  return "BENCHMARK_" + env_var;
+}
+
+// Reads and returns the Boolean environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+//
+// The value is considered true iff it's not "0".
+bool BoolFromEnv(const char* flag, bool default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  return string_value == nullptr ? default_value : strcmp(string_value, "0") != 0;
+}
+
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+int32_t Int32FromEnv(const char* flag, int32_t default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  if (string_value == nullptr) {
+    // The environment variable is not set.
+    return default_value;
+  }
+
+  int32_t result = default_value;
+  if (!ParseInt32(std::string("Environment variable ") + env_var, string_value,
+                  &result)) {
+    std::cout << "The default value " << default_value << " is used.\n";
+    return default_value;
+  }
+
+  return result;
+}
+
+// Reads and returns the string environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+const char* StringFromEnv(const char* flag, const char* default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const value = GetEnv(env_var.c_str());
+  return value == nullptr ? default_value : value;
+}
+
+// Parses a string as a command line flag.  The string should have
+// the format "--flag=value".  When def_optional is true, the "=value"
+// part can be omitted.
+//
+// Returns the value of the flag, or nullptr if the parsing failed.
+const char* ParseFlagValue(const char* str, const char* flag,
+                           bool def_optional) {
+  // str and flag must not be nullptr.
+  if (str == nullptr || flag == nullptr) return nullptr;
+
+  // The flag must start with "--".
+  const std::string flag_str = std::string("--") + std::string(flag);
+  const size_t flag_len = flag_str.length();
+  if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
+
+  // Skips the flag name.
+  const char* flag_end = str + flag_len;
+
+  // When def_optional is true, it's OK to not have a "=value" part.
+  if (def_optional && (flag_end[0] == '\0')) return flag_end;
+
+  // If def_optional is true and there are more characters after the
+  // flag name, or if def_optional is false, there must be a '=' after
+  // the flag name.
+  if (flag_end[0] != '=') return nullptr;
+
+  // Returns the string after "=".
+  return flag_end + 1;
+}
+
+bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, true);
+
+  // Aborts if the parsing failed.
+  if (value_str == nullptr) return false;
+
+  // Converts the string value to a bool.
+  *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
+  return true;
+}
+
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == nullptr) return false;
+
+  // Sets *value to the value of the flag.
+  return ParseInt32(std::string("The value of flag --") + flag, value_str,
+                    value);
+}
+
+bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == nullptr) return false;
+
+  // Sets *value to the value of the flag.
+  return ParseDouble(std::string("The value of flag --") + flag, value_str,
+                     value);
+}
+
+bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == nullptr) return false;
+
+  *value = value_str;
+  return true;
+}
+
+bool IsFlag(const char* str, const char* flag) {
+  return (ParseFlagValue(str, flag, true) != nullptr);
+}
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.h b/libcxx/utils/google-benchmark/src/commandlineflags.h
new file mode 100644
index 0000000..34b9c6f
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/commandlineflags.h
@@ -0,0 +1,76 @@
+#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
+#define BENCHMARK_COMMANDLINEFLAGS_H_
+
+#include <cstdint>
+#include <string>
+
+// Macro for referencing flags.
+#define FLAG(name) FLAGS_##name
+
+// Macros for declaring flags.
+#define DECLARE_bool(name) extern bool FLAG(name)
+#define DECLARE_int32(name) extern int32_t FLAG(name)
+#define DECLARE_int64(name) extern int64_t FLAG(name)
+#define DECLARE_double(name) extern double FLAG(name)
+#define DECLARE_string(name) extern std::string FLAG(name)
+
+// Macros for defining flags.
+#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
+#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
+#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
+#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
+#define DEFINE_string(name, default_val, doc) \
+  std::string FLAG(name) = (default_val)
+
+namespace benchmark {
+// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
+
+// Parses a bool/Int32/string from the environment variable
+// corresponding to the given Google Test flag.
+bool BoolFromEnv(const char* flag, bool default_val);
+int32_t Int32FromEnv(const char* flag, int32_t default_val);
+double DoubleFromEnv(const char* flag, double default_val);
+const char* StringFromEnv(const char* flag, const char* default_val);
+
+// Parses a string for a bool flag, in the form of either
+// "--flag=value" or "--flag".
+//
+// In the former case, the value is taken as true as long as it does
+// not start with '0', 'f', or 'F'.
+//
+// In the latter case, the value is taken as true.
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value);
+
+// Parses a string for an Int32 flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
+
+// Parses a string for a Double flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseDoubleFlag(const char* str, const char* flag, double* value);
+
+// Parses a string for a string flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value);
+
+// Returns true if the string matches the flag.
+bool IsFlag(const char* str, const char* flag);
+
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_COMMANDLINEFLAGS_H_
diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc
new file mode 100644
index 0000000..b42bd38
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/complexity.cc
@@ -0,0 +1,283 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Source project : https://github.com/ismaelJimenez/cpp.leastsq
+// Adapted to be used with google benchmark
+
+#include "benchmark/benchmark_api.h"
+
+#include <algorithm>
+#include <cmath>
+#include "check.h"
+#include "complexity.h"
+#include "stat.h"
+
+namespace benchmark {
+
+// Internal function to calculate the different scalability forms
+BigOFunc* FittingCurve(BigO complexity) {
+  switch (complexity) {
+    case oN:
+      return [](int n) -> double { return n; };
+    case oNSquared:
+      return [](int n) -> double { return n * n; };
+    case oNCubed:
+      return [](int n) -> double { return n * n * n; };
+    case oLogN:
+      return [](int n) { return std::log2(n); };
+    case oNLogN:
+      return [](int n) { return n * std::log2(n); };
+    case o1:
+    default:
+      return [](int) { return 1.0; };
+  }
+}
+
+// Function to return an string for the calculated complexity
+std::string GetBigOString(BigO complexity) {
+  switch (complexity) {
+    case oN:
+      return "N";
+    case oNSquared:
+      return "N^2";
+    case oNCubed:
+      return "N^3";
+    case oLogN:
+      return "lgN";
+    case oNLogN:
+      return "NlgN";
+    case o1:
+      return "(1)";
+    default:
+      return "f(N)";
+  }
+}
+
+// Find the coefficient for the high-order term in the running time, by
+// minimizing the sum of squares of relative error, for the fitting curve
+// given by the lambda expresion.
+//   - n             : Vector containing the size of the benchmark tests.
+//   - time          : Vector containing the times for the benchmark tests.
+//   - fitting_curve : lambda expresion (e.g. [](int n) {return n; };).
+
+// For a deeper explanation on the algorithm logic, look the README file at
+// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
+
+LeastSq MinimalLeastSq(const std::vector<int>& n,
+                       const std::vector<double>& time,
+                       BigOFunc* fitting_curve) {
+  double sigma_gn = 0.0;
+  double sigma_gn_squared = 0.0;
+  double sigma_time = 0.0;
+  double sigma_time_gn = 0.0;
+
+  // Calculate least square fitting parameter
+  for (size_t i = 0; i < n.size(); ++i) {
+    double gn_i = fitting_curve(n[i]);
+    sigma_gn += gn_i;
+    sigma_gn_squared += gn_i * gn_i;
+    sigma_time += time[i];
+    sigma_time_gn += time[i] * gn_i;
+  }
+
+  LeastSq result;
+  result.complexity = oLambda;
+
+  // Calculate complexity.
+  result.coef = sigma_time_gn / sigma_gn_squared;
+
+  // Calculate RMS
+  double rms = 0.0;
+  for (size_t i = 0; i < n.size(); ++i) {
+    double fit = result.coef * fitting_curve(n[i]);
+    rms += pow((time[i] - fit), 2);
+  }
+
+  // Normalized RMS by the mean of the observed values
+  double mean = sigma_time / n.size();
+  result.rms = sqrt(rms / n.size()) / mean;
+
+  return result;
+}
+
+// Find the coefficient for the high-order term in the running time, by
+// minimizing the sum of squares of relative error.
+//   - n          : Vector containing the size of the benchmark tests.
+//   - time       : Vector containing the times for the benchmark tests.
+//   - complexity : If different than oAuto, the fitting curve will stick to
+//                  this one. If it is oAuto, it will be calculated the best
+//                  fitting curve.
+LeastSq MinimalLeastSq(const std::vector<int>& n,
+                       const std::vector<double>& time,
+                       const BigO complexity) {
+  CHECK_EQ(n.size(), time.size());
+  CHECK_GE(n.size(), 2);  // Do not compute fitting curve is less than two
+                          // benchmark runs are given
+  CHECK_NE(complexity, oNone);
+
+  LeastSq best_fit;
+
+  if (complexity == oAuto) {
+    std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
+
+    // Take o1 as default best fitting curve
+    best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
+    best_fit.complexity = o1;
+
+    // Compute all possible fitting curves and stick to the best one
+    for (const auto& fit : fit_curves) {
+      LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
+      if (current_fit.rms < best_fit.rms) {
+        best_fit = current_fit;
+        best_fit.complexity = fit;
+      }
+    }
+  } else {
+    best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
+    best_fit.complexity = complexity;
+  }
+
+  return best_fit;
+}
+
+std::vector<BenchmarkReporter::Run> ComputeStats(
+    const std::vector<BenchmarkReporter::Run>& reports) {
+  typedef BenchmarkReporter::Run Run;
+  std::vector<Run> results;
+
+  auto error_count =
+      std::count_if(reports.begin(), reports.end(),
+                    [](Run const& run) { return run.error_occurred; });
+
+  if (reports.size() - error_count < 2) {
+    // We don't report aggregated data if there was a single run.
+    return results;
+  }
+  // Accumulators.
+  Stat1_d real_accumulated_time_stat;
+  Stat1_d cpu_accumulated_time_stat;
+  Stat1_d bytes_per_second_stat;
+  Stat1_d items_per_second_stat;
+  // All repetitions should be run with the same number of iterations so we
+  // can take this information from the first benchmark.
+  int64_t const run_iterations = reports.front().iterations;
+
+  // Populate the accumulators.
+  for (Run const& run : reports) {
+    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
+    CHECK_EQ(run_iterations, run.iterations);
+    if (run.error_occurred) continue;
+    real_accumulated_time_stat +=
+        Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
+    cpu_accumulated_time_stat +=
+        Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
+    items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
+    bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
+  }
+
+  // Get the data from the accumulator to BenchmarkReporter::Run's.
+  Run mean_data;
+  mean_data.benchmark_name = reports[0].benchmark_name + "_mean";
+  mean_data.iterations = run_iterations;
+  mean_data.real_accumulated_time =
+      real_accumulated_time_stat.Mean() * run_iterations;
+  mean_data.cpu_accumulated_time =
+      cpu_accumulated_time_stat.Mean() * run_iterations;
+  mean_data.bytes_per_second = bytes_per_second_stat.Mean();
+  mean_data.items_per_second = items_per_second_stat.Mean();
+
+  // Only add label to mean/stddev if it is same for all runs
+  mean_data.report_label = reports[0].report_label;
+  for (std::size_t i = 1; i < reports.size(); i++) {
+    if (reports[i].report_label != reports[0].report_label) {
+      mean_data.report_label = "";
+      break;
+    }
+  }
+
+  Run stddev_data;
+  stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev";
+  stddev_data.report_label = mean_data.report_label;
+  stddev_data.iterations = 0;
+  stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev();
+  stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev();
+  stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
+  stddev_data.items_per_second = items_per_second_stat.StdDev();
+
+  results.push_back(mean_data);
+  results.push_back(stddev_data);
+  return results;
+}
+
+std::vector<BenchmarkReporter::Run> ComputeBigO(
+    const std::vector<BenchmarkReporter::Run>& reports) {
+  typedef BenchmarkReporter::Run Run;
+  std::vector<Run> results;
+
+  if (reports.size() < 2) return results;
+
+  // Accumulators.
+  std::vector<int> n;
+  std::vector<double> real_time;
+  std::vector<double> cpu_time;
+
+  // Populate the accumulators.
+  for (const Run& run : reports) {
+    CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
+    n.push_back(run.complexity_n);
+    real_time.push_back(run.real_accumulated_time / run.iterations);
+    cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
+  }
+
+  LeastSq result_cpu;
+  LeastSq result_real;
+
+  if (reports[0].complexity == oLambda) {
+    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
+    result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
+  } else {
+    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
+    result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
+  }
+  std::string benchmark_name =
+      reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));
+
+  // Get the data from the accumulator to BenchmarkReporter::Run's.
+  Run big_o;
+  big_o.benchmark_name = benchmark_name + "_BigO";
+  big_o.iterations = 0;
+  big_o.real_accumulated_time = result_real.coef;
+  big_o.cpu_accumulated_time = result_cpu.coef;
+  big_o.report_big_o = true;
+  big_o.complexity = result_cpu.complexity;
+
+  double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
+
+  // Only add label to mean/stddev if it is same for all runs
+  Run rms;
+  big_o.report_label = reports[0].report_label;
+  rms.benchmark_name = benchmark_name + "_RMS";
+  rms.report_label = big_o.report_label;
+  rms.iterations = 0;
+  rms.real_accumulated_time = result_real.rms / multiplier;
+  rms.cpu_accumulated_time = result_cpu.rms / multiplier;
+  rms.report_rms = true;
+  rms.complexity = result_cpu.complexity;
+
+  results.push_back(big_o);
+  results.push_back(rms);
+  return results;
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/complexity.h b/libcxx/utils/google-benchmark/src/complexity.h
new file mode 100644
index 0000000..85cc125
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/complexity.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Source project : https://github.com/ismaelJimenez/cpp.leastsq
+// Adapted to be used with google benchmark
+
+#ifndef COMPLEXITY_H_
+#define COMPLEXITY_H_
+
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark_api.h"
+#include "benchmark/reporter.h"
+
+namespace benchmark {
+
+// Return a vector containing the mean and standard devation information for
+// the specified list of reports. If 'reports' contains less than two
+// non-errored runs an empty vector is returned
+std::vector<BenchmarkReporter::Run> ComputeStats(
+    const std::vector<BenchmarkReporter::Run>& reports);
+
+// Return a vector containing the bigO and RMS information for the specified
+// list of reports. If 'reports.size() < 2' an empty vector is returned.
+std::vector<BenchmarkReporter::Run> ComputeBigO(
+    const std::vector<BenchmarkReporter::Run>& reports);
+
+// This data structure will contain the result returned by MinimalLeastSq
+//   - coef        : Estimated coeficient for the high-order term as
+//                   interpolated from data.
+//   - rms         : Normalized Root Mean Squared Error.
+//   - complexity  : Scalability form (e.g. oN, oNLogN). In case a scalability
+//                   form has been provided to MinimalLeastSq this will return
+//                   the same value. In case BigO::oAuto has been selected, this
+//                   parameter will return the best fitting curve detected.
+
+struct LeastSq {
+  LeastSq() :
+    coef(0.0),
+    rms(0.0),
+    complexity(oNone) {}
+
+  double coef;
+  double rms;
+  BigO complexity;
+};
+
+// Function to return an string for the calculated complexity
+std::string GetBigOString(BigO complexity);
+
+} // end namespace benchmark
+#endif // COMPLEXITY_H_
diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc
new file mode 100644
index 0000000..080c324
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/console_reporter.cc
@@ -0,0 +1,124 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/reporter.h"
+#include "complexity.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "check.h"
+#include "colorprint.h"
+#include "commandlineflags.h"
+#include "internal_macros.h"
+#include "string_util.h"
+#include "walltime.h"
+
+DECLARE_bool(color_print);
+
+namespace benchmark {
+
+bool ConsoleReporter::ReportContext(const Context& context) {
+  name_field_width_ = context.name_field_width;
+
+  PrintBasicContext(&GetErrorStream(), context);
+
+#ifdef BENCHMARK_OS_WINDOWS
+  if (FLAGS_color_print && &std::cout != &GetOutputStream()) {
+      GetErrorStream() << "Color printing is only supported for stdout on windows."
+                          " Disabling color printing\n";
+      FLAGS_color_print = false;
+  }
+#endif
+  std::string str = FormatString("%-*s %13s %13s %10s\n",
+                             static_cast<int>(name_field_width_), "Benchmark",
+                             "Time", "CPU", "Iterations");
+  GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n";
+
+  return true;
+}
+
+void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
+  for (const auto& run : reports)
+    PrintRunData(run);
+}
+
+void ConsoleReporter::PrintRunData(const Run& result) {
+  auto& Out = GetOutputStream();
+
+  auto name_color =
+      (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
+  ColorPrintf(Out, name_color, "%-*s ", name_field_width_,
+              result.benchmark_name.c_str());
+
+  if (result.error_occurred) {
+    ColorPrintf(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
+                result.error_message.c_str());
+    ColorPrintf(Out, COLOR_DEFAULT, "\n");
+    return;
+  }
+  // Format bytes per second
+  std::string rate;
+  if (result.bytes_per_second > 0) {
+    rate = StrCat(" ", HumanReadableNumber(result.bytes_per_second), "B/s");
+  }
+
+  // Format items per second
+  std::string items;
+  if (result.items_per_second > 0) {
+    items = StrCat(" ", HumanReadableNumber(result.items_per_second),
+                   " items/s");
+ }
+
+  const double real_time = result.GetAdjustedRealTime();
+  const double cpu_time = result.GetAdjustedCPUTime();
+
+  if (result.report_big_o) {
+    std::string big_o = GetBigOString(result.complexity);
+    ColorPrintf(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time,
+                big_o.c_str(), cpu_time, big_o.c_str());
+  } else if (result.report_rms) {
+    ColorPrintf(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
+                cpu_time * 100);
+  } else {
+    const char* timeLabel = GetTimeUnitString(result.time_unit);
+    ColorPrintf(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
+                cpu_time, timeLabel);
+  }
+
+  if (!result.report_big_o && !result.report_rms) {
+    ColorPrintf(Out, COLOR_CYAN, "%10lld", result.iterations);
+  }
+
+  if (!rate.empty()) {
+    ColorPrintf(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
+  }
+
+  if (!items.empty()) {
+    ColorPrintf(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
+  }
+
+  if (!result.report_label.empty()) {
+    ColorPrintf(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
+  }
+
+  ColorPrintf(Out, COLOR_DEFAULT, "\n");
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc
new file mode 100644
index 0000000..7bc7ef3
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/csv_reporter.cc
@@ -0,0 +1,118 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/reporter.h"
+#include "complexity.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "string_util.h"
+#include "walltime.h"
+
+// File format reference: http://edoceo.com/utilitas/csv-file-format.
+
+namespace benchmark {
+
+namespace {
+std::vector<std::string> elements = {
+  "name",
+  "iterations",
+  "real_time",
+  "cpu_time",
+  "time_unit",
+  "bytes_per_second",
+  "items_per_second",
+  "label",
+  "error_occurred",
+  "error_message"
+};
+}
+
+bool CSVReporter::ReportContext(const Context& context) {
+  PrintBasicContext(&GetErrorStream(), context);
+
+  std::ostream& Out = GetOutputStream();
+  for (auto B = elements.begin(); B != elements.end(); ) {
+    Out << *B++;
+    if (B != elements.end())
+      Out << ",";
+  }
+  Out << "\n";
+  return true;
+}
+
+void CSVReporter::ReportRuns(const std::vector<Run> & reports) {
+  for (const auto& run : reports)
+    PrintRunData(run);
+}
+
+void CSVReporter::PrintRunData(const Run & run) {
+  std::ostream& Out = GetOutputStream();
+
+  // Field with embedded double-quote characters must be doubled and the field
+  // delimited with double-quotes.
+  std::string name = run.benchmark_name;
+  ReplaceAll(&name, "\"", "\"\"");
+  Out << '"' << name << "\",";
+  if (run.error_occurred) {
+    Out << std::string(elements.size() - 3, ',');
+    Out << "true,";
+    std::string msg = run.error_message;
+    ReplaceAll(&msg, "\"", "\"\"");
+    Out << '"' << msg << "\"\n";
+    return;
+  }
+
+  // Do not print iteration on bigO and RMS report
+  if (!run.report_big_o && !run.report_rms) {
+    Out << run.iterations;
+  }
+  Out << ",";
+
+  Out << run.GetAdjustedRealTime() << ",";
+  Out << run.GetAdjustedCPUTime() << ",";
+
+  // Do not print timeLabel on bigO and RMS report
+  if (run.report_big_o) {
+    Out << GetBigOString(run.complexity);
+  } else if (!run.report_rms) {
+    Out << GetTimeUnitString(run.time_unit);
+  }
+  Out << ",";
+
+  if (run.bytes_per_second > 0.0) {
+    Out << run.bytes_per_second;
+  }
+  Out << ",";
+  if (run.items_per_second > 0.0) {
+    Out << run.items_per_second;
+  }
+  Out << ",";
+  if (!run.report_label.empty()) {
+    // Field with embedded double-quote characters must be doubled and the field
+    // delimited with double-quotes.
+    std::string label = run.report_label;
+    ReplaceAll(&label, "\"", "\"\"");
+    Out << "\"" << label << "\"";
+  }
+  Out << ",,";  // for error_occurred and error_message
+  Out << '\n';
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
new file mode 100644
index 0000000..3110804
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -0,0 +1,145 @@
+// ----------------------------------------------------------------------
+// CycleClock
+//    A CycleClock tells you the current time in Cycles.  The "time"
+//    is actually time since power-on.  This is like time() but doesn't
+//    involve a system call and is much more precise.
+//
+// NOTE: Not all cpu/platform/kernel combinations guarantee that this
+// clock increments at a constant rate or is synchronized across all logical
+// cpus in a system.
+//
+// If you need the above guarantees, please consider using a different
+// API. There are efforts to provide an interface which provides a millisecond
+// granularity and implemented as a memory read. A memory read is generally
+// cheaper than the CycleClock for many architectures.
+//
+// Also, in some out of order CPU implementations, the CycleClock is not
+// serializing. So if you're trying to count at cycles granularity, your
+// data might be inaccurate due to out of order instruction execution.
+// ----------------------------------------------------------------------
+
+#ifndef BENCHMARK_CYCLECLOCK_H_
+#define BENCHMARK_CYCLECLOCK_H_
+
+#include <cstdint>
+
+#include "benchmark/macros.h"
+#include "internal_macros.h"
+
+#if defined(BENCHMARK_OS_MACOSX)
+#include <mach/mach_time.h>
+#endif
+// For MSVC, we want to use '_asm rdtsc' when possible (since it works
+// with even ancient MSVC compilers), and when not possible the
+// __rdtsc intrinsic, declared in <intrin.h>.  Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation.
+// Therefore, we simply declare __rdtsc ourselves. See also
+// http://connect.microsoft.com/VisualStudio/feedback/details/262047
+#if defined(COMPILER_MSVC) && !defined(_M_IX86)
+extern "C" uint64_t __rdtsc();
+#pragma intrinsic(__rdtsc)
+#endif
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <sys/time.h>
+#endif
+
+namespace benchmark {
+// NOTE: only i386 and x86_64 have been well tested.
+// PPC, sparc, alpha, and ia64 are based on
+//    http://peter.kuscsik.com/wordpress/?p=14
+// with modifications by m3b.  See also
+//    https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
+namespace cycleclock {
+// This should return the number of cycles since power-on.  Thread-safe.
+inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
+#if defined(BENCHMARK_OS_MACOSX)
+  // this goes at the top because we need ALL Macs, regardless of
+  // architecture, to return the number of "mach time units" that
+  // have passed since startup.  See sysinfo.cc where
+  // InitializeSystemInfo() sets the supposed cpu clock frequency of
+  // macs to the number of mach time units per second, not actual
+  // CPU clock frequency (which can change in the face of CPU
+  // frequency scaling).  Also note that when the Mac sleeps, this
+  // counter pauses; it does not continue counting, nor does it
+  // reset to zero.
+  return mach_absolute_time();
+#elif defined(__i386__)
+  int64_t ret;
+  __asm__ volatile("rdtsc" : "=A"(ret));
+  return ret;
+#elif defined(__x86_64__) || defined(__amd64__)
+  uint64_t low, high;
+  __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
+  return (high << 32) | low;
+#elif defined(__powerpc__) || defined(__ppc__)
+  // This returns a time-base, which is not always precisely a cycle-count.
+  int64_t tbl, tbu0, tbu1;
+  asm("mftbu %0" : "=r"(tbu0));
+  asm("mftb  %0" : "=r"(tbl));
+  asm("mftbu %0" : "=r"(tbu1));
+  tbl &= -static_cast<int64>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
+  return (tbu1 << 32) | tbl;
+#elif defined(__sparc__)
+  int64_t tick;
+  asm(".byte 0x83, 0x41, 0x00, 0x00");
+  asm("mov   %%g1, %0" : "=r"(tick));
+  return tick;
+#elif defined(__ia64__)
+  int64_t itc;
+  asm("mov %0 = ar.itc" : "=r"(itc));
+  return itc;
+#elif defined(COMPILER_MSVC) && defined(_M_IX86)
+  // Older MSVC compilers (like 7.x) don't seem to support the
+  // __rdtsc intrinsic properly, so I prefer to use _asm instead
+  // when I know it will work.  Otherwise, I'll use __rdtsc and hope
+  // the code is being compiled with a non-ancient compiler.
+  _asm rdtsc
+#elif defined(COMPILER_MSVC)
+  return __rdtsc();
+#elif defined(__aarch64__)
+  // System timer of ARMv8 runs at a different frequency than the CPU's.
+  // The frequency is fixed, typically in the range 1-50MHz.  It can be
+  // read at CNTFRQ special register.  We assume the OS has set up
+  // the virtual timer properly.
+  int64_t virtual_timer_value;
+  asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
+  return virtual_timer_value;
+#elif defined(__ARM_ARCH)
+#if (__ARM_ARCH >= 6)  // V6 is the earliest arch that has a standard cyclecount
+  uint32_t pmccntr;
+  uint32_t pmuseren;
+  uint32_t pmcntenset;
+  // Read the user mode perf monitor counter access permissions.
+  asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
+  if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
+    asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
+    if (pmcntenset & 0x80000000ul) {  // Is it counting?
+      asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
+      // The counter is set up to count every 64th cycle
+      return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
+    }
+  }
+#endif
+  struct timeval tv;
+  gettimeofday(&tv, nullptr);
+  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__mips__)
+  // mips apparently only allows rdtsc for superusers, so we fall
+  // back to gettimeofday.  It's possible clock_gettime would be better.
+  struct timeval tv;
+  gettimeofday(&tv, nullptr);
+  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#else
+// The soft failover to a generic implementation is automatic only for ARM.
+// For other platforms the developer is expected to make an attempt to create
+// a fast implementation and use generic version if nothing better is available.
+#error You need to define CycleTimer for your OS and CPU
+#endif
+}
+}  // end namespace cycleclock
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_CYCLECLOCK_H_
diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h
new file mode 100644
index 0000000..1080ac9
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/internal_macros.h
@@ -0,0 +1,40 @@
+#ifndef BENCHMARK_INTERNAL_MACROS_H_
+#define BENCHMARK_INTERNAL_MACROS_H_
+
+#include "benchmark/macros.h"
+
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+#if __has_feature(cxx_attributes)
+# define BENCHMARK_NORETURN [[noreturn]]
+#elif defined(__GNUC__)
+# define BENCHMARK_NORETURN __attribute__((noreturn))
+#else
+# define BENCHMARK_NORETURN
+#endif
+
+#if defined(__CYGWIN__)
+# define BENCHMARK_OS_CYGWIN 1
+#elif defined(_WIN32)
+# define BENCHMARK_OS_WINDOWS 1
+#elif defined(__APPLE__)
+// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just
+// that it is an apple system.
+# define BENCHMARK_OS_MACOSX 1
+#elif defined(__FreeBSD__)
+# define BENCHMARK_OS_FREEBSD 1
+#elif defined(__linux__)
+# define BENCHMARK_OS_LINUX 1
+#endif
+
+#if defined(__clang__)
+# define COMPILER_CLANG
+#elif defined(_MSC_VER)
+# define COMPILER_MSVC
+#elif defined(__GNUC__)
+# define COMPILER_GCC
+#endif
+
+#endif // BENCHMARK_INTERNAL_MACROS_H_
diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc
new file mode 100644
index 0000000..485d305
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/json_reporter.cc
@@ -0,0 +1,178 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/reporter.h"
+#include "complexity.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "string_util.h"
+#include "walltime.h"
+
+namespace benchmark {
+
+namespace {
+
+std::string FormatKV(std::string const& key, std::string const& value) {
+  return StringPrintF("\"%s\": \"%s\"", key.c_str(), value.c_str());
+}
+
+std::string FormatKV(std::string const& key, const char* value) {
+  return StringPrintF("\"%s\": \"%s\"", key.c_str(), value);
+}
+
+std::string FormatKV(std::string const& key, bool value) {
+  return StringPrintF("\"%s\": %s", key.c_str(), value ? "true" : "false");
+}
+
+std::string FormatKV(std::string const& key, int64_t value) {
+  std::stringstream ss;
+  ss << '"' << key << "\": " << value;
+  return ss.str();
+}
+
+int64_t RoundDouble(double v) {
+    return static_cast<int64_t>(v + 0.5);
+}
+
+} // end namespace
+
+bool JSONReporter::ReportContext(const Context& context) {
+  std::ostream& out = GetOutputStream();
+
+  out << "{\n";
+  std::string inner_indent(2, ' ');
+
+  // Open context block and print context information.
+  out << inner_indent << "\"context\": {\n";
+  std::string indent(4, ' ');
+
+  std::string walltime_value = LocalDateTimeString();
+  out << indent << FormatKV("date", walltime_value) << ",\n";
+
+  out << indent
+      << FormatKV("num_cpus", static_cast<int64_t>(context.num_cpus))
+      << ",\n";
+  out << indent
+      << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu))
+      << ",\n";
+  out << indent
+      << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled)
+      << ",\n";
+
+#if defined(NDEBUG)
+  const char build_type[] = "release";
+#else
+  const char build_type[] = "debug";
+#endif
+  out << indent << FormatKV("library_build_type", build_type) << "\n";
+  // Close context block and open the list of benchmarks.
+  out << inner_indent << "},\n";
+  out << inner_indent << "\"benchmarks\": [\n";
+  return true;
+}
+
+void JSONReporter::ReportRuns(std::vector<Run> const& reports) {
+  if (reports.empty()) {
+    return;
+  }
+  std::string indent(4, ' ');
+  std::ostream& out = GetOutputStream();
+  if (!first_report_) {
+    out << ",\n";
+  }
+  first_report_ = false;
+
+  for (auto it = reports.begin(); it != reports.end(); ++it) {
+    out << indent << "{\n";
+    PrintRunData(*it);
+    out << indent << '}';
+    auto it_cp = it;
+    if (++it_cp != reports.end()) {
+      out << ",\n";
+    }
+  }
+}
+
+void JSONReporter::Finalize() {
+  // Close the list of benchmarks and the top level object.
+  GetOutputStream() << "\n  ]\n}\n";
+}
+
+void JSONReporter::PrintRunData(Run const& run) {
+  std::string indent(6, ' ');
+  std::ostream& out = GetOutputStream();
+    out << indent
+        << FormatKV("name", run.benchmark_name)
+        << ",\n";
+    if (run.error_occurred) {
+        out << indent
+            << FormatKV("error_occurred", run.error_occurred)
+            << ",\n";
+        out << indent
+            << FormatKV("error_message", run.error_message)
+            << ",\n";
+    }
+  if (!run.report_big_o && !run.report_rms) {
+        out << indent
+            << FormatKV("iterations", run.iterations)
+            << ",\n";
+        out << indent
+            << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime()))
+            << ",\n";
+        out << indent
+            << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime()));
+        out << ",\n" << indent
+            << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
+  } else if (run.report_big_o) {
+    out << indent
+        << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime()))
+        << ",\n";
+    out << indent
+        << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime()))
+        << ",\n";
+    out << indent
+            << FormatKV("big_o", GetBigOString(run.complexity))
+            << ",\n";
+        out << indent
+            << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
+    } else if(run.report_rms) {
+        out << indent
+            << FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime()*100))
+            << '%';
+  }
+  if (run.bytes_per_second > 0.0) {
+    out << ",\n"
+        << indent
+        << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
+  }
+  if (run.items_per_second > 0.0) {
+    out << ",\n"
+        << indent
+        << FormatKV("items_per_second", RoundDouble(run.items_per_second));
+  }
+  if (!run.report_label.empty()) {
+    out << ",\n"
+        << indent
+        << FormatKV("label", run.report_label);
+  }
+  out << '\n';
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/log.cc b/libcxx/utils/google-benchmark/src/log.cc
new file mode 100644
index 0000000..b660309
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/log.cc
@@ -0,0 +1,40 @@
+#include "log.h"
+
+#include <iostream>
+
+namespace benchmark {
+namespace internal {
+
+int& LoggingLevelImp() {
+    static int level = 0;
+    return level;
+}
+
+void SetLogLevel(int value) {
+    LoggingLevelImp() = value;
+}
+
+int GetLogLevel() {
+    return LoggingLevelImp();
+}
+
+class NullLogBuffer : public std::streambuf
+{
+public:
+  int overflow(int c) {
+    return c;
+  }
+};
+
+std::ostream& GetNullLogInstance() {
+  static NullLogBuffer log_buff;
+  static std::ostream null_log(&log_buff);
+  return null_log;
+}
+
+std::ostream& GetErrorLogInstance() {
+  return std::clog;
+}
+
+} // end namespace internal
+} // end namespace benchmark
\ No newline at end of file
diff --git a/libcxx/utils/google-benchmark/src/log.h b/libcxx/utils/google-benchmark/src/log.h
new file mode 100644
index 0000000..3777810
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/log.h
@@ -0,0 +1,28 @@
+#ifndef BENCHMARK_LOG_H_
+#define BENCHMARK_LOG_H_
+
+#include <ostream>
+
+namespace benchmark {
+namespace internal {
+
+int GetLogLevel();
+void SetLogLevel(int level);
+
+std::ostream& GetNullLogInstance();
+std::ostream& GetErrorLogInstance();
+
+inline std::ostream& GetLogInstanceForLevel(int level) {
+  if (level <= GetLogLevel()) {
+    return GetErrorLogInstance();
+  }
+  return GetNullLogInstance();
+}
+
+} // end namespace internal
+} // end namespace benchmark
+
+#define VLOG(x) (::benchmark::internal::GetLogInstanceForLevel(x) \
+                 << "-- LOG(" << x << "): ")
+
+#endif
\ No newline at end of file
diff --git a/libcxx/utils/google-benchmark/src/mutex.h b/libcxx/utils/google-benchmark/src/mutex.h
new file mode 100644
index 0000000..f37ec35
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/mutex.h
@@ -0,0 +1,142 @@
+#ifndef BENCHMARK_MUTEX_H_
+#define BENCHMARK_MUTEX_H_
+
+#include <mutex>
+#include <condition_variable>
+
+// Enable thread safety attributes only with clang.
+// The attributes can be safely erased when compiling with other compilers.
+#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
+#else
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
+#endif
+
+#define CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
+
+#define SCOPED_CAPABILITY \
+  THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+#define GUARDED_BY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+
+#define PT_GUARDED_BY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
+
+#define ACQUIRED_BEFORE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+
+#define ACQUIRED_AFTER(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+
+#define REQUIRES(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
+
+#define REQUIRES_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
+
+#define ACQUIRE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
+
+#define ACQUIRE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
+
+#define RELEASE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
+
+#define RELEASE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
+
+#define TRY_ACQUIRE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
+
+#define TRY_ACQUIRE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
+
+#define EXCLUDES(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+
+#define ASSERT_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
+
+#define ASSERT_SHARED_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
+
+#define RETURN_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+#define NO_THREAD_SAFETY_ANALYSIS \
+  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+
+
+namespace benchmark {
+
+typedef std::condition_variable Condition;
+
+// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
+// we can annotate them with thread safety attributes and use the
+// -Wthread-safety warning with clang. The standard library types cannot be
+// used directly because they do not provided the required annotations.
+class CAPABILITY("mutex") Mutex
+{
+public:
+  Mutex() {}
+
+  void lock() ACQUIRE() { mut_.lock(); }
+  void unlock() RELEASE() { mut_.unlock(); }
+  std::mutex& native_handle() {
+    return mut_;
+  }
+private:
+  std::mutex mut_;
+};
+
+
+class SCOPED_CAPABILITY MutexLock
+{
+  typedef std::unique_lock<std::mutex> MutexLockImp;
+public:
+  MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle())
+  { }
+  ~MutexLock() RELEASE() {}
+  MutexLockImp& native_handle() { return ml_; }
+private:
+  MutexLockImp ml_;
+};
+
+
+class Notification
+{
+public:
+  Notification() : notified_yet_(false) { }
+
+  void WaitForNotification() const EXCLUDES(mutex_) {
+    MutexLock m_lock(mutex_);
+    auto notified_fn = [this]() REQUIRES(mutex_) {
+                            return this->HasBeenNotified();
+                        };
+    cv_.wait(m_lock.native_handle(), notified_fn);
+  }
+
+  void Notify() EXCLUDES(mutex_) {
+    {
+      MutexLock lock(mutex_);
+      notified_yet_ = 1;
+    }
+    cv_.notify_all();
+  }
+
+private:
+  bool HasBeenNotified() const REQUIRES(mutex_) {
+    return notified_yet_;
+  }
+
+  mutable Mutex mutex_;
+  mutable std::condition_variable cv_;
+  bool notified_yet_ GUARDED_BY(mutex_);
+};
+
+} // end namespace benchmark
+
+#endif // BENCHMARK_MUTEX_H_
diff --git a/libcxx/utils/google-benchmark/src/re.h b/libcxx/utils/google-benchmark/src/re.h
new file mode 100644
index 0000000..af57a39c
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/re.h
@@ -0,0 +1,60 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BENCHMARK_RE_H_
+#define BENCHMARK_RE_H_
+
+#if defined(HAVE_STD_REGEX)
+#include <regex>
+#elif defined(HAVE_GNU_POSIX_REGEX)
+#include <gnuregex.h>
+#elif defined(HAVE_POSIX_REGEX)
+#include <regex.h>
+#else
+#error No regular expression backend was found!
+#endif
+#include <string>
+
+namespace benchmark {
+
+// A wrapper around the POSIX regular expression API that provides automatic
+// cleanup
+class Regex {
+ public:
+  Regex();
+  ~Regex();
+
+  // Compile a regular expression matcher from spec.  Returns true on success.
+  //
+  // On failure (and if error is not nullptr), error is populated with a human
+  // readable error message if an error occurs.
+  bool Init(const std::string& spec, std::string* error);
+
+  // Returns whether str matches the compiled regular expression.
+  bool Match(const std::string& str);
+ private:
+  bool init_;
+  // Underlying regular expression object
+#if defined(HAVE_STD_REGEX)
+  std::regex re_;
+#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX)
+  regex_t re_;
+#else
+# error No regular expression backend implementation available
+#endif
+};
+
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_RE_H_
diff --git a/libcxx/utils/google-benchmark/src/re_posix.cc b/libcxx/utils/google-benchmark/src/re_posix.cc
new file mode 100644
index 0000000..95b086f
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/re_posix.cc
@@ -0,0 +1,59 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "check.h"
+#include "re.h"
+
+namespace benchmark {
+
+Regex::Regex() : init_(false) { }
+
+bool Regex::Init(const std::string& spec, std::string* error) {
+  int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB);
+  if (ec != 0) {
+    if (error) {
+      size_t needed = regerror(ec, &re_, nullptr, 0);
+      char* errbuf = new char[needed];
+      regerror(ec, &re_, errbuf, needed);
+
+      // regerror returns the number of bytes necessary to null terminate
+      // the string, so we move that when assigning to error.
+      CHECK_NE(needed, 0);
+      error->assign(errbuf, needed - 1);
+
+      delete[] errbuf;
+    }
+
+    return false;
+  }
+
+  init_ = true;
+  return true;
+}
+
+Regex::~Regex() {
+  if (init_) {
+    regfree(&re_);
+  }
+}
+
+bool Regex::Match(const std::string& str) {
+  if (!init_) {
+    return false;
+  }
+
+  return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0;
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/re_std.cc b/libcxx/utils/google-benchmark/src/re_std.cc
new file mode 100644
index 0000000..cfd7a21
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/re_std.cc
@@ -0,0 +1,44 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "re.h"
+
+namespace benchmark {
+
+Regex::Regex() : init_(false) { }
+
+bool Regex::Init(const std::string& spec, std::string* error) {
+  try {
+    re_ = std::regex(spec, std::regex_constants::extended);
+
+    init_ = true;
+  } catch (const std::regex_error& e) {
+    if (error) {
+      *error = e.what();
+    }
+  }
+  return init_;
+}
+
+Regex::~Regex() { }
+
+bool Regex::Match(const std::string& str) {
+  if (!init_) {
+    return false;
+  }
+
+  return std::regex_search(str, re_);
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/reporter.cc b/libcxx/utils/google-benchmark/src/reporter.cc
new file mode 100644
index 0000000..5187859
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/reporter.cc
@@ -0,0 +1,75 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/reporter.h"
+#include "walltime.h"
+
+#include <cstdlib>
+
+#include <iostream>
+#include <vector>
+#include <tuple>
+
+#include "check.h"
+#include "stat.h"
+
+namespace benchmark {
+
+BenchmarkReporter::BenchmarkReporter()
+    : output_stream_(&std::cout), error_stream_(&std::cerr)
+{
+}
+
+BenchmarkReporter::~BenchmarkReporter() {
+}
+
+void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
+                                          Context const &context) {
+  CHECK(out_ptr) << "cannot be null";
+  auto& Out = *out_ptr;
+
+  Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
+            << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
+
+  Out << LocalDateTimeString() << "\n";
+
+  if (context.cpu_scaling_enabled) {
+    Out << "***WARNING*** CPU scaling is enabled, the benchmark "
+                 "real time measurements may be noisy and will incur extra "
+                 "overhead.\n";
+  }
+
+#ifndef NDEBUG
+  Out << "***WARNING*** Library was built as DEBUG. Timings may be "
+               "affected.\n";
+#endif
+}
+
+double BenchmarkReporter::Run::GetAdjustedRealTime() const {
+  double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
+  if (iterations != 0)
+    new_time /= static_cast<double>(iterations);
+  return new_time;
+}
+
+double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
+  double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
+  if (iterations != 0)
+    new_time /= static_cast<double>(iterations);
+  return new_time;
+}
+
+
+
+} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/sleep.cc b/libcxx/utils/google-benchmark/src/sleep.cc
new file mode 100644
index 0000000..918abc4
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/sleep.cc
@@ -0,0 +1,50 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "sleep.h"
+
+#include <cerrno>
+#include <ctime>
+
+#include "internal_macros.h"
+
+#ifdef BENCHMARK_OS_WINDOWS
+#include <Windows.h>
+#endif
+
+namespace benchmark {
+#ifdef BENCHMARK_OS_WINDOWS
+// Window's Sleep takes milliseconds argument.
+void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
+void SleepForSeconds(double seconds) {
+  SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
+}
+#else   // BENCHMARK_OS_WINDOWS
+void SleepForMicroseconds(int microseconds) {
+  struct timespec sleep_time;
+  sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
+  sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
+  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
+    ;  // Ignore signals and wait for the full interval to elapse.
+}
+
+void SleepForMilliseconds(int milliseconds) {
+  SleepForMicroseconds(static_cast<int>(milliseconds) * kNumMicrosPerMilli);
+}
+
+void SleepForSeconds(double seconds) {
+  SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
+}
+#endif  // BENCHMARK_OS_WINDOWS
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/sleep.h b/libcxx/utils/google-benchmark/src/sleep.h
new file mode 100644
index 0000000..f1e515c
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/sleep.h
@@ -0,0 +1,17 @@
+#ifndef BENCHMARK_SLEEP_H_
+#define BENCHMARK_SLEEP_H_
+
+#include <cstdint>
+
+namespace benchmark {
+const int64_t kNumMillisPerSecond = 1000LL;
+const int64_t kNumMicrosPerMilli = 1000LL;
+const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
+const int64_t kNumNanosPerMicro = 1000LL;
+const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
+
+void SleepForMilliseconds(int milliseconds);
+void SleepForSeconds(double seconds);
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_SLEEP_H_
diff --git a/libcxx/utils/google-benchmark/src/stat.h b/libcxx/utils/google-benchmark/src/stat.h
new file mode 100644
index 0000000..c4ecfe8
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/stat.h
@@ -0,0 +1,307 @@
+#ifndef BENCHMARK_STAT_H_
+#define BENCHMARK_STAT_H_
+
+#include <cmath>
+#include <limits>
+#include <ostream>
+#include <type_traits>
+
+
+namespace benchmark {
+
+template <typename VType, typename NumType>
+class Stat1;
+
+template <typename VType, typename NumType>
+class Stat1MinMax;
+
+typedef Stat1<float, int64_t> Stat1_f;
+typedef Stat1<double, int64_t> Stat1_d;
+typedef Stat1MinMax<float, int64_t> Stat1MinMax_f;
+typedef Stat1MinMax<double, int64_t> Stat1MinMax_d;
+
+template <typename VType>
+class Vector2;
+template <typename VType>
+class Vector3;
+template <typename VType>
+class Vector4;
+
+template <typename VType, typename NumType>
+class Stat1 {
+ public:
+  typedef Stat1<VType, NumType> Self;
+
+  Stat1() { Clear(); }
+  // Create a sample of value dat and weight 1
+  explicit Stat1(const VType &dat) {
+    sum_ = dat;
+    sum_squares_ = Sqr(dat);
+    numsamples_ = 1;
+  }
+  // Create statistics for all the samples between begin (included)
+  // and end(excluded)
+  explicit Stat1(const VType *begin, const VType *end) {
+    Clear();
+    for (const VType *item = begin; item < end; ++item) {
+      (*this) += Stat1(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1(const VType &dat, const NumType &w) {
+    sum_ = w * dat;
+    sum_squares_ = w * Sqr(dat);
+    numsamples_ = w;
+  }
+  // Copy operator
+  Stat1(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+  }
+
+  void Clear() {
+    numsamples_ = NumType();
+    sum_squares_ = sum_ = VType();
+  }
+
+  Self &operator=(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  Self &operator+=(const Self &stat) {
+    sum_ += stat.sum_;
+    sum_squares_ += stat.sum_squares_;
+    numsamples_ += stat.numsamples_;
+    return (*this);
+  }
+  // The operation opposite to +=
+  Self &operator-=(const Self &stat) {
+    sum_ -= stat.sum_;
+    sum_squares_ -= stat.sum_squares_;
+    numsamples_ -= stat.numsamples_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor k
+  Self &operator*=(const VType &k) {
+    sum_ *= k;
+    sum_squares_ *= k;
+    numsamples_ *= k;
+    return (*this);
+  }
+
+  // Merge statistics from two sample sets.
+  Self operator+(const Self &stat) const { return Self(*this) += stat; }
+
+  // The operation opposite to +
+  Self operator-(const Self &stat) const { return Self(*this) -= stat; }
+
+  // Multiply the weight of the set of samples by a factor k
+  Self operator*(const VType &k) const { return Self(*this) *= k; }
+
+  // Return the total weight of this sample set
+  NumType numSamples() const { return numsamples_; }
+
+  // Return the sum of this sample set
+  VType Sum() const { return sum_; }
+
+  // Return the mean of this sample set
+  VType Mean() const {
+    if (numsamples_ == 0) return VType();
+    return sum_ * (1.0 / numsamples_);
+  }
+
+  // Return the mean of this sample set and compute the standard deviation at
+  // the same time.
+  VType Mean(VType *stddev) const {
+    if (numsamples_ == 0) return VType();
+    VType mean = sum_ * (1.0 / numsamples_);
+    if (stddev) {
+      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+      *stddev = Sqrt(avg_squares - Sqr(mean));
+    }
+    return mean;
+  }
+
+  // Return the standard deviation of the sample set
+  VType StdDev() const {
+    if (numsamples_ == 0) return VType();
+    VType mean = Mean();
+    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+    return Sqrt(avg_squares - Sqr(mean));
+  }
+
+ private:
+  static_assert(std::is_integral<NumType>::value &&
+                !std::is_same<NumType, bool>::value,
+                "NumType must be an integral type that is not bool.");
+  // Let i be the index of the samples provided (using +=)
+  // and weight[i],value[i] be the data of sample #i
+  // then the variables have the following meaning:
+  NumType numsamples_;  // sum of weight[i];
+  VType sum_;           // sum of weight[i]*value[i];
+  VType sum_squares_;   // sum of weight[i]*value[i]^2;
+
+  // Template function used to square a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqr(const SType &dat) {
+    return dat * dat;
+  }
+
+  template <typename SType>
+  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+
+  template <typename SType>
+  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+
+  template <typename SType>
+  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+
+  // Template function used to take the square root of a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqrt(const SType &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    if (dat < 0) return 0;
+    return sqrt(dat);
+  }
+
+  template <typename SType>
+  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector2<SType>()).Sqrt();
+  }
+
+  template <typename SType>
+  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector3<SType>()).Sqrt();
+  }
+
+  template <typename SType>
+  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector4<SType>()).Sqrt();
+  }
+};
+
+// Useful printing function
+template <typename VType, typename NumType>
+std::ostream &operator<<(std::ostream &out, const Stat1<VType, NumType> &s) {
+  out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
+      << " nsamples = " << s.NumSamples() << "}";
+  return out;
+}
+
+// Stat1MinMax: same as Stat1, but it also
+// keeps the Min and Max values; the "-"
+// operator is disabled because it cannot be implemented
+// efficiently
+template <typename VType, typename NumType>
+class Stat1MinMax : public Stat1<VType, NumType> {
+ public:
+  typedef Stat1MinMax<VType, NumType> Self;
+
+  Stat1MinMax() { Clear(); }
+  // Create a sample of value dat and weight 1
+  explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
+    max_ = dat;
+    min_ = dat;
+  }
+  // Create statistics for all the samples between begin (included)
+  // and end(excluded)
+  explicit Stat1MinMax(const VType *begin, const VType *end) {
+    Clear();
+    for (const VType *item = begin; item < end; ++item) {
+      (*this) += Stat1MinMax(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1MinMax(const VType &dat, const NumType &w)
+      : Stat1<VType, NumType>(dat, w) {
+    max_ = dat;
+    min_ = dat;
+  }
+  // Copy operator
+  Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
+    max_ = stat.max_;
+    min_ = stat.min_;
+  }
+
+  void Clear() {
+    Stat1<VType, NumType>::Clear();
+    if (std::numeric_limits<VType>::has_infinity) {
+      min_ = std::numeric_limits<VType>::infinity();
+      max_ = -std::numeric_limits<VType>::infinity();
+    } else {
+      min_ = std::numeric_limits<VType>::max();
+      max_ = std::numeric_limits<VType>::min();
+    }
+  }
+
+  Self &operator=(const Self &stat) {
+    this->Stat1<VType, NumType>::operator=(stat);
+    max_ = stat.max_;
+    min_ = stat.min_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  Self &operator+=(const Self &stat) {
+    this->Stat1<VType, NumType>::operator+=(stat);
+    if (stat.max_ > max_) max_ = stat.max_;
+    if (stat.min_ < min_) min_ = stat.min_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor k
+  Self &operator*=(const VType &stat) {
+    this->Stat1<VType, NumType>::operator*=(stat);
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  Self operator+(const Self &stat) const { return Self(*this) += stat; }
+  // Multiply the weight of the set of samples by a factor k
+  Self operator*(const VType &k) const { return Self(*this) *= k; }
+
+  // Return the maximal value in this sample set
+  VType Max() const { return max_; }
+  // Return the minimal value in this sample set
+  VType Min() const { return min_; }
+
+ private:
+  // The - operation makes no sense with Min/Max
+  // unless we keep the full list of values (but we don't)
+  // make it private, and let it undefined so nobody can call it
+  Self &operator-=(const Self &stat);  // senseless. let it undefined.
+
+  // The operation opposite to -
+  Self operator-(const Self &stat) const;  // senseless. let it undefined.
+
+  // Let i be the index of the samples provided (using +=)
+  // and weight[i],value[i] be the data of sample #i
+  // then the variables have the following meaning:
+  VType max_;  // max of value[i]
+  VType min_;  // min of value[i]
+};
+
+// Useful printing function
+template <typename VType, typename NumType>
+std::ostream &operator<<(std::ostream &out,
+                         const Stat1MinMax<VType, NumType> &s) {
+  out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
+      << " nsamples = " << s.NumSamples() << " min = " << s.Min()
+      << " max = " << s.Max() << "}";
+  return out;
+}
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_STAT_H_
diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc
new file mode 100644
index 0000000..30d1305
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/string_util.cc
@@ -0,0 +1,169 @@
+#include "string_util.h"
+
+#include <cmath>
+#include <cstdarg>
+#include <array>
+#include <memory>
+#include <sstream>
+#include <stdio.h>
+
+#include "arraysize.h"
+
+namespace benchmark {
+namespace {
+
+// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
+const char kBigSIUnits[] = "kMGTPEZY";
+// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
+const char kBigIECUnits[] = "KMGTPEZY";
+// milli, micro, nano, pico, femto, atto, zepto, yocto.
+const char kSmallSIUnits[] = "munpfazy";
+
+// We require that all three arrays have the same size.
+static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
+              "SI and IEC unit arrays must be the same size");
+static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
+              "Small SI and Big SI unit arrays must be the same size");
+
+static const int64_t kUnitsSize = arraysize(kBigSIUnits);
+
+} // end anonymous namespace
+
+void ToExponentAndMantissa(double val, double thresh, int precision,
+                           double one_k, std::string* mantissa,
+                           int64_t* exponent) {
+  std::stringstream mantissa_stream;
+
+  if (val < 0) {
+    mantissa_stream << "-";
+    val = -val;
+  }
+
+  // Adjust threshold so that it never excludes things which can't be rendered
+  // in 'precision' digits.
+  const double adjusted_threshold =
+      std::max(thresh, 1.0 / std::pow(10.0, precision));
+  const double big_threshold = adjusted_threshold * one_k;
+  const double small_threshold = adjusted_threshold;
+
+  if (val > big_threshold) {
+    // Positive powers
+    double scaled = val;
+    for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) {
+      scaled /= one_k;
+      if (scaled <= big_threshold) {
+        mantissa_stream << scaled;
+        *exponent = i + 1;
+        *mantissa = mantissa_stream.str();
+        return;
+      }
+    }
+    mantissa_stream << val;
+    *exponent = 0;
+  } else if (val < small_threshold) {
+    // Negative powers
+    double scaled = val;
+    for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
+      scaled *= one_k;
+      if (scaled >= small_threshold) {
+        mantissa_stream << scaled;
+        *exponent = -static_cast<int64_t>(i + 1);
+        *mantissa = mantissa_stream.str();
+        return;
+      }
+    }
+    mantissa_stream << val;
+    *exponent = 0;
+  } else {
+    mantissa_stream << val;
+    *exponent = 0;
+  }
+  *mantissa = mantissa_stream.str();
+}
+
+std::string ExponentToPrefix(int64_t exponent, bool iec) {
+  if (exponent == 0) return "";
+
+  const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
+  if (index >= kUnitsSize) return "";
+
+  const char* array =
+      (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
+  if (iec)
+    return array[index] + std::string("i");
+  else
+    return std::string(1, array[index]);
+}
+
+std::string ToBinaryStringFullySpecified(double value, double threshold,
+                                         int precision) {
+  std::string mantissa;
+  int64_t exponent;
+  ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa,
+                        &exponent);
+  return mantissa + ExponentToPrefix(exponent, false);
+}
+
+void AppendHumanReadable(int n, std::string* str) {
+  std::stringstream ss;
+  // Round down to the nearest SI prefix.
+  ss << "/" << ToBinaryStringFullySpecified(n, 1.0, 0);
+  *str += ss.str();
+}
+
+std::string HumanReadableNumber(double n) {
+  // 1.1 means that figures up to 1.1k should be shown with the next unit down;
+  // this softens edge effects.
+  // 1 means that we should show one decimal place of precision.
+  return ToBinaryStringFullySpecified(n, 1.1, 1);
+}
+
+std::string StringPrintFImp(const char *msg, va_list args)
+{
+  // we might need a second shot at this, so pre-emptivly make a copy
+  va_list args_cp;
+  va_copy(args_cp, args);
+
+  // TODO(ericwf): use std::array for first attempt to avoid one memory
+  // allocation guess what the size might be
+  std::array<char, 256> local_buff;
+  std::size_t size = local_buff.size();
+  // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk
+  auto ret = vsnprintf(local_buff.data(), size, msg, args_cp);
+
+  va_end(args_cp);
+
+  // handle empty expansion
+  if (ret == 0)
+    return std::string{};
+  if (static_cast<std::size_t>(ret) < size)
+    return std::string(local_buff.data());
+
+  // we did not provide a long enough buffer on our first attempt.
+  // add 1 to size to account for null-byte in size cast to prevent overflow
+  size = static_cast<std::size_t>(ret) + 1;
+  auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
+  // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk
+  ret = vsnprintf(buff_ptr.get(), size, msg, args);
+  return std::string(buff_ptr.get());
+}
+
+std::string StringPrintF(const char* format, ...)
+{
+  va_list args;
+  va_start(args, format);
+  std::string tmp = StringPrintFImp(format, args);
+  va_end(args);
+  return tmp;
+}
+
+void ReplaceAll(std::string* str, const std::string& from,
+                const std::string& to) {
+  std::size_t start = 0;
+  while((start = str->find(from, start)) != std::string::npos) {
+    str->replace(start, from.length(), to);
+    start += to.length();
+  }
+}
+
+} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/string_util.h b/libcxx/utils/google-benchmark/src/string_util.h
new file mode 100644
index 0000000..b89fef5
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/string_util.h
@@ -0,0 +1,44 @@
+#ifndef BENCHMARK_STRING_UTIL_H_
+#define BENCHMARK_STRING_UTIL_H_
+
+#include <string>
+#include <sstream>
+#include <utility>
+#include "internal_macros.h"
+
+namespace benchmark {
+
+void AppendHumanReadable(int n, std::string* str);
+
+std::string HumanReadableNumber(double n);
+
+std::string StringPrintF(const char* format, ...);
+
+inline std::ostream&
+StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT
+{
+  return out;
+}
+
+template <class First, class ...Rest>
+inline std::ostream&
+StringCatImp(std::ostream& out, First&& f, Rest&&... rest)
+{
+  out << std::forward<First>(f);
+  return StringCatImp(out, std::forward<Rest>(rest)...);
+}
+
+template<class ...Args>
+inline std::string StrCat(Args&&... args)
+{
+  std::ostringstream ss;
+  StringCatImp(ss, std::forward<Args>(args)...);
+  return ss.str();
+}
+
+void ReplaceAll(std::string* str, const std::string& from,
+                const std::string& to);
+
+} // end namespace benchmark
+
+#endif // BENCHMARK_STRING_UTIL_H_
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc
new file mode 100644
index 0000000..e10e19d
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/sysinfo.cc
@@ -0,0 +1,420 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "sysinfo.h"
+#include "internal_macros.h"
+
+#ifdef BENCHMARK_OS_WINDOWS
+#include <Shlwapi.h>
+#include <Windows.h>
+#include <VersionHelpers.h>
+#else
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
+#include <sys/time.h>
+#include <unistd.h>
+#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
+#include <sys/sysctl.h>
+#endif
+#endif
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <mutex>
+
+#include "arraysize.h"
+#include "check.h"
+#include "cycleclock.h"
+#include "internal_macros.h"
+#include "log.h"
+#include "sleep.h"
+#include "string_util.h"
+
+namespace benchmark {
+namespace {
+std::once_flag cpuinfo_init;
+double cpuinfo_cycles_per_second = 1.0;
+int cpuinfo_num_cpus = 1;  // Conservative guess
+std::mutex cputimens_mutex;
+
+#if !defined BENCHMARK_OS_MACOSX
+const int64_t estimate_time_ms = 1000;
+
+// Helper function estimates cycles/sec by observing cycles elapsed during
+// sleep(). Using small sleep time decreases accuracy significantly.
+int64_t EstimateCyclesPerSecond() {
+  const int64_t start_ticks = cycleclock::Now();
+  SleepForMilliseconds(estimate_time_ms);
+  return cycleclock::Now() - start_ticks;
+}
+#endif
+
+#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
+// Helper function for reading an int from a file. Returns true if successful
+// and the memory location pointed to by value is set to the value read.
+bool ReadIntFromFile(const char* file, long* value) {
+  bool ret = false;
+  int fd = open(file, O_RDONLY);
+  if (fd != -1) {
+    char line[1024];
+    char* err;
+    memset(line, '\0', sizeof(line));
+    CHECK(read(fd, line, sizeof(line) - 1));
+    const long temp_value = strtol(line, &err, 10);
+    if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
+      *value = temp_value;
+      ret = true;
+    }
+    close(fd);
+  }
+  return ret;
+}
+#endif
+
+void InitializeSystemInfo() {
+#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
+  char line[1024];
+  char* err;
+  long freq;
+
+  bool saw_mhz = false;
+
+  // If the kernel is exporting the tsc frequency use that. There are issues
+  // where cpuinfo_max_freq cannot be relied on because the BIOS may be
+  // exporintg an invalid p-state (on x86) or p-states may be used to put the
+  // processor in a new mode (turbo mode). Essentially, those frequencies
+  // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
+  // well.
+  if (!saw_mhz &&
+      ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+    // The value is in kHz (as the file name suggests).  For example, on a
+    // 2GHz warpstation, the file contains the value "2000000".
+    cpuinfo_cycles_per_second = freq * 1000.0;
+    saw_mhz = true;
+  }
+
+  // If CPU scaling is in effect, we want to use the *maximum* frequency,
+  // not whatever CPU speed some random processor happens to be using now.
+  if (!saw_mhz &&
+      ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+                      &freq)) {
+    // The value is in kHz.  For example, on a 2GHz warpstation, the file
+    // contains the value "2000000".
+    cpuinfo_cycles_per_second = freq * 1000.0;
+    saw_mhz = true;
+  }
+
+  // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
+  const char* pname = "/proc/cpuinfo";
+  int fd = open(pname, O_RDONLY);
+  if (fd == -1) {
+    perror(pname);
+    if (!saw_mhz) {
+      cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+    }
+    return;
+  }
+
+  double bogo_clock = 1.0;
+  bool saw_bogo = false;
+  long max_cpu_id = 0;
+  int num_cpus = 0;
+  line[0] = line[1] = '\0';
+  size_t chars_read = 0;
+  do {  // we'll exit when the last read didn't read anything
+    // Move the next line to the beginning of the buffer
+    const size_t oldlinelen = strlen(line);
+    if (sizeof(line) == oldlinelen + 1)  // oldlinelen took up entire line
+      line[0] = '\0';
+    else  // still other lines left to save
+      memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
+    // Terminate the new line, reading more if we can't find the newline
+    char* newline = strchr(line, '\n');
+    if (newline == nullptr) {
+      const size_t linelen = strlen(line);
+      const size_t bytes_to_read = sizeof(line) - 1 - linelen;
+      CHECK(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
+      chars_read = read(fd, line + linelen, bytes_to_read);
+      line[linelen + chars_read] = '\0';
+      newline = strchr(line, '\n');
+    }
+    if (newline != nullptr) *newline = '\0';
+
+    // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
+    // accept postive values. Some environments (virtual machines) report zero,
+    // which would cause infinite looping in WallTime_Init.
+    if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
+      const char* freqstr = strchr(line, ':');
+      if (freqstr) {
+        cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
+        if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
+          saw_mhz = true;
+      }
+    } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
+      const char* freqstr = strchr(line, ':');
+      if (freqstr) {
+        bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
+        if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
+          saw_bogo = true;
+      }
+    } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) {
+      // The above comparison is case-sensitive because ARM kernels often
+      // include a "Processor" line that tells you about the CPU, distinct
+      // from the usual "processor" lines that give you CPU ids. No current
+      // Linux architecture is using "Processor" for CPU ids.
+      num_cpus++;  // count up every time we see an "processor :" entry
+      const char* id_str = strchr(line, ':');
+      if (id_str) {
+        const long cpu_id = strtol(id_str + 1, &err, 10);
+        if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
+          max_cpu_id = cpu_id;
+      }
+    }
+  } while (chars_read > 0);
+  close(fd);
+
+  if (!saw_mhz) {
+    if (saw_bogo) {
+      // If we didn't find anything better, we'll use bogomips, but
+      // we're not happy about it.
+      cpuinfo_cycles_per_second = bogo_clock;
+    } else {
+      // If we don't even have bogomips, we'll use the slow estimation.
+      cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+    }
+  }
+  if (num_cpus == 0) {
+    fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
+  } else {
+    if ((max_cpu_id + 1) != num_cpus) {
+      fprintf(stderr,
+              "CPU ID assignments in /proc/cpuinfo seem messed up."
+              " This is usually caused by a bad BIOS.\n");
+    }
+    cpuinfo_num_cpus = num_cpus;
+  }
+
+#elif defined BENCHMARK_OS_FREEBSD
+// For this sysctl to work, the machine must be configured without
+// SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
+// and later.  Before that, it's a 32-bit quantity (and gives the
+// wrong answer on machines faster than 2^32 Hz).  See
+//  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
+// But also compare FreeBSD 7.0:
+//  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
+//  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
+// To FreeBSD 6.3 (it's the same in 6-STABLE):
+//  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
+//  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+#if __FreeBSD__ >= 7
+  uint64_t hz = 0;
+#else
+  unsigned int hz = 0;
+#endif
+  size_t sz = sizeof(hz);
+  const char* sysctl_path = "machdep.tsc_freq";
+  if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
+    fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
+            sysctl_path, strerror(errno));
+    cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+  } else {
+    cpuinfo_cycles_per_second = hz;
+  }
+// TODO: also figure out cpuinfo_num_cpus
+
+#elif defined BENCHMARK_OS_WINDOWS
+  // In NT, read MHz from the registry. If we fail to do so or we're in win9x
+  // then make a crude estimate.
+  DWORD data, data_size = sizeof(data);
+  if (IsWindowsXPOrGreater() &&
+      SUCCEEDED(
+          SHGetValueA(HKEY_LOCAL_MACHINE,
+                      "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+                      "~MHz", nullptr, &data, &data_size)))
+    cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000));  // was mhz
+  else
+    cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+// TODO: also figure out cpuinfo_num_cpus
+
+#elif defined BENCHMARK_OS_MACOSX
+  // returning "mach time units" per second. the current number of elapsed
+  // mach time units can be found by calling uint64 mach_absolute_time();
+  // while not as precise as actual CPU cycles, it is accurate in the face
+  // of CPU frequency scaling and multi-cpu/core machines.
+  // Our mac users have these types of machines, and accuracy
+  // (i.e. correctness) trumps precision.
+  // See cycleclock.h: CycleClock::Now(), which returns number of mach time
+  // units on Mac OS X.
+  mach_timebase_info_data_t timebase_info;
+  mach_timebase_info(&timebase_info);
+  double mach_time_units_per_nanosecond =
+      static_cast<double>(timebase_info.denom) /
+      static_cast<double>(timebase_info.numer);
+  cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
+
+  int num_cpus = 0;
+  size_t size = sizeof(num_cpus);
+  int numcpus_name[] = {CTL_HW, HW_NCPU};
+  if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) ==
+          0 &&
+      (size == sizeof(num_cpus)))
+    cpuinfo_num_cpus = num_cpus;
+
+#else
+  // Generic cycles per second counter
+  cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+#endif
+}
+}  // end namespace
+
+// getrusage() based implementation of MyCPUUsage
+static double MyCPUUsageRUsage() {
+#ifndef BENCHMARK_OS_WINDOWS
+  struct rusage ru;
+  if (getrusage(RUSAGE_SELF, &ru) == 0) {
+    return (static_cast<double>(ru.ru_utime.tv_sec) +
+            static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
+            static_cast<double>(ru.ru_stime.tv_sec) +
+            static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
+  } else {
+    return 0.0;
+  }
+#else
+  HANDLE proc = GetCurrentProcess();
+  FILETIME creation_time;
+  FILETIME exit_time;
+  FILETIME kernel_time;
+  FILETIME user_time;
+  ULARGE_INTEGER kernel;
+  ULARGE_INTEGER user;
+  GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
+  kernel.HighPart = kernel_time.dwHighDateTime;
+  kernel.LowPart = kernel_time.dwLowDateTime;
+  user.HighPart = user_time.dwHighDateTime;
+  user.LowPart = user_time.dwLowDateTime;
+  return (static_cast<double>(kernel.QuadPart) +
+          static_cast<double>(user.QuadPart)) * 1e-7;
+#endif  // OS_WINDOWS
+}
+
+#ifndef BENCHMARK_OS_WINDOWS
+static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
+  static int cputime_fd = -1;
+  if (cputime_fd == -1) {
+    cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
+    if (cputime_fd < 0) {
+      cputime_fd = -1;
+      return false;
+    }
+  }
+  char buff[64];
+  memset(buff, 0, sizeof(buff));
+  if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
+    close(cputime_fd);
+    cputime_fd = -1;
+    return false;
+  }
+  unsigned long long result = strtoull(buff, nullptr, 0);
+  if (result == (std::numeric_limits<unsigned long long>::max)()) {
+    close(cputime_fd);
+    cputime_fd = -1;
+    return false;
+  }
+  *cputime = static_cast<double>(result) / 1e9;
+  return true;
+}
+#endif  // OS_WINDOWS
+
+double MyCPUUsage() {
+#ifndef BENCHMARK_OS_WINDOWS
+  {
+    std::lock_guard<std::mutex> l(cputimens_mutex);
+    static bool use_cputime_ns = true;
+    if (use_cputime_ns) {
+      double value;
+      if (MyCPUUsageCPUTimeNsLocked(&value)) {
+        return value;
+      }
+      // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
+      VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
+      use_cputime_ns = false;
+    }
+  }
+#endif  // OS_WINDOWS
+  return MyCPUUsageRUsage();
+}
+
+double ChildrenCPUUsage() {
+#ifndef BENCHMARK_OS_WINDOWS
+  struct rusage ru;
+  if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
+    return (static_cast<double>(ru.ru_utime.tv_sec) +
+            static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
+            static_cast<double>(ru.ru_stime.tv_sec) +
+            static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
+  } else {
+    return 0.0;
+  }
+#else
+  // TODO: Not sure what this even means on Windows
+  return 0.0;
+#endif  // OS_WINDOWS
+}
+
+double CyclesPerSecond(void) {
+  std::call_once(cpuinfo_init, InitializeSystemInfo);
+  return cpuinfo_cycles_per_second;
+}
+
+int NumCPUs(void) {
+  std::call_once(cpuinfo_init, InitializeSystemInfo);
+  return cpuinfo_num_cpus;
+}
+
+// The ""'s catch people who don't pass in a literal for "str"
+#define strliterallen(str) (sizeof("" str "") - 1)
+
+// Must use a string literal for prefix.
+#define memprefix(str, len, prefix)                       \
+  ((((len) >= strliterallen(prefix)) &&                   \
+    std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
+       ? str + strliterallen(prefix)                      \
+       : nullptr)
+
+bool CpuScalingEnabled() {
+#ifndef BENCHMARK_OS_WINDOWS
+  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
+  // local file system. If reading the exported files fails, then we may not be
+  // running on Linux, so we silently ignore all the read errors.
+  for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
+    std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu,
+                                       "/cpufreq/scaling_governor");
+    FILE* file = fopen(governor_file.c_str(), "r");
+    if (!file) break;
+    char buff[16];
+    size_t bytes_read = fread(buff, 1, sizeof(buff), file);
+    fclose(file);
+    if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
+  }
+#endif
+  return false;
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.h b/libcxx/utils/google-benchmark/src/sysinfo.h
new file mode 100644
index 0000000..eaf77e0
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/sysinfo.h
@@ -0,0 +1,12 @@
+#ifndef BENCHMARK_SYSINFO_H_
+#define BENCHMARK_SYSINFO_H_
+
+namespace benchmark {
+double MyCPUUsage();
+double ChildrenCPUUsage();
+int NumCPUs();
+double CyclesPerSecond();
+bool CpuScalingEnabled();
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_SYSINFO_H_
diff --git a/libcxx/utils/google-benchmark/src/walltime.cc b/libcxx/utils/google-benchmark/src/walltime.cc
new file mode 100644
index 0000000..4bdbaa5
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/walltime.cc
@@ -0,0 +1,263 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/macros.h"
+#include "internal_macros.h"
+#include "walltime.h"
+
+#if defined(BENCHMARK_OS_WINDOWS)
+#include <time.h>
+#include <winsock.h> // for timeval
+#else
+#include <sys/time.h>
+#endif
+
+#include <cstdio>
+#include <cstdint>
+#include <cstring>
+#include <ctime>
+
+#include <atomic>
+#include <chrono>
+#include <limits>
+
+#include "arraysize.h"
+#include "check.h"
+#include "cycleclock.h"
+#include "log.h"
+#include "sysinfo.h"
+
+namespace benchmark {
+namespace walltime {
+
+namespace {
+
+#if defined(HAVE_STEADY_CLOCK)
+template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
+struct ChooseSteadyClock {
+    typedef std::chrono::high_resolution_clock type;
+};
+
+template <>
+struct ChooseSteadyClock<false> {
+    typedef std::chrono::steady_clock type;
+};
+#endif
+
+struct ChooseClockType {
+#if defined(HAVE_STEADY_CLOCK)
+  typedef ChooseSteadyClock<>::type type;
+#else
+  typedef std::chrono::high_resolution_clock type;
+#endif
+};
+
+class WallTimeImp
+{
+public:
+  WallTime Now();
+
+  static WallTimeImp& GetWallTimeImp() {
+    static WallTimeImp* imp = new WallTimeImp();
+    return *imp;
+  }
+
+private:
+  WallTimeImp();
+  // Helper routines to load/store a float from an AtomicWord. Required because
+  // g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to
+  // get rid of this horror show.
+  void SetDrift(float f) {
+    int32_t w;
+    memcpy(&w, &f, sizeof(f));
+    std::atomic_store(&drift_adjust_, w);
+  }
+
+  float GetDrift() const {
+    float f;
+    int32_t w = std::atomic_load(&drift_adjust_);
+    memcpy(&f, &w, sizeof(f));
+    return f;
+  }
+
+  WallTime Slow() const {
+    struct timeval tv;
+#if defined(BENCHMARK_OS_WINDOWS)
+    FILETIME    file_time;
+    SYSTEMTIME  system_time;
+    ULARGE_INTEGER ularge;
+    const unsigned __int64 epoch = 116444736000000000LL;
+
+    GetSystemTime(&system_time);
+    SystemTimeToFileTime(&system_time, &file_time);
+    ularge.LowPart = file_time.dwLowDateTime;
+    ularge.HighPart = file_time.dwHighDateTime;
+
+    tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000));
+    tv.tv_usec = (long)(system_time.wMilliseconds * 1000);
+#else
+    gettimeofday(&tv, nullptr);
+#endif
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+  }
+
+private:
+  static_assert(sizeof(float) <= sizeof(int32_t),
+               "type sizes don't allow the drift_adjust hack");
+
+  WallTime base_walltime_;
+  int64_t base_cycletime_;
+  int64_t cycles_per_second_;
+  double seconds_per_cycle_;
+  uint32_t last_adjust_time_;
+  std::atomic<int32_t> drift_adjust_;
+  int64_t max_interval_cycles_;
+
+  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp);
+};
+
+
+WallTime WallTimeImp::Now() {
+  WallTime now = 0.0;
+  WallTime result = 0.0;
+  int64_t ct = 0;
+  uint32_t top_bits = 0;
+  do {
+    ct = cycleclock::Now();
+    int64_t cycle_delta = ct - base_cycletime_;
+    result = base_walltime_ + cycle_delta * seconds_per_cycle_;
+
+    top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
+    // Recompute drift no more often than every 2^32 cycles.
+    // I.e., @2GHz, ~ every two seconds
+    if (top_bits == last_adjust_time_) {  // don't need to recompute drift
+      return result + GetDrift();
+    }
+
+    now = Slow();
+  } while (cycleclock::Now() - ct > max_interval_cycles_);
+  // We are now sure that "now" and "result" were produced within
+  // kMaxErrorInterval of one another.
+
+  SetDrift(static_cast<float>(now - result));
+  last_adjust_time_ = top_bits;
+  return now;
+}
+
+
+WallTimeImp::WallTimeImp()
+    : base_walltime_(0.0), base_cycletime_(0),
+      cycles_per_second_(0), seconds_per_cycle_(0.0),
+      last_adjust_time_(0), drift_adjust_(0),
+      max_interval_cycles_(0) {
+  const double kMaxErrorInterval = 100e-6;
+  cycles_per_second_ = static_cast<int64_t>(CyclesPerSecond());
+  CHECK(cycles_per_second_ != 0);
+  seconds_per_cycle_ = 1.0 / cycles_per_second_;
+  max_interval_cycles_ =
+      static_cast<int64_t>(cycles_per_second_ * kMaxErrorInterval);
+  do {
+    base_cycletime_ = cycleclock::Now();
+    base_walltime_ = Slow();
+  } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_);
+  // We are now sure that "base_walltime" and "base_cycletime" were produced
+  // within kMaxErrorInterval of one another.
+
+  SetDrift(0.0);
+  last_adjust_time_ = static_cast<uint32_t>(uint64_t(base_cycletime_) >> 32);
+}
+
+WallTime CPUWalltimeNow() {
+  static WallTimeImp& imp = WallTimeImp::GetWallTimeImp();
+  return imp.Now();
+}
+
+WallTime ChronoWalltimeNow() {
+  typedef ChooseClockType::type Clock;
+  typedef std::chrono::duration<WallTime, std::chrono::seconds::period>
+          FPSeconds;
+  static_assert(std::chrono::treat_as_floating_point<WallTime>::value,
+                "This type must be treated as a floating point type.");
+  auto now = Clock::now().time_since_epoch();
+  return std::chrono::duration_cast<FPSeconds>(now).count();
+}
+
+bool UseCpuCycleClock() {
+    bool useWallTime = !CpuScalingEnabled();
+    if (useWallTime) {
+        VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n";
+    } else {
+        VLOG(1) << "Using std::chrono to provide walltime::Now().\n";
+    }
+    return useWallTime;
+}
+
+
+} // end anonymous namespace
+
+// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is
+// enabled at the start of the program then std::chrono::system_clock is used
+// instead.
+WallTime Now()
+{
+  static bool useCPUClock = UseCpuCycleClock();
+  if (useCPUClock) {
+    return CPUWalltimeNow();
+  } else {
+    return ChronoWalltimeNow();
+  }
+}
+
+}  // end namespace walltime
+
+
+namespace {
+
+std::string DateTimeString(bool local) {
+  typedef std::chrono::system_clock Clock;
+  std::time_t now = Clock::to_time_t(Clock::now());
+  char storage[128];
+  std::size_t written;
+
+  if (local) {
+#if defined(BENCHMARK_OS_WINDOWS)
+    written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
+#else
+    std::tm timeinfo;
+    std::memset(&timeinfo, 0, sizeof(std::tm));
+    ::localtime_r(&now, &timeinfo);
+    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
+#endif
+  } else {
+#if defined(BENCHMARK_OS_WINDOWS)
+    written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
+#else
+    std::tm timeinfo;
+    std::memset(&timeinfo, 0, sizeof(std::tm));
+    ::gmtime_r(&now, &timeinfo);
+    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
+#endif
+  }
+  CHECK(written < arraysize(storage));
+  ((void)written); // prevent unused variable in optimized mode.
+  return std::string(storage);
+}
+
+} // end namespace
+
+std::string LocalDateTimeString() {
+  return DateTimeString(true);
+}
+
+}  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/walltime.h b/libcxx/utils/google-benchmark/src/walltime.h
new file mode 100644
index 0000000..38c26f3
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/walltime.h
@@ -0,0 +1,17 @@
+#ifndef BENCHMARK_WALLTIME_H_
+#define BENCHMARK_WALLTIME_H_
+
+#include <string>
+
+namespace benchmark {
+typedef double WallTime;
+
+namespace walltime {
+WallTime Now();
+}  // end namespace walltime
+
+std::string LocalDateTimeString();
+
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_WALLTIME_H_