diff --git a/tests/Android.mk b/tests/Android.mk
index 66d023f..0da3951 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -18,6 +18,34 @@
 
 LOCAL_PATH := $(call my-dir)
 
+# -----------------------------------------------------------------------------
+# Benchmarks.
+# -----------------------------------------------------------------------------
+
+benchmark_c_flags = \
+    -O2 \
+    -Wall \
+    -Wextra -Werror \
+
+benchmark_src_files = \
+    benchmark_main.cpp \
+    string_benchmark.cpp \
+
+# Device build of the benchmarks (linked against bionic's .so). To run:
+#   adb shell bionic-benchmarks
+include $(CLEAR_VARS)
+LOCAL_MODULE := bionic-benchmarks
+LOCAL_SRC_FILES := $(benchmark_src_files)
+LOCAL_CFLAGS += $(benchmark_c_flags)
+LOCAL_C_INCLUDES += external/stlport/stlport bionic/ bionic/libstdc++/include
+LOCAL_SHARED_LIBRARIES += libstlport
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+include $(BUILD_EXECUTABLE)
+
+# -----------------------------------------------------------------------------
+# Unit tests.
+# -----------------------------------------------------------------------------
+
 test_c_flags = \
     -fstack-protector \
     -g \
@@ -41,7 +69,7 @@
 test_dynamic_src_files = \
     dlopen_test.cpp \
 
-# Build for the device (with bionic's .so). Run with:
+# Build tests for the device (with bionic's .so). Run with:
 #   adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests
 include $(CLEAR_VARS)
 LOCAL_MODULE := bionic-unit-tests
@@ -52,7 +80,7 @@
 LOCAL_SRC_FILES := $(test_src_files) $(test_dynamic_src_files)
 include $(BUILD_NATIVE_TEST)
 
-# Build for the device (with bionic's .a). Run with:
+# Build tests for the device (with bionic's .a). Run with:
 #   adb shell /data/nativetest/bionic-unit-tests-static/bionic-unit-tests-static
 include $(CLEAR_VARS)
 LOCAL_MODULE := bionic-unit-tests-static
@@ -63,8 +91,9 @@
 LOCAL_STATIC_LIBRARIES += libstlport_static libstdc++ libm libc
 include $(BUILD_NATIVE_TEST)
 
-
-
+# -----------------------------------------------------------------------------
+# Test library for the unit tests.
+# -----------------------------------------------------------------------------
 
 # Build no-elf-hash-table-library.so to test dlopen(3) on a library that
 # only has a GNU-style hash table.
@@ -75,10 +104,11 @@
 LOCAL_LDFLAGS := -Wl,--hash-style=gnu
 include $(BUILD_SHARED_LIBRARY)
 
+# -----------------------------------------------------------------------------
+# Unit tests built against glibc.
+# -----------------------------------------------------------------------------
 
-
-
-# Build for the host (with glibc).
+# Build tests for the host (with glibc).
 # Note that this will build against glibc, so it's not useful for testing
 # bionic's implementation, but it does let you use glibc as a reference
 # implementation for testing the tests themselves.
diff --git a/tests/benchmark.h b/tests/benchmark.h
new file mode 100644
index 0000000..70d4c63
--- /dev/null
+++ b/tests/benchmark.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include <vector>
+
+namespace testing {
+
+class Benchmark {  // One registered benchmark: its name, its function and the args to run it with.
+ public:
+  Benchmark(const char* name, void (*fn)(int)) {  // fn is called with an iteration count only
+    Register(name, fn, NULL);
+  }
+
+  Benchmark(const char* name, void (*fn_range)(int, int)) {  // fn_range is called with (iterations, arg)
+    Register(name, NULL, fn_range);
+  }
+
+  Benchmark* Arg(int x);  // appends x to args_ and returns this, so calls can be chained
+
+  bool ShouldRun(int argc, char* argv[]);  // true with no command-line args, or if name_ matches one as a regex
+  void Run();  // runs the benchmark for each value added with Arg()
+
+ private:
+  const char* name_;  // stored as a pointer, not copied; BENCHMARK() passes a string literal
+
+  void (*fn_)(int);  // the constructors set one of fn_/fn_range_ and leave the other NULL
+  void (*fn_range_)(int, int);
+
+  std::vector<int> args_;
+
+  void Register(const char* name, void (*fn)(int), void (*fn_range)(int, int));  // fills the fields, adds this to the global map
+  void RunRepeatedlyWithArg(int iterations, int arg);  // one timed call of the benchmark function
+  void RunWithArg(int arg);  // repeats with growing iteration counts, then prints one result row
+};
+
+}  // namespace testing
+
+void SetBenchmarkBytesProcessed(int64_t);  // records the byte count used for the throughput column
+void StopBenchmarkTiming();  // pauses the benchmark clock, e.g. around setup and teardown
+void StartBenchmarkTiming();  // resumes the benchmark clock if it is paused
+
+#define BENCHMARK(f) \
+    static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
+        (new ::testing::Benchmark(#f, f))
diff --git a/tests/benchmark_main.cpp b/tests/benchmark_main.cpp
new file mode 100644
index 0000000..4fbeb07
--- /dev/null
+++ b/tests/benchmark_main.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark.h"
+
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string>
+#include <map>
+
+static int64_t gBytesProcessed;  // set by SetBenchmarkBytesProcessed(); zeroed before every timed run
+static int64_t gBenchmarkTotalTimeNs;  // time accumulated while the clock was running
+static int64_t gBenchmarkStartTimeNs;  // when the clock was last started; 0 means it is stopped
+
+typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
+typedef BenchmarkMap::iterator BenchmarkMapIt;
+static BenchmarkMap gBenchmarks;  // NOTE(review): filled by BENCHMARK() static initializers in other files; presumably relies on this map being constructed first -- confirm.
+
+static int Round(int n) {  // Rounds n up to a value of the form {2, 5, 10} x 10^k.
+  int magnitude = 1;
+  while (n > magnitude*10) {
+    magnitude *= 10;
+  }
+  int multiplier = 10;
+  if (n < 2*magnitude) {
+    multiplier = 2;
+  } else if (n < 5*magnitude) {
+    multiplier = 5;
+  }
+  return multiplier*magnitude;
+}
+
+static int64_t NanoTime() {  // Current CLOCK_MONOTONIC reading, in nanoseconds.
+  struct timespec now;
+  now.tv_sec = now.tv_nsec = 0;  // result is 0 if clock_gettime() fails to fill it in
+  clock_gettime(CLOCK_MONOTONIC, &now);
+  return now.tv_nsec + static_cast<int64_t>(now.tv_sec) * 1000000000LL;
+}
+
+namespace testing {
+
+Benchmark* Benchmark::Arg(int arg) {  // Queues one more argument value; returns this for chaining.
+  args_.insert(args_.end(), arg);
+  return this;
+}
+
+bool Benchmark::ShouldRun(int argc, char* argv[]) {
+  if (argc <= 1) {
+    return true;  // With no arguments, we run all benchmarks.
+  }
+  // Otherwise, we interpret each argument as a (POSIX basic) regular expression
+  // and run this benchmark if its name matches any of them.
+  for (int i = 1; i < argc; i++) {
+    regex_t re;
+    if (regcomp(&re, argv[i], 0) != 0) {
+      fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]);
+      exit(EXIT_FAILURE);
+    }
+    int match = regexec(&re, name_, 0, NULL, 0);
+    regfree(&re);
+    if (match == 0) {  // 0 is the only "matched" result; other nonzero codes are errors
+      return true;
+    }
+  }
+  return false;
+}
+
+void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) {
+  if (fn == NULL && fn_range == NULL) {  // a benchmark needs something to call
+    fprintf(stderr, "%s: missing function\n", name);
+    exit(EXIT_FAILURE);
+  }
+
+  name_ = name;
+  fn_ = fn;
+  fn_range_ = fn_range;
+
+  gBenchmarks.insert(BenchmarkMap::value_type(name_, this));  // keyed by name; an existing entry is kept
+}
+
+void Benchmark::Run() {  // Runs once per Arg() value, or once with 0 for a plain fn_ benchmark without args.
+  if (fn_range_ != NULL && args_.empty()) {  // only fn_range_ actually consumes an argument
+    fprintf(stderr, "%s: no args!\n", name_);
+    exit(EXIT_FAILURE);
+  }
+  for (size_t i = 0; i == 0 || i < args_.size(); ++i) {  // i == 0: always run at least once
+    RunWithArg(args_.empty() ? 0 : args_[i]);  // the 0 is a placeholder; fn_ never sees it
+  }
+}
+
+void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) {  // One timed call of the benchmark function.
+  gBenchmarkTotalTimeNs = 0;
+  gBytesProcessed = 0;
+  gBenchmarkStartTimeNs = NanoTime();  // the clock is running when the benchmark is entered
+  if (fn_ == NULL) {
+    fn_range_(iterations, arg);
+  } else {
+    fn_(iterations);
+  }
+  if (gBenchmarkStartTimeNs != 0) {  // the benchmark returned with the clock still running
+    gBenchmarkTotalTimeNs += NanoTime() - gBenchmarkStartTimeNs;
+  }
+}
+
+void Benchmark::RunWithArg(int arg) {  // Calibrates the iteration count, then prints one result row.
+  // Start with a single iteration in case one call is already expensive.
+  int iterations = 1;
+  RunRepeatedlyWithArg(iterations, arg);
+  while (gBenchmarkTotalTimeNs < 1e9 && iterations < 1e9) {
+    const int last = iterations;
+    const int64_t ns_per_iteration = gBenchmarkTotalTimeNs/iterations;
+    int64_t wanted = 1000000000LL;  // too fast to measure: aim straight for the cap
+    if (ns_per_iteration > 0) {
+      wanted = 1000000000LL / ns_per_iteration;  // iterations that fill about one second
+    }
+    wanted = std::max<int64_t>(last + 1, std::min<int64_t>(wanted + wanted/2, 100LL*last));
+    iterations = Round(static_cast<int>(std::min<int64_t>(wanted, 1000000000LL)));  // <= 1e9 keeps Round() within int
+    RunRepeatedlyWithArg(iterations, arg);
+  }
+
+  char throughput[100];
+  throughput[0] = '\0';
+  if (gBenchmarkTotalTimeNs > 0 && gBytesProcessed > 0) {
+    double mib_processed = static_cast<double>(gBytesProcessed)/(1024.0*1024.0);  // true MiB, as labelled
+    double seconds = static_cast<double>(gBenchmarkTotalTimeNs)/1e9;
+    snprintf(throughput, sizeof(throughput), " %8.2f MiB/s", mib_processed/seconds);
+  }
+
+  char full_name[100];
+  if (fn_range_ != NULL) {
+    if (arg >= (1<<20)) {
+      snprintf(full_name, sizeof(full_name), "%s/%dM", name_, arg/(1<<20));
+    } else if (arg >= (1<<10)) {
+      snprintf(full_name, sizeof(full_name), "%s/%dK", name_, arg/(1<<10));
+    } else {
+      snprintf(full_name, sizeof(full_name), "%s/%d", name_, arg);
+    }
+  } else {
+    snprintf(full_name, sizeof(full_name), "%s", name_);
+  }
+
+  printf("%-20s %10lld %10lld%s\n", full_name, static_cast<long long>(iterations),
+         static_cast<long long>(gBenchmarkTotalTimeNs/iterations), throughput);  // casts match %lld on LP64 too
+  fflush(stdout);
+}
+
+}  // namespace testing
+
+void SetBenchmarkBytesProcessed(int64_t bytes) {  // Stores the byte count used to report throughput.
+  gBytesProcessed = bytes;
+}
+
+void StopBenchmarkTiming() {  // Pauses the clock, banking the interval since it was started.
+  const bool running = (gBenchmarkStartTimeNs != 0);
+  if (running) {
+    gBenchmarkTotalTimeNs += NanoTime() - gBenchmarkStartTimeNs;
+    gBenchmarkStartTimeNs = 0;  // zero marks the clock as stopped
+  }
+}
+
+void StartBenchmarkTiming() {  // Resumes the clock; does nothing while it is already running.
+  if (0 == gBenchmarkStartTimeNs) {  // zero marks the clock as stopped
+    gBenchmarkStartTimeNs = NanoTime();
+  }
+}
+
+int main(int argc, char* argv[]) {
+  // Benchmarks register themselves from Benchmark's constructors; none means nothing to do.
+  if (gBenchmarks.empty()) {
+    fprintf(stderr, "no benchmarks!\n");
+    exit(EXIT_FAILURE);
+  }
+  printf("%-20s %10s %10s\n", "", "iterations", "ns/op");
+  fflush(stdout);
+
+  // The map is keyed by name, so benchmarks run in name order.
+  for (BenchmarkMapIt entry = gBenchmarks.begin(); entry != gBenchmarks.end(); ++entry) {
+    if (entry->second->ShouldRun(argc, argv)) {
+      entry->second->Run();
+    }
+  }
+  return 0;
+}
diff --git a/tests/string_benchmark.cpp b/tests/string_benchmark.cpp
new file mode 100644
index 0000000..cbb9771
--- /dev/null
+++ b/tests/string_benchmark.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark.h"
+
+#include <string.h>
+
+#define KB 1024
+#define MB (1024*KB)  // parenthesized so expressions such as x/MB or x%MB expand correctly
+
+#define AT_COMMON_SIZES \
+    Arg(8)->Arg(64)->Arg(512)->Arg(1*KB)->Arg(8*KB)->Arg(16*KB)->Arg(32*KB)->Arg(64*KB)
+
+// TODO: test unaligned operation too? (currently everything will be 8-byte aligned by malloc.)
+
+static void BM_memcmp(int iters, int nbytes) {  // Times memcmp() over two identical nbytes-long buffers.
+  StopBenchmarkTiming();  // allocation and fill are not part of the measurement
+  char* src = new char[nbytes]; char* dst = new char[nbytes];
+  memset(src, 'x', nbytes);
+  memset(dst, 'x', nbytes);
+  StartBenchmarkTiming();
+
+  volatile int c __attribute__((unused)) = 0;  // presumably a volatile sink so the calls are not optimized away
+  for (int i = 0; i < iters; i++) {
+    c += memcmp(dst, src, nbytes);
+  }
+
+  StopBenchmarkTiming();
+  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));  // widened to 64 bits before multiplying
+  delete[] src;
+  delete[] dst;
+}
+BENCHMARK(BM_memcmp)->AT_COMMON_SIZES;
+
+static void BM_memcpy(int iters, int nbytes) {  // Times memcpy() of nbytes between two heap buffers.
+  StopBenchmarkTiming();  // allocation and fill are not part of the measurement
+  char* src = new char[nbytes]; char* dst = new char[nbytes];
+  memset(src, 'x', nbytes);  // only the source is initialized; dst is written by the copy
+  StartBenchmarkTiming();
+
+  for (int i = 0; i < iters; i++) {
+    memcpy(dst, src, nbytes);
+  }
+
+  StopBenchmarkTiming();
+  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));  // widened to 64 bits before multiplying
+  delete[] src;
+  delete[] dst;
+}
+BENCHMARK(BM_memcpy)->AT_COMMON_SIZES;
+
+static void BM_memset(int iters, int nbytes) {  // Times memset() zero-filling an nbytes-long heap buffer.
+  StopBenchmarkTiming();  // allocation is not part of the measurement
+  char* dst = new char[nbytes];
+  StartBenchmarkTiming();
+
+  for (int i = 0; i < iters; i++) {
+    memset(dst, 0, nbytes);
+  }
+
+  StopBenchmarkTiming();
+  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));  // widened to 64 bits before multiplying
+  delete[] dst;
+}
+BENCHMARK(BM_memset)->AT_COMMON_SIZES;
+
+static void BM_strlen(int iters, int nbytes) {  // Times strlen() on a string that fills an nbytes-long buffer.
+  StopBenchmarkTiming();  // allocation and fill are not part of the measurement
+  char* s = new char[nbytes];
+  memset(s, 'x', nbytes);
+  s[nbytes - 1] = 0;  // NUL-terminate in the last byte, so strlen(s) is nbytes - 1
+  StartBenchmarkTiming();
+
+  volatile int c __attribute__((unused)) = 0;  // presumably a volatile sink so the calls are not optimized away
+  for (int i = 0; i < iters; i++) {
+    c += strlen(s);
+  }
+
+  StopBenchmarkTiming();
+  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));  // counts the terminator as a processed byte
+  delete[] s;
+}
+BENCHMARK(BM_strlen)->AT_COMMON_SIZES;
