Merge remote-tracking branch 'upstream/master' into client_channel_init_cleanup
diff --git a/.gitmodules b/.gitmodules
index c32881c..04d155c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -17,6 +17,6 @@
 [submodule "third_party/thrift"]
 	path = third_party/thrift
 	url = https://github.com/apache/thrift.git
-[submodule "third_party/google_benchmark"]
-	path = third_party/google_benchmark
+[submodule "third_party/benchmark"]
+	path = third_party/benchmark
 	url = https://github.com/google/benchmark
diff --git a/Makefile b/Makefile
index db30c21..8f7328a 100644
--- a/Makefile
+++ b/Makefile
@@ -1260,9 +1260,9 @@
 pc_cxx_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc++_unsecure.pc
 
 ifeq ($(EMBED_OPENSSL),true)
-privatelibs_cxx:  $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libboringssl_test_util.a $(LIBDIR)/$(CONFIG)/libboringssl_aes_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_asn1_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_base64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bio_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bn_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bytestring_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aead_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cipher_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ed25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_digest_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ec_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_err_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_extra_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pbkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs12_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs8_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_poly1305_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_rsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x509_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ssl_test_lib.a $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
+privatelibs_cxx:  $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libboringssl_test_util.a $(LIBDIR)/$(CONFIG)/libboringssl_aes_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_asn1_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_base64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bio_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bn_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bytestring_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aead_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cipher_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ed25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_digest_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ec_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_err_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_extra_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pbkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs12_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs8_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_poly1305_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_rsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x509_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ssl_test_lib.a $(LIBDIR)/$(CONFIG)/libbenchmark.a
 else
-privatelibs_cxx:  $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
+privatelibs_cxx:  $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libbenchmark.a
 endif
 
 
@@ -6998,43 +6998,43 @@
 endif
 
 
-LIBGOOGLE_BENCHMARK_SRC = \
-    third_party/google_benchmark/src/benchmark.cc \
-    third_party/google_benchmark/src/benchmark_register.cc \
-    third_party/google_benchmark/src/colorprint.cc \
-    third_party/google_benchmark/src/commandlineflags.cc \
-    third_party/google_benchmark/src/complexity.cc \
-    third_party/google_benchmark/src/console_reporter.cc \
-    third_party/google_benchmark/src/csv_reporter.cc \
-    third_party/google_benchmark/src/json_reporter.cc \
-    third_party/google_benchmark/src/reporter.cc \
-    third_party/google_benchmark/src/sleep.cc \
-    third_party/google_benchmark/src/string_util.cc \
-    third_party/google_benchmark/src/sysinfo.cc \
-    third_party/google_benchmark/src/timers.cc \
+LIBBENCHMARK_SRC = \
+    third_party/benchmark/src/benchmark.cc \
+    third_party/benchmark/src/benchmark_register.cc \
+    third_party/benchmark/src/colorprint.cc \
+    third_party/benchmark/src/commandlineflags.cc \
+    third_party/benchmark/src/complexity.cc \
+    third_party/benchmark/src/console_reporter.cc \
+    third_party/benchmark/src/csv_reporter.cc \
+    third_party/benchmark/src/json_reporter.cc \
+    third_party/benchmark/src/reporter.cc \
+    third_party/benchmark/src/sleep.cc \
+    third_party/benchmark/src/string_util.cc \
+    third_party/benchmark/src/sysinfo.cc \
+    third_party/benchmark/src/timers.cc \
 
 PUBLIC_HEADERS_CXX += \
 
-LIBGOOGLE_BENCHMARK_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(LIBGOOGLE_BENCHMARK_SRC))))
+LIBBENCHMARK_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(LIBBENCHMARK_SRC))))
 
-$(LIBGOOGLE_BENCHMARK_OBJS): CPPFLAGS += -Ithird_party/google_benchmark/include -DHAVE_POSIX_REGEX
+$(LIBBENCHMARK_OBJS): CPPFLAGS += -Ithird_party/benchmark/include -DHAVE_POSIX_REGEX
 
 ifeq ($(NO_PROTOBUF),true)
 
 # You can't build a C++ library if you don't have protobuf - a bit overreached, but still okay.
 
-$(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a: protobuf_dep_error
+$(LIBDIR)/$(CONFIG)/libbenchmark.a: protobuf_dep_error
 
 
 else
 
-$(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a: $(ZLIB_DEP)  $(PROTOBUF_DEP) $(LIBGOOGLE_BENCHMARK_OBJS) 
+$(LIBDIR)/$(CONFIG)/libbenchmark.a: $(ZLIB_DEP)  $(PROTOBUF_DEP) $(LIBBENCHMARK_OBJS) 
 	$(E) "[AR]      Creating $@"
 	$(Q) mkdir -p `dirname $@`
-	$(Q) rm -f $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
-	$(Q) $(AR) $(AROPTS) $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a $(LIBGOOGLE_BENCHMARK_OBJS) 
+	$(Q) rm -f $(LIBDIR)/$(CONFIG)/libbenchmark.a
+	$(Q) $(AR) $(AROPTS) $(LIBDIR)/$(CONFIG)/libbenchmark.a $(LIBBENCHMARK_OBJS) 
 ifeq ($(SYSTEM),Darwin)
-	$(Q) ranlib -no_warning_for_no_symbols $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
+	$(Q) ranlib -no_warning_for_no_symbols $(LIBDIR)/$(CONFIG)/libbenchmark.a
 endif
 
 
@@ -7043,7 +7043,7 @@
 endif
 
 ifneq ($(NO_DEPS),true)
--include $(LIBGOOGLE_BENCHMARK_OBJS:.o=.dep)
+-include $(LIBBENCHMARK_OBJS:.o=.dep)
 endif
 
 
@@ -11736,16 +11736,16 @@
 
 else
 
-$(BINDIR)/$(CONFIG)/bm_fullstack: $(PROTOBUF_DEP) $(BM_FULLSTACK_OBJS) $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a
+$(BINDIR)/$(CONFIG)/bm_fullstack: $(PROTOBUF_DEP) $(BM_FULLSTACK_OBJS) $(LIBDIR)/$(CONFIG)/libbenchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a
 	$(E) "[LD]      Linking $@"
 	$(Q) mkdir -p `dirname $@`
-	$(Q) $(LDXX) $(LDFLAGS) $(BM_FULLSTACK_OBJS) $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBSXX) $(LDLIBS_PROTOBUF) $(LDLIBS) $(LDLIBS_SECURE) $(GTEST_LIB) -o $(BINDIR)/$(CONFIG)/bm_fullstack
+	$(Q) $(LDXX) $(LDFLAGS) $(BM_FULLSTACK_OBJS) $(LIBDIR)/$(CONFIG)/libbenchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBSXX) $(LDLIBS_PROTOBUF) $(LDLIBS) $(LDLIBS_SECURE) $(GTEST_LIB) -o $(BINDIR)/$(CONFIG)/bm_fullstack
 
 endif
 
 endif
 
-$(OBJDIR)/$(CONFIG)/test/cpp/microbenchmarks/bm_fullstack.o:  $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a
+$(OBJDIR)/$(CONFIG)/test/cpp/microbenchmarks/bm_fullstack.o:  $(LIBDIR)/$(CONFIG)/libbenchmark.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a
 
 deps_bm_fullstack: $(BM_FULLSTACK_OBJS:.o=.dep)
 
@@ -13192,16 +13192,16 @@
 
 else
 
-$(BINDIR)/$(CONFIG)/noop-benchmark: $(PROTOBUF_DEP) $(NOOP-BENCHMARK_OBJS) $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
+$(BINDIR)/$(CONFIG)/noop-benchmark: $(PROTOBUF_DEP) $(NOOP-BENCHMARK_OBJS) $(LIBDIR)/$(CONFIG)/libbenchmark.a
 	$(E) "[LD]      Linking $@"
 	$(Q) mkdir -p `dirname $@`
-	$(Q) $(LDXX) $(LDFLAGS) $(NOOP-BENCHMARK_OBJS) $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a $(LDLIBSXX) $(LDLIBS_PROTOBUF) $(LDLIBS) $(LDLIBS_SECURE) $(GTEST_LIB) -o $(BINDIR)/$(CONFIG)/noop-benchmark
+	$(Q) $(LDXX) $(LDFLAGS) $(NOOP-BENCHMARK_OBJS) $(LIBDIR)/$(CONFIG)/libbenchmark.a $(LDLIBSXX) $(LDLIBS_PROTOBUF) $(LDLIBS) $(LDLIBS_SECURE) $(GTEST_LIB) -o $(BINDIR)/$(CONFIG)/noop-benchmark
 
 endif
 
 endif
 
-$(OBJDIR)/$(CONFIG)/test/cpp/microbenchmarks/noop-benchmark.o:  $(LIBDIR)/$(CONFIG)/libgoogle_benchmark.a
+$(OBJDIR)/$(CONFIG)/test/cpp/microbenchmarks/noop-benchmark.o:  $(LIBDIR)/$(CONFIG)/libbenchmark.a
 
 deps_noop-benchmark: $(NOOP-BENCHMARK_OBJS:.o=.dep)
 
diff --git a/build.yaml b/build.yaml
index 68d19a6..de9d253 100644
--- a/build.yaml
+++ b/build.yaml
@@ -2850,7 +2850,7 @@
   src:
   - test/cpp/microbenchmarks/bm_fullstack.cc
   deps:
-  - google_benchmark
+  - benchmark
   - grpc++_test_util
   - grpc_test_util
   - grpc++
@@ -3300,7 +3300,7 @@
   src:
   - test/cpp/microbenchmarks/noop-benchmark.cc
   deps:
-  - google_benchmark
+  - benchmark
 - name: proto_server_reflection_test
   gtest: true
   build: test
@@ -3786,6 +3786,8 @@
       UBSAN_OPTIONS: halt_on_error=1:print_stacktrace=1
     timeout_multiplier: 1.5
 defaults:
+  benchmark:
+    CPPFLAGS: -Ithird_party/benchmark/include -DHAVE_POSIX_REGEX
   boringssl:
     CFLAGS: -Wno-sign-conversion -Wno-conversion -Wno-unused-value -Wno-unknown-pragmas
       -Wno-implicit-function-declaration -Wno-unused-variable -Wno-sign-compare $(NO_W_EXTRA_SEMI)
@@ -3794,8 +3796,6 @@
   global:
     CPPFLAGS: -g -Wall -Wextra -Werror -Wno-long-long -Wno-unused-parameter
     LDFLAGS: -g
-  google_benchmark:
-    CPPFLAGS: -Ithird_party/google_benchmark/include -DHAVE_POSIX_REGEX
   zlib:
     CFLAGS: -Wno-sign-conversion -Wno-conversion -Wno-unused-value -Wno-implicit-function-declaration
       $(W_NO_SHIFT_NEGATIVE_VALUE) -fvisibility=hidden
diff --git a/include/grpc++/impl/codegen/completion_queue.h b/include/grpc++/impl/codegen/completion_queue.h
index ef00163..944f2c3 100644
--- a/include/grpc++/impl/codegen/completion_queue.h
+++ b/include/grpc++/impl/codegen/completion_queue.h
@@ -52,6 +52,7 @@
 #include <grpc++/impl/codegen/grpc_library.h>
 #include <grpc++/impl/codegen/status.h>
 #include <grpc++/impl/codegen/time.h>
+#include <grpc/impl/codegen/atm.h>
 
 struct grpc_completion_queue;
 
@@ -101,6 +102,7 @@
   /// instance.
   CompletionQueue() {
     cq_ = g_core_codegen_interface->grpc_completion_queue_create(nullptr);
+    InitialAvalanching();  // reserve this for the future shutdown
   }
 
   /// Wrap \a take, taking ownership of the instance.
@@ -151,7 +153,8 @@
 
   /// Request the shutdown of the queue.
   ///
-  /// \warning This method must be called at some point. Once invoked, \a Next
+  /// \warning This method must be called at some point if this completion queue
+  /// is accessed with Next or AsyncNext. Once invoked, \a Next
   /// will start to return false and \a AsyncNext will return \a
   /// NextStatus::SHUTDOWN. Only once either one of these methods does that
   /// (that is, once the queue has been \em drained) can an instance of this
@@ -165,6 +168,21 @@
   /// owership is performed.
   grpc_completion_queue* cq() { return cq_; }
 
+  /// Manage state of avalanching operations: completion queue tags that
+  /// trigger other completion queue operations. The underlying core completion
+  /// queue should not really shut down until all avalanching operations have
+  /// been finalized. Note that we maintain the requirement that an avalanche
+  /// registration must take place before CQ shutdown (which must be maintained
+  /// elsewhere).
+  void InitialAvalanching() {
+    gpr_atm_rel_store(&avalanches_in_flight_, static_cast<gpr_atm>(1));
+  }
+  void RegisterAvalanching() {
+    gpr_atm_no_barrier_fetch_add(&avalanches_in_flight_,
+                                 static_cast<gpr_atm>(1));
+  };
+  void CompleteAvalanching();
+
  private:
   // Friend synchronous wrappers so that they can access Pluck(), which is
   // a semi-private API geared towards the synchronous implementation.
@@ -229,6 +247,8 @@
   }
 
   grpc_completion_queue* cq_;  // owned
+
+  gpr_atm avalanches_in_flight_;
 };
 
 /// A specific type of completion queue used by the processing of notifications
diff --git a/include/grpc++/impl/codegen/server_interface.h b/include/grpc++/impl/codegen/server_interface.h
index 41a64be..666b9ff 100644
--- a/include/grpc++/impl/codegen/server_interface.h
+++ b/include/grpc++/impl/codegen/server_interface.h
@@ -140,7 +140,7 @@
                      ServerAsyncStreamingInterface* stream,
                      CompletionQueue* call_cq, void* tag,
                      bool delete_on_finalize);
-    virtual ~BaseAsyncRequest() {}
+    virtual ~BaseAsyncRequest();
 
     bool FinalizeResult(void** tag, bool* status) override;
 
diff --git a/src/google_benchmark/gen_build_yaml.py b/src/benchmark/gen_build_yaml.py
similarity index 86%
rename from src/google_benchmark/gen_build_yaml.py
rename to src/benchmark/gen_build_yaml.py
index 302e087..09b7611 100755
--- a/src/google_benchmark/gen_build_yaml.py
+++ b/src/benchmark/gen_build_yaml.py
@@ -39,15 +39,15 @@
 out = {}
 
 out['libs'] = [{
-    'name': 'google_benchmark',
+    'name': 'benchmark',
     'build': 'private',
     'language': 'c++',
     'secure': 'no',
-    'defaults': 'google_benchmark',
-    'src': sorted(glob.glob('third_party/google_benchmark/src/*.cc')),
+    'defaults': 'benchmark',
+    'src': sorted(glob.glob('third_party/benchmark/src/*.cc')),
     'headers': sorted(
-        glob.glob('third_party/google_benchmark/src/*.h') +
-        glob.glob('third_party/google_benchmark/include/benchmark/*.h')),
+        glob.glob('third_party/benchmark/src/*.h') +
+        glob.glob('third_party/benchmark/include/benchmark/*.h')),
 }]
 
 print yaml.dump(out)
diff --git a/src/core/ext/transport/chttp2/client/chttp2_connector.c b/src/core/ext/transport/chttp2/client/chttp2_connector.c
index 213395c..568b114 100644
--- a/src/core/ext/transport/chttp2/client/chttp2_connector.c
+++ b/src/core/ext/transport/chttp2/client/chttp2_connector.c
@@ -56,6 +56,7 @@
   gpr_refcount refs;
 
   bool shutdown;
+  bool connecting;
 
   char *server_name;
   grpc_chttp2_create_handshakers_func create_handshakers;
@@ -103,7 +104,9 @@
   }
   // If handshaking is not yet in progress, shutdown the endpoint.
   // Otherwise, the handshaker will do this for us.
-  if (c->endpoint != NULL) grpc_endpoint_shutdown(exec_ctx, c->endpoint);
+  if (!c->connecting && c->endpoint != NULL) {
+    grpc_endpoint_shutdown(exec_ctx, c->endpoint);
+  }
   gpr_mu_unlock(&c->mu);
 }
 
@@ -192,6 +195,8 @@
 static void connected(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
   chttp2_connector *c = arg;
   gpr_mu_lock(&c->mu);
+  GPR_ASSERT(c->connecting);
+  c->connecting = false;
   if (error != GRPC_ERROR_NONE || c->shutdown) {
     if (error == GRPC_ERROR_NONE) {
       error = GRPC_ERROR_CREATE("connector shutdown");
@@ -202,6 +207,7 @@
     grpc_closure *notify = c->notify;
     c->notify = NULL;
     grpc_exec_ctx_sched(exec_ctx, notify, error, NULL);
+    if (c->endpoint != NULL) grpc_endpoint_shutdown(exec_ctx, c->endpoint);
     gpr_mu_unlock(&c->mu);
     chttp2_connector_unref(exec_ctx, arg);
   } else {
@@ -235,6 +241,8 @@
   GPR_ASSERT(c->endpoint == NULL);
   chttp2_connector_ref(con);  // Ref taken for callback.
   grpc_closure_init(&c->connected, connected, c);
+  GPR_ASSERT(!c->connecting);
+  c->connecting = true;
   grpc_tcp_client_connect(exec_ctx, &c->connected, &c->endpoint,
                           args->interested_parties, args->channel_args,
                           args->addr, args->deadline);
diff --git a/src/core/ext/transport/chttp2/server/chttp2_server.c b/src/core/ext/transport/chttp2/server/chttp2_server.c
index 7795606..8ee7e29 100644
--- a/src/core/ext/transport/chttp2/server/chttp2_server.c
+++ b/src/core/ext/transport/chttp2/server/chttp2_server.c
@@ -58,8 +58,8 @@
     grpc_chttp2_server_handshaker_factory *handshaker_factory,
     grpc_handshake_manager *handshake_mgr) {
   if (handshaker_factory != NULL) {
-    handshaker_factory->vtable->create_handshakers(
-        exec_ctx, handshaker_factory, handshake_mgr);
+    handshaker_factory->vtable->create_handshakers(exec_ctx, handshaker_factory,
+                                                   handshake_mgr);
   }
 }
 
@@ -71,7 +71,6 @@
   }
 }
 
-
 typedef struct pending_handshake_manager_node {
   grpc_handshake_manager *handshake_mgr;
   struct pending_handshake_manager_node *next;
@@ -196,9 +195,9 @@
   // args instead of hard-coding it.
   const gpr_timespec deadline = gpr_time_add(
       gpr_now(GPR_CLOCK_MONOTONIC), gpr_time_from_seconds(120, GPR_TIMESPAN));
-  grpc_handshake_manager_do_handshake(
-      exec_ctx, connection_state->handshake_mgr, tcp, state->args, deadline,
-      acceptor, on_handshake_done, connection_state);
+  grpc_handshake_manager_do_handshake(exec_ctx, connection_state->handshake_mgr,
+                                      tcp, state->args, deadline, acceptor,
+                                      on_handshake_done, connection_state);
 }
 
 /* Server callback: start listening on our ports */
@@ -275,9 +274,8 @@
   memset(state, 0, sizeof(*state));
   grpc_closure_init(&state->tcp_server_shutdown_complete,
                     tcp_server_shutdown_complete, state);
-  err =
-      grpc_tcp_server_create(exec_ctx, &state->tcp_server_shutdown_complete,
-                             args, &tcp_server);
+  err = grpc_tcp_server_create(exec_ctx, &state->tcp_server_shutdown_complete,
+                               args, &tcp_server);
   if (err != GRPC_ERROR_NONE) {
     goto error;
   }
diff --git a/src/core/ext/transport/chttp2/server/chttp2_server.h b/src/core/ext/transport/chttp2/server/chttp2_server.h
index b1ff04b..3073399 100644
--- a/src/core/ext/transport/chttp2/server/chttp2_server.h
+++ b/src/core/ext/transport/chttp2/server/chttp2_server.h
@@ -73,7 +73,6 @@
 grpc_error *grpc_chttp2_server_add_port(
     grpc_exec_ctx *exec_ctx, grpc_server *server, const char *addr,
     grpc_channel_args *args,
-    grpc_chttp2_server_handshaker_factory *handshaker_factory,
-    int *port_num);
+    grpc_chttp2_server_handshaker_factory *handshaker_factory, int *port_num);
 
 #endif /* GRPC_CORE_EXT_TRANSPORT_CHTTP2_SERVER_CHTTP2_SERVER_H */
diff --git a/src/core/ext/transport/chttp2/server/insecure/server_chttp2.c b/src/core/ext/transport/chttp2/server/insecure/server_chttp2.c
index 366312b..7e286d4 100644
--- a/src/core/ext/transport/chttp2/server/insecure/server_chttp2.c
+++ b/src/core/ext/transport/chttp2/server/insecure/server_chttp2.c
@@ -45,7 +45,7 @@
   int port_num = 0;
   GRPC_API_TRACE("grpc_server_add_insecure_http2_port(server=%p, addr=%s)", 2,
                  (server, addr));
-  grpc_error* err = grpc_chttp2_server_add_port(
+  grpc_error *err = grpc_chttp2_server_add_port(
       &exec_ctx, server, addr,
       grpc_channel_args_copy(grpc_server_get_channel_args(server)),
       NULL /* handshaker_factory */, &port_num);
diff --git a/src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.c b/src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.c
index 5f41728..85c21f0 100644
--- a/src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.c
+++ b/src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.c
@@ -64,7 +64,7 @@
 }
 
 static void server_security_handshaker_factory_destroy(
-    grpc_exec_ctx* exec_ctx, grpc_chttp2_server_handshaker_factory *hf) {
+    grpc_exec_ctx *exec_ctx, grpc_chttp2_server_handshaker_factory *hf) {
   server_security_handshaker_factory *handshaker_factory =
       (server_security_handshaker_factory *)hf;
   GRPC_SECURITY_CONNECTOR_UNREF(&handshaker_factory->security_connector->base,
@@ -106,8 +106,8 @@
     goto done;
   }
   // Create handshaker factory.
-  server_security_handshaker_factory* handshaker_factory =
-     gpr_malloc(sizeof(*handshaker_factory));
+  server_security_handshaker_factory *handshaker_factory =
+      gpr_malloc(sizeof(*handshaker_factory));
   memset(handshaker_factory, 0, sizeof(*handshaker_factory));
   handshaker_factory->base.vtable = &server_security_handshaker_factory_vtable;
   handshaker_factory->security_connector = sc;
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.c b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
index 3e7c078..6bc0548 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.c
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
@@ -425,7 +425,6 @@
     /* flush writable stream list to avoid dangling references */
     grpc_chttp2_stream *s;
     while (grpc_chttp2_list_pop_writable_stream(t, &s)) {
-      grpc_chttp2_leave_writing_lists(exec_ctx, t, s);
       GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:close");
     }
     end_all_the_calls(exec_ctx, t, GRPC_ERROR_REF(error));
@@ -521,10 +520,6 @@
     }
   }
 
-  if (s->fail_pending_writes_on_writes_finished_error != NULL) {
-    GRPC_ERROR_UNREF(s->fail_pending_writes_on_writes_finished_error);
-  }
-
   GPR_ASSERT(s->send_initial_metadata_finished == NULL);
   GPR_ASSERT(s->fetching_send_message == NULL);
   GPR_ASSERT(s->send_trailing_metadata_finished == NULL);
@@ -604,11 +599,13 @@
                                  write_state_name(t->write_state),
                                  write_state_name(st), reason));
   t->write_state = st;
-  if (st == GRPC_CHTTP2_WRITE_STATE_IDLE &&
-      t->close_transport_on_writes_finished != NULL) {
-    grpc_error *err = t->close_transport_on_writes_finished;
-    t->close_transport_on_writes_finished = NULL;
-    close_transport_locked(exec_ctx, t, err);
+  if (st == GRPC_CHTTP2_WRITE_STATE_IDLE) {
+    grpc_exec_ctx_enqueue_list(exec_ctx, &t->run_after_write, NULL);
+    if (t->close_transport_on_writes_finished != NULL) {
+      grpc_error *err = t->close_transport_on_writes_finished;
+      t->close_transport_on_writes_finished = NULL;
+      close_transport_locked(exec_ctx, t, err);
+    }
   }
 }
 
@@ -825,7 +822,14 @@
   }
 }
 
+/* Flag that this closure barrier wants stats to be updated before finishing */
 #define CLOSURE_BARRIER_STATS_BIT (1 << 0)
+/* Flag that this closure barrier may be covering a write in a pollset, and so
+   we should not complete this closure until we can prove that the write got
+   scheduled */
+#define CLOSURE_BARRIER_MAY_COVER_WRITE (1 << 1)
+/* First bit of the reference count, stored in the high order bits (with the low
+   bits being used for flags defined above) */
 #define CLOSURE_BARRIER_FIRST_REF_BIT (1 << 16)
 
 static grpc_closure *add_closure_barrier(grpc_closure *closure) {
@@ -852,6 +856,16 @@
     return;
   }
   closure->next_data.scratch -= CLOSURE_BARRIER_FIRST_REF_BIT;
+  if (grpc_http_trace) {
+    const char *errstr = grpc_error_string(error);
+    gpr_log(GPR_DEBUG,
+            "complete_closure_step: %p refs=%d flags=0x%04x desc=%s err=%s",
+            closure,
+            (int)(closure->next_data.scratch / CLOSURE_BARRIER_FIRST_REF_BIT),
+            (int)(closure->next_data.scratch % CLOSURE_BARRIER_FIRST_REF_BIT),
+            desc, errstr);
+    grpc_error_free_string(errstr);
+  }
   if (error != GRPC_ERROR_NONE) {
     if (closure->error_data.error == GRPC_ERROR_NONE) {
       closure->error_data.error =
@@ -868,7 +882,13 @@
       grpc_transport_move_stats(&s->stats, s->collecting_stats);
       s->collecting_stats = NULL;
     }
-    grpc_closure_run(exec_ctx, closure, closure->error_data.error);
+    if ((t->write_state == GRPC_CHTTP2_WRITE_STATE_IDLE) ||
+        !(closure->next_data.scratch & CLOSURE_BARRIER_MAY_COVER_WRITE)) {
+      grpc_closure_run(exec_ctx, closure, closure->error_data.error);
+    } else {
+      grpc_closure_list_append(&t->run_after_write, closure,
+                               closure->error_data.error);
+    }
   }
 }
 
@@ -1013,6 +1033,7 @@
 
   if (op->send_initial_metadata != NULL) {
     GPR_ASSERT(s->send_initial_metadata_finished == NULL);
+    on_complete->next_data.scratch |= CLOSURE_BARRIER_MAY_COVER_WRITE;
     s->send_initial_metadata_finished = add_closure_barrier(on_complete);
     s->send_initial_metadata = op->send_initial_metadata;
     const size_t metadata_size =
@@ -1066,6 +1087,7 @@
   }
 
   if (op->send_message != NULL) {
+    on_complete->next_data.scratch |= CLOSURE_BARRIER_MAY_COVER_WRITE;
     s->fetching_send_message_finished = add_closure_barrier(op->on_complete);
     if (s->write_closed) {
       grpc_chttp2_complete_closure_step(
@@ -1103,6 +1125,7 @@
 
   if (op->send_trailing_metadata != NULL) {
     GPR_ASSERT(s->send_trailing_metadata_finished == NULL);
+    on_complete->next_data.scratch |= CLOSURE_BARRIER_MAY_COVER_WRITE;
     s->send_trailing_metadata_finished = add_closure_barrier(on_complete);
     s->send_trailing_metadata = op->send_trailing_metadata;
     const size_t metadata_size =
@@ -1406,7 +1429,6 @@
     }
   }
   if (grpc_chttp2_list_remove_writable_stream(t, s)) {
-    grpc_chttp2_leave_writing_lists(exec_ctx, t, s);
     GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:remove_stream");
   }
 
@@ -1537,41 +1559,9 @@
   return error;
 }
 
-void grpc_chttp2_leave_writing_lists(grpc_exec_ctx *exec_ctx,
-                                     grpc_chttp2_transport *t,
-                                     grpc_chttp2_stream *s) {
-  if (s->need_fail_pending_writes_on_writes_finished) {
-    grpc_error *error = s->fail_pending_writes_on_writes_finished_error;
-    s->fail_pending_writes_on_writes_finished_error = NULL;
-    s->need_fail_pending_writes_on_writes_finished = false;
-    grpc_chttp2_fail_pending_writes(exec_ctx, t, s, error);
-  }
-}
-
 void grpc_chttp2_fail_pending_writes(grpc_exec_ctx *exec_ctx,
                                      grpc_chttp2_transport *t,
                                      grpc_chttp2_stream *s, grpc_error *error) {
-  if (s->need_fail_pending_writes_on_writes_finished ||
-      (t->write_state != GRPC_CHTTP2_WRITE_STATE_IDLE &&
-       (s->included[GRPC_CHTTP2_LIST_WRITABLE] ||
-        s->included[GRPC_CHTTP2_LIST_WRITING]))) {
-    /* If a write is in progress, and it involves this stream, wait for the
-     * write to complete before cancelling things out. If we don't do this, then
-     * our combiner lock might think that some operation on its queue might be
-     * covering a completion even though there is none, in which case we might
-     * offload to another thread, which isn't guarateed to exist */
-    if (error != GRPC_ERROR_NONE) {
-      if (s->fail_pending_writes_on_writes_finished_error == GRPC_ERROR_NONE) {
-        s->fail_pending_writes_on_writes_finished_error = GRPC_ERROR_CREATE(
-            "Post-poned fail writes due to in-progress write");
-      }
-      s->fail_pending_writes_on_writes_finished_error = grpc_error_add_child(
-          s->fail_pending_writes_on_writes_finished_error, error);
-    }
-    s->need_fail_pending_writes_on_writes_finished = true;
-    return; /* early out */
-  }
-
   error =
       removal_error(error, s, "Pending writes failed due to stream closure");
   s->send_initial_metadata = NULL;
@@ -1632,6 +1622,9 @@
     if (s->id != 0) {
       remove_stream(exec_ctx, t, s->id,
                     removal_error(GRPC_ERROR_REF(error), s, "Stream removed"));
+    } else {
+      /* Purge streams still waiting on concurrency, i.e. with no id assigned */
+      grpc_chttp2_list_remove_waiting_for_concurrency(t, s);
     }
     GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2");
   }
diff --git a/src/core/ext/transport/chttp2/transport/internal.h b/src/core/ext/transport/chttp2/transport/internal.h
index 6cba1e7..b727965 100644
--- a/src/core/ext/transport/chttp2/transport/internal.h
+++ b/src/core/ext/transport/chttp2/transport/internal.h
@@ -327,6 +327,9 @@
    */
   grpc_error *close_transport_on_writes_finished;
 
+  /* a list of closures to run after writes are finished */
+  grpc_closure_list run_after_write;
+
   /* buffer pool state */
   /** have we scheduled a benign cleanup? */
   bool benign_reclaimer_registered;
@@ -409,9 +412,6 @@
   grpc_error *read_closed_error;
   /** the error that resulted in this stream being write-closed */
   grpc_error *write_closed_error;
-  /** should any writes be cleared once this stream becomes non-writable */
-  bool need_fail_pending_writes_on_writes_finished;
-  grpc_error *fail_pending_writes_on_writes_finished_error;
 
   grpc_published_metadata_method published_metadata[2];
   bool final_metadata_requested;
@@ -496,6 +496,8 @@
                                                   grpc_chttp2_stream *s);
 int grpc_chttp2_list_pop_waiting_for_concurrency(grpc_chttp2_transport *t,
                                                  grpc_chttp2_stream **s);
+void grpc_chttp2_list_remove_waiting_for_concurrency(grpc_chttp2_transport *t,
+                                                     grpc_chttp2_stream *s);
 
 void grpc_chttp2_list_add_stalled_by_transport(grpc_chttp2_transport *t,
                                                grpc_chttp2_stream *s);
@@ -692,9 +694,6 @@
                                                        grpc_chttp2_transport *t,
                                                        grpc_chttp2_stream *s);
 
-void grpc_chttp2_leave_writing_lists(grpc_exec_ctx *exec_ctx,
-                                     grpc_chttp2_transport *t,
-                                     grpc_chttp2_stream *s);
 void grpc_chttp2_fail_pending_writes(grpc_exec_ctx *exec_ctx,
                                      grpc_chttp2_transport *t,
                                      grpc_chttp2_stream *s, grpc_error *error);
diff --git a/src/core/ext/transport/chttp2/transport/stream_lists.c b/src/core/ext/transport/chttp2/transport/stream_lists.c
index 6d25b3a..a60264c 100644
--- a/src/core/ext/transport/chttp2/transport/stream_lists.c
+++ b/src/core/ext/transport/chttp2/transport/stream_lists.c
@@ -158,6 +158,11 @@
   return stream_list_pop(t, s, GRPC_CHTTP2_LIST_WAITING_FOR_CONCURRENCY);
 }
 
+void grpc_chttp2_list_remove_waiting_for_concurrency(grpc_chttp2_transport *t,
+                                                     grpc_chttp2_stream *s) {
+  stream_list_maybe_remove(t, s, GRPC_CHTTP2_LIST_WAITING_FOR_CONCURRENCY);
+}
+
 void grpc_chttp2_list_add_stalled_by_transport(grpc_chttp2_transport *t,
                                                grpc_chttp2_stream *s) {
   stream_list_add(t, s, GRPC_CHTTP2_LIST_STALLED_BY_TRANSPORT);
diff --git a/src/core/ext/transport/chttp2/transport/writing.c b/src/core/ext/transport/chttp2/transport/writing.c
index 769b229..139e738 100644
--- a/src/core/ext/transport/chttp2/transport/writing.c
+++ b/src/core/ext/transport/chttp2/transport/writing.c
@@ -208,7 +208,6 @@
         GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:already_writing");
       }
     } else {
-      grpc_chttp2_leave_writing_lists(exec_ctx, t, s);
       GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:no_write");
     }
   }
@@ -253,7 +252,6 @@
       grpc_chttp2_mark_stream_closed(exec_ctx, t, s, !t->is_client, 1,
                                      GRPC_ERROR_REF(error));
     }
-    grpc_chttp2_leave_writing_lists(exec_ctx, t, s);
     GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:end");
   }
   grpc_slice_buffer_reset_and_unref(&t->outbuf);
diff --git a/src/core/ext/transport/cronet/transport/cronet_transport.c b/src/core/ext/transport/cronet/transport/cronet_transport.c
index a4c1101..afc59f4 100644
--- a/src/core/ext/transport/cronet/transport/cronet_transport.c
+++ b/src/core/ext/transport/cronet/transport/cronet_transport.c
@@ -149,6 +149,9 @@
 struct op_state {
   bool state_op_done[OP_NUM_OPS];
   bool state_callback_received[OP_NUM_OPS];
+  bool fail_state;
+  bool flush_read;
+  grpc_error *cancel_error;
   /* data structure for storing data coming from server */
   struct read_state rs;
   /* data structure for storing data going to the server */
@@ -248,6 +251,12 @@
   }
 }
 
+static grpc_error *make_error_with_desc(int error_code, const char *desc) {
+  grpc_error *error = GRPC_ERROR_CREATE(desc);
+  error = grpc_error_set_int(error, GRPC_ERROR_INT_GRPC_STATUS, error_code);
+  return error;
+}
+
 /*
   Add a new stream op to op storage.
 */
@@ -433,6 +442,18 @@
             grpc_mdstr_from_string(headers->headers[i].value)));
   }
   s->state.state_callback_received[OP_RECV_INITIAL_METADATA] = true;
+  if (!(s->state.state_op_done[OP_CANCEL_ERROR] ||
+        s->state.state_callback_received[OP_FAILED])) {
+    /* Do an extra read to trigger the on_succeeded() callback in case the
+     connection is closed */
+    GPR_ASSERT(s->state.rs.length_field_received == false);
+    s->state.rs.read_buffer = s->state.rs.grpc_header_bytes;
+    s->state.rs.received_bytes = 0;
+    s->state.rs.remaining_bytes = GRPC_HEADER_SIZE_IN_BYTES;
+    CRONET_LOG(GPR_DEBUG, "cronet_bidirectional_stream_read(%p)", s->cbs);
+    cronet_bidirectional_stream_read(s->cbs, s->state.rs.read_buffer,
+                                     s->state.rs.remaining_bytes);
+  }
   gpr_mu_unlock(&s->mu);
   execute_from_storage(s);
 }
@@ -464,7 +485,11 @@
              count);
   gpr_mu_lock(&s->mu);
   s->state.state_callback_received[OP_RECV_MESSAGE] = true;
-  if (count > 0) {
+  if (count > 0 && s->state.flush_read) {
+    CRONET_LOG(GPR_DEBUG, "cronet_bidirectional_stream_read(%p)", s->cbs);
+    cronet_bidirectional_stream_read(s->cbs, s->state.rs.read_buffer, 4096);
+    gpr_mu_unlock(&s->mu);
+  } else if (count > 0) {
     s->state.rs.received_bytes += count;
     s->state.rs.remaining_bytes -= count;
     if (s->state.rs.remaining_bytes > 0) {
@@ -479,6 +504,10 @@
       execute_from_storage(s);
     }
   } else {
+    if (s->state.flush_read) {
+      gpr_free(s->state.rs.read_buffer);
+      s->state.rs.read_buffer = NULL;
+    }
     s->state.rs.read_stream_closed = true;
     gpr_mu_unlock(&s->mu);
     execute_from_storage(s);
@@ -508,10 +537,27 @@
             grpc_mdstr_from_string(trailers->headers[i].key),
             grpc_mdstr_from_string(trailers->headers[i].value)));
     s->state.rs.trailing_metadata_valid = true;
+    if (0 == strcmp(trailers->headers[i].key, "grpc-status") &&
+        0 != strcmp(trailers->headers[i].value, "0")) {
+      s->state.fail_state = true;
+    }
   }
   s->state.state_callback_received[OP_RECV_TRAILING_METADATA] = true;
-  gpr_mu_unlock(&s->mu);
-  execute_from_storage(s);
+  /* Send an EOS when the server terminates the stream
+   * (testServerFinishesRequest) to trigger on_succeeded */
+  if (!s->state.state_op_done[OP_SEND_TRAILING_METADATA] &&
+      !(s->state.state_op_done[OP_CANCEL_ERROR] ||
+        s->state.state_callback_received[OP_FAILED])) {
+    CRONET_LOG(GPR_DEBUG, "cronet_bidirectional_stream_write (%p, 0)", s->cbs);
+    s->state.state_callback_received[OP_SEND_MESSAGE] = false;
+    cronet_bidirectional_stream_write(s->cbs, "", 0, true);
+    s->state.state_op_done[OP_SEND_TRAILING_METADATA] = true;
+
+    gpr_mu_unlock(&s->mu);
+  } else {
+    gpr_mu_unlock(&s->mu);
+    execute_from_storage(s);
+  }
 }
 
 /*
@@ -632,9 +678,9 @@
   /* When call is canceled, every op can be run, except under following
   conditions
   */
-  bool is_canceled_of_failed = stream_state->state_op_done[OP_CANCEL_ERROR] ||
+  bool is_canceled_or_failed = stream_state->state_op_done[OP_CANCEL_ERROR] ||
                                stream_state->state_callback_received[OP_FAILED];
-  if (is_canceled_of_failed) {
+  if (is_canceled_or_failed) {
     if (op_id == OP_SEND_INITIAL_METADATA) result = false;
     if (op_id == OP_SEND_MESSAGE) result = false;
     if (op_id == OP_SEND_TRAILING_METADATA) result = false;
@@ -778,16 +824,10 @@
       op_can_be_run(stream_op, stream_state, &oas->state,
                     OP_SEND_INITIAL_METADATA)) {
     CRONET_LOG(GPR_DEBUG, "running: %p OP_SEND_INITIAL_METADATA", oas);
-    /* This OP is the beginning. Reset various states */
-    memset(&s->header_array, 0, sizeof(s->header_array));
-    memset(&stream_state->rs, 0, sizeof(stream_state->rs));
-    memset(&stream_state->ws, 0, sizeof(stream_state->ws));
-    memset(stream_state->state_op_done, 0, sizeof(stream_state->state_op_done));
-    memset(stream_state->state_callback_received, 0,
-           sizeof(stream_state->state_callback_received));
     /* Start new cronet stream. It is destroyed in on_succeeded, on_canceled,
      * on_failed */
     GPR_ASSERT(s->cbs == NULL);
+    GPR_ASSERT(!stream_state->state_op_done[OP_SEND_INITIAL_METADATA]);
     s->cbs = cronet_bidirectional_stream_create(s->curr_ct.engine, s->curr_gs,
                                                 &cronet_callbacks);
     CRONET_LOG(GPR_DEBUG, "%p = cronet_bidirectional_stream_create()", s->cbs);
@@ -808,10 +848,13 @@
              op_can_be_run(stream_op, stream_state, &oas->state,
                            OP_RECV_INITIAL_METADATA)) {
     CRONET_LOG(GPR_DEBUG, "running: %p  OP_RECV_INITIAL_METADATA", oas);
-    if (stream_state->state_op_done[OP_CANCEL_ERROR] ||
-        stream_state->state_callback_received[OP_FAILED]) {
+    if (stream_state->state_op_done[OP_CANCEL_ERROR]) {
       grpc_exec_ctx_sched(exec_ctx, stream_op->recv_initial_metadata_ready,
                           GRPC_ERROR_CANCELLED, NULL);
+    } else if (stream_state->state_callback_received[OP_FAILED]) {
+      grpc_exec_ctx_sched(
+          exec_ctx, stream_op->recv_initial_metadata_ready,
+          make_error_with_desc(GRPC_STATUS_UNAVAILABLE, "Unavailable."), NULL);
     } else {
       grpc_chttp2_incoming_metadata_buffer_publish(
           &oas->s->state.rs.initial_metadata, stream_op->recv_initial_metadata);
@@ -865,12 +908,19 @@
              op_can_be_run(stream_op, stream_state, &oas->state,
                            OP_RECV_MESSAGE)) {
     CRONET_LOG(GPR_DEBUG, "running: %p  OP_RECV_MESSAGE", oas);
-    if (stream_state->state_op_done[OP_CANCEL_ERROR] ||
-        stream_state->state_callback_received[OP_FAILED]) {
-      CRONET_LOG(GPR_DEBUG, "Stream is either cancelled or failed.");
+    if (stream_state->state_op_done[OP_CANCEL_ERROR]) {
+      CRONET_LOG(GPR_DEBUG, "Stream is cancelled.");
       grpc_exec_ctx_sched(exec_ctx, stream_op->recv_message_ready,
                           GRPC_ERROR_CANCELLED, NULL);
       stream_state->state_op_done[OP_RECV_MESSAGE] = true;
+      result = ACTION_TAKEN_NO_CALLBACK;
+    } else if (stream_state->state_callback_received[OP_FAILED]) {
+      CRONET_LOG(GPR_DEBUG, "Stream failed.");
+      grpc_exec_ctx_sched(
+          exec_ctx, stream_op->recv_message_ready,
+          make_error_with_desc(GRPC_STATUS_UNAVAILABLE, "Unavailable."), NULL);
+      stream_state->state_op_done[OP_RECV_MESSAGE] = true;
+      result = ACTION_TAKEN_NO_CALLBACK;
     } else if (stream_state->rs.read_stream_closed == true) {
       /* No more data will be received */
       CRONET_LOG(GPR_DEBUG, "read stream closed");
@@ -878,6 +928,7 @@
                           GRPC_ERROR_NONE, NULL);
       stream_state->state_op_done[OP_RECV_MESSAGE] = true;
       oas->state.state_op_done[OP_RECV_MESSAGE] = true;
+      result = ACTION_TAKEN_NO_CALLBACK;
     } else if (stream_state->rs.length_field_received == false) {
       if (stream_state->rs.received_bytes == GRPC_HEADER_SIZE_IN_BYTES &&
           stream_state->rs.remaining_bytes == 0) {
@@ -946,10 +997,15 @@
                           GRPC_ERROR_NONE, NULL);
       stream_state->state_op_done[OP_RECV_MESSAGE] = true;
       oas->state.state_op_done[OP_RECV_MESSAGE] = true;
-      /* Clear read state of the stream, so next read op (if it were to come)
-       * will work */
-      stream_state->rs.received_bytes = stream_state->rs.remaining_bytes =
-          stream_state->rs.length_field_received = 0;
+      /* Do an extra read to trigger the on_succeeded() callback in case the
+         connection is closed */
+      stream_state->rs.read_buffer = stream_state->rs.grpc_header_bytes;
+      stream_state->rs.received_bytes = 0;
+      stream_state->rs.remaining_bytes = GRPC_HEADER_SIZE_IN_BYTES;
+      stream_state->rs.length_field_received = false;
+      CRONET_LOG(GPR_DEBUG, "cronet_bidirectional_stream_read(%p)", s->cbs);
+      cronet_bidirectional_stream_read(s->cbs, stream_state->rs.read_buffer,
+                                       stream_state->rs.remaining_bytes);
       result = ACTION_TAKEN_NO_CALLBACK;
     }
   } else if (stream_op->recv_trailing_metadata &&
@@ -986,17 +1042,25 @@
     CRONET_LOG(GPR_DEBUG, "W: cronet_bidirectional_stream_cancel(%p)", s->cbs);
     if (s->cbs) {
       cronet_bidirectional_stream_cancel(s->cbs);
+      result = ACTION_TAKEN_WITH_CALLBACK;
+    } else {
+      result = ACTION_TAKEN_NO_CALLBACK;
     }
     stream_state->state_op_done[OP_CANCEL_ERROR] = true;
-    result = ACTION_TAKEN_WITH_CALLBACK;
+    if (!stream_state->cancel_error) {
+      stream_state->cancel_error = GRPC_ERROR_REF(stream_op->cancel_error);
+    }
   } else if (stream_op->on_complete &&
              op_can_be_run(stream_op, stream_state, &oas->state,
                            OP_ON_COMPLETE)) {
     CRONET_LOG(GPR_DEBUG, "running: %p  OP_ON_COMPLETE", oas);
-    if (stream_state->state_op_done[OP_CANCEL_ERROR] ||
-        stream_state->state_callback_received[OP_FAILED]) {
+    if (stream_state->state_op_done[OP_CANCEL_ERROR]) {
       grpc_exec_ctx_sched(exec_ctx, stream_op->on_complete,
-                          GRPC_ERROR_CANCELLED, NULL);
+                          GRPC_ERROR_REF(stream_state->cancel_error), NULL);
+    } else if (stream_state->state_callback_received[OP_FAILED]) {
+      grpc_exec_ctx_sched(
+          exec_ctx, stream_op->on_complete,
+          make_error_with_desc(GRPC_STATUS_UNAVAILABLE, "Unavailable."), NULL);
     } else {
       /* All actions in this stream_op are complete. Call the on_complete
        * callback
@@ -1017,6 +1081,15 @@
       make a note */
     if (stream_op->recv_message)
       stream_state->state_op_done[OP_RECV_MESSAGE_AND_ON_COMPLETE] = true;
+  } else if (stream_state->fail_state && !stream_state->flush_read) {
+    CRONET_LOG(GPR_DEBUG, "running: %p  flush read", oas);
+    if (stream_state->rs.read_buffer &&
+        stream_state->rs.read_buffer != stream_state->rs.grpc_header_bytes) {
+      gpr_free(stream_state->rs.read_buffer);
+      stream_state->rs.read_buffer = NULL;
+    }
+    stream_state->rs.read_buffer = gpr_malloc(4096);
+    stream_state->flush_read = true;
   } else {
     result = NO_ACTION_POSSIBLE;
   }
@@ -1042,6 +1115,8 @@
   memset(s->state.state_op_done, 0, sizeof(s->state.state_op_done));
   memset(s->state.state_callback_received, 0,
          sizeof(s->state.state_callback_received));
+  s->state.fail_state = s->state.flush_read = false;
+  s->state.cancel_error = NULL;
   gpr_mu_init(&s->mu);
   return 0;
 }
@@ -1088,7 +1163,10 @@
 }
 
 static void destroy_stream(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
-                           grpc_stream *gs, void *and_free_memory) {}
+                           grpc_stream *gs, void *and_free_memory) {
+  stream_obj *s = (stream_obj *)gs;
+  GRPC_ERROR_UNREF(s->state.cancel_error);
+}
 
 static void destroy_transport(grpc_exec_ctx *exec_ctx, grpc_transport *gt) {}
 
diff --git a/src/core/lib/iomgr/tcp_posix.c b/src/core/lib/iomgr/tcp_posix.c
index 12a4797..fd80779 100644
--- a/src/core/lib/iomgr/tcp_posix.c
+++ b/src/core/lib/iomgr/tcp_posix.c
@@ -107,6 +107,12 @@
   grpc_resource_user_slice_allocator slice_allocator;
 } grpc_tcp;
 
+static grpc_error *tcp_annotate_error(grpc_error *src_error, grpc_tcp *tcp) {
+  return grpc_error_set_str(
+      grpc_error_set_int(src_error, GRPC_ERROR_INT_FD, tcp->fd),
+      GRPC_ERROR_STR_TARGET_ADDRESS, tcp->peer_string);
+}
+
 static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */,
                             grpc_error *error);
 static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */,
@@ -230,13 +236,15 @@
       grpc_fd_notify_on_read(exec_ctx, tcp->em_fd, &tcp->read_closure);
     } else {
       grpc_slice_buffer_reset_and_unref(tcp->incoming_buffer);
-      call_read_cb(exec_ctx, tcp, GRPC_OS_ERROR(errno, "recvmsg"));
+      call_read_cb(exec_ctx, tcp,
+                   tcp_annotate_error(GRPC_OS_ERROR(errno, "recvmsg"), tcp));
       TCP_UNREF(exec_ctx, tcp, "read");
     }
   } else if (read_bytes == 0) {
     /* 0 read size ==> end of stream */
     grpc_slice_buffer_reset_and_unref(tcp->incoming_buffer);
-    call_read_cb(exec_ctx, tcp, GRPC_ERROR_CREATE("Socket closed"));
+    call_read_cb(exec_ctx, tcp,
+                 tcp_annotate_error(GRPC_ERROR_CREATE("Socket closed"), tcp));
     TCP_UNREF(exec_ctx, tcp, "read");
   } else {
     GPR_ASSERT((size_t)read_bytes <= tcp->incoming_buffer->length);
@@ -366,7 +374,7 @@
         tcp->outgoing_byte_idx = unwind_byte_idx;
         return false;
       } else {
-        *error = GRPC_OS_ERROR(errno, "sendmsg");
+        *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "sendmsg"), tcp);
         return true;
       }
     }
@@ -447,9 +455,10 @@
 
   if (buf->length == 0) {
     GPR_TIMER_END("tcp_write", 0);
-    grpc_exec_ctx_sched(exec_ctx, cb, grpc_fd_is_shutdown(tcp->em_fd)
-                                          ? GRPC_ERROR_CREATE("EOF")
-                                          : GRPC_ERROR_NONE,
+    grpc_exec_ctx_sched(exec_ctx, cb,
+                        grpc_fd_is_shutdown(tcp->em_fd)
+                            ? tcp_annotate_error(GRPC_ERROR_CREATE("EOF"), tcp)
+                            : GRPC_ERROR_NONE,
                         NULL);
     return;
   }
diff --git a/src/core/lib/surface/call.c b/src/core/lib/surface/call.c
index 1e0f3ee..8ca3cab 100644
--- a/src/core/lib/surface/call.c
+++ b/src/core/lib/surface/call.c
@@ -1551,6 +1551,10 @@
           error = GRPC_CALL_ERROR_TOO_MANY_OPERATIONS;
           goto done_with_error;
         }
+        /* If this is a server, then GRPC_OP_RECV_INITIAL_METADATA *must* come
+           from server.c. In that case it's coming from accept_stream, so
+           we're not necessarily covered by a poller. */
+        stream_op->covered_by_poller = call->is_client;
         call->received_initial_metadata = 1;
         call->buffered_metadata[0] = op->data.recv_initial_metadata;
         grpc_closure_init(&call->receiving_initial_metadata_ready,
diff --git a/src/core/lib/surface/completion_queue.c b/src/core/lib/surface/completion_queue.c
index 4e0feb5..184c1a1 100644
--- a/src/core/lib/surface/completion_queue.c
+++ b/src/core/lib/surface/completion_queue.c
@@ -354,11 +354,13 @@
   gpr_strvec v;
   gpr_strvec_init(&v);
   gpr_strvec_add(&v, gpr_strdup("PENDING TAGS:"));
+  gpr_mu_lock(cc->mu);
   for (size_t i = 0; i < cc->outstanding_tag_count; i++) {
     char *s;
     gpr_asprintf(&s, " %p", cc->outstanding_tags[i]);
     gpr_strvec_add(&v, s);
   }
+  gpr_mu_unlock(cc->mu);
   char *out = gpr_strvec_flatten(&v, NULL);
   gpr_strvec_destroy(&v);
   gpr_log(GPR_DEBUG, "%s", out);
diff --git a/src/cpp/common/completion_queue_cc.cc b/src/cpp/common/completion_queue_cc.cc
index 00cc102..0408a41 100644
--- a/src/cpp/common/completion_queue_cc.cc
+++ b/src/cpp/common/completion_queue_cc.cc
@@ -43,11 +43,21 @@
 
 static internal::GrpcLibraryInitializer g_gli_initializer;
 
-CompletionQueue::CompletionQueue(grpc_completion_queue* take) : cq_(take) {}
+CompletionQueue::CompletionQueue(grpc_completion_queue* take) : cq_(take) {
+  InitialAvalanching();
+}
 
 void CompletionQueue::Shutdown() {
   g_gli_initializer.summon();
-  grpc_completion_queue_shutdown(cq_);
+  CompleteAvalanching();
+}
+
+void CompletionQueue::CompleteAvalanching() {
+  // Check if this was the last avalanching operation
+  if (gpr_atm_no_barrier_fetch_add(&avalanches_in_flight_,
+                                   static_cast<gpr_atm>(-1)) == 1) {
+    grpc_completion_queue_shutdown(cq_);
+  }
 }
 
 CompletionQueue::NextStatus CompletionQueue::AsyncNextInternal(
diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc
index b7cfd6d..817d85a 100644
--- a/src/cpp/server/server_cc.cc
+++ b/src/cpp/server/server_cc.cc
@@ -510,12 +510,6 @@
     ShutdownTag shutdown_tag;  // Dummy shutdown tag
     grpc_server_shutdown_and_notify(server_, shutdown_cq.cq(), &shutdown_tag);
 
-    // Shutdown all ThreadManagers. This will try to gracefully stop all the
-    // threads in the ThreadManagers (once they process any inflight requests)
-    for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) {
-      (*it)->Shutdown();  // ThreadManager's Shutdown()
-    }
-
     shutdown_cq.Shutdown();
 
     void* tag;
@@ -531,6 +525,12 @@
     // Else in case of SHUTDOWN or GOT_EVENT, it means that the server has
     // successfully shutdown
 
+    // Shutdown all ThreadManagers. This will try to gracefully stop all the
+    // threads in the ThreadManagers (once they process any inflight requests)
+    for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) {
+      (*it)->Shutdown();  // ThreadManager's Shutdown()
+    }
+
     // Wait for threads in all ThreadManagers to terminate
     for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) {
       (*it)->Wait();
@@ -575,9 +575,14 @@
       tag_(tag),
       delete_on_finalize_(delete_on_finalize),
       call_(nullptr) {
+  call_cq_->RegisterAvalanching();  // This op will trigger more ops
   memset(&initial_metadata_array_, 0, sizeof(initial_metadata_array_));
 }
 
+ServerInterface::BaseAsyncRequest::~BaseAsyncRequest() {
+  call_cq_->CompleteAvalanching();
+}
+
 bool ServerInterface::BaseAsyncRequest::FinalizeResult(void** tag,
                                                        bool* status) {
   if (*status) {
diff --git a/src/node/performance/worker_service_impl.js b/src/node/performance/worker_service_impl.js
index 3f317f6..38888a7 100644
--- a/src/node/performance/worker_service_impl.js
+++ b/src/node/performance/worker_service_impl.js
@@ -55,9 +55,8 @@
   }
 
   this.quitWorker = function quitWorker(call, callback) {
-    server.tryShutdown(function() {
-      callback(null, {});
-    });
+    callback(null, {});
+    server.tryShutdown(function() {});
   };
 
   this.runClient = function runClient(call) {
diff --git a/src/objective-c/GRPCClient/private/GRPCWrappedCall.m b/src/objective-c/GRPCClient/private/GRPCWrappedCall.m
index 627b6aa..38fcae0 100644
--- a/src/objective-c/GRPCClient/private/GRPCWrappedCall.m
+++ b/src/objective-c/GRPCClient/private/GRPCWrappedCall.m
@@ -112,7 +112,7 @@
 }
 
 - (void)dealloc {
-  gpr_free(_op.data.send_message);
+  grpc_byte_buffer_destroy(_op.data.send_message);
 }
 
 @end
diff --git a/src/objective-c/tests/CoreCronetEnd2EndTests/CoreCronetEnd2EndTests.m b/src/objective-c/tests/CoreCronetEnd2EndTests/CoreCronetEnd2EndTests.m
index 4a92cc8..4ba7bad 100644
--- a/src/objective-c/tests/CoreCronetEnd2EndTests/CoreCronetEnd2EndTests.m
+++ b/src/objective-c/tests/CoreCronetEnd2EndTests/CoreCronetEnd2EndTests.m
@@ -316,7 +316,8 @@
 }
 
 - (void)testInvokeLargeRequest {
-  [self testIndividualCase:"invoke_large_request"];
+  // NOT SUPPORTED (frame size)
+  // [self testIndividualCase:"invoke_large_request"];
 }
 
 - (void)testLargeMetadata {
@@ -329,7 +330,8 @@
 }
 
 - (void)testMaxMessageLength {
-  [self testIndividualCase:"max_message_length"];
+  // NOT SUPPORTED (close_error)
+  // [self testIndividualCase:"max_message_length"];
 }
 
 - (void)testNegativeDeadline {
diff --git a/src/objective-c/tests/GRPCClientTests.m b/src/objective-c/tests/GRPCClientTests.m
index 7764052..0b72a75 100644
--- a/src/objective-c/tests/GRPCClientTests.m
+++ b/src/objective-c/tests/GRPCClientTests.m
@@ -43,6 +43,8 @@
 #import <RxLibrary/GRXWriteable.h>
 #import <RxLibrary/GRXWriter+Immediate.h>
 
+#define TEST_TIMEOUT 16
+
 static NSString * const kHostAddress = @"localhost:5050";
 static NSString * const kPackage = @"grpc.testing";
 static NSString * const kService = @"TestService";
@@ -137,7 +139,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:4 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testEmptyRPC {
@@ -159,7 +161,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testSimpleProtoRPC {
@@ -191,7 +193,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testMetadata {
@@ -225,7 +227,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:4 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testResponseMetadataKVO {
@@ -256,7 +258,7 @@
   
   [call startWithWriteable:responsesWriteable];
   
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testUserAgentPrefix {
@@ -287,7 +289,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 // TODO(makarandd): Move to a different file that contains only unit tests
@@ -347,7 +349,7 @@
 
   [call startWithWriteable:responsesWriteable];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 @end
diff --git a/src/objective-c/tests/InteropTests.m b/src/objective-c/tests/InteropTests.m
index 9804734..c3935ce 100644
--- a/src/objective-c/tests/InteropTests.m
+++ b/src/objective-c/tests/InteropTests.m
@@ -46,6 +46,8 @@
 #import <RxLibrary/GRXBufferedPipe.h>
 #import <RxLibrary/GRXWriter+Immediate.h>
 
+#define TEST_TIMEOUT 32
+
 // Convenience constructors for the generated proto messages:
 
 @interface RMTStreamingOutputCallRequest (Constructors)
@@ -124,7 +126,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:4 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testLargeUnaryRPC {
@@ -147,7 +149,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:16 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)test4MBResponsesAreAccepted {
@@ -164,7 +166,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:16 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testResponsesOverMaxSizeFailWithActionableMessage {
@@ -185,7 +187,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:16 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testResponsesOver4MBAreAcceptedIfOptedIn {
@@ -205,7 +207,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:16 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testClientStreamingRPC {
@@ -238,7 +240,7 @@
     [expectation fulfill];
   }];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testServerStreamingRPC {
@@ -275,7 +277,7 @@
     }
   }];
 
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testPingPongRPC {
@@ -319,7 +321,7 @@
       [expectation fulfill];
     }
   }];
-  [self waitForExpectationsWithTimeout:4 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 #ifndef GRPC_COMPILE_WITH_CRONET
@@ -335,7 +337,7 @@
     XCTAssert(done, @"Unexpected response: %@", response);
     [expectation fulfill];
   }];
-  [self waitForExpectationsWithTimeout:2 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 #endif
 
@@ -361,7 +363,7 @@
   [call cancel];
   XCTAssertEqual(call.state, GRXWriterStateFinished);
 
-  [self waitForExpectationsWithTimeout:1 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testCancelAfterFirstResponseRPC {
@@ -396,7 +398,7 @@
     }
   }];
   [call start];
-  [self waitForExpectationsWithTimeout:8 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 - (void)testRPCAfterClosingOpenConnections {
@@ -420,7 +422,7 @@
     }];
   }];
 
-  [self waitForExpectationsWithTimeout:4 handler:nil];
+  [self waitForExpectationsWithTimeout:TEST_TIMEOUT handler:nil];
 }
 
 @end
diff --git a/test/cpp/end2end/server_crash_test.cc b/test/cpp/end2end/server_crash_test.cc
index 8cee140..b1f9216 100644
--- a/test/cpp/end2end/server_crash_test.cc
+++ b/test/cpp/end2end/server_crash_test.cc
@@ -138,7 +138,7 @@
   auto server = CreateServerAndClient("response");
 
   gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_REALTIME),
-                               gpr_time_from_seconds(5, GPR_TIMESPAN)));
+                               gpr_time_from_seconds(60, GPR_TIMESPAN)));
   KillClient();
   server->Shutdown();
   GPR_ASSERT(HadOneResponseStream());
@@ -148,7 +148,7 @@
   auto server = CreateServerAndClient("bidi");
 
   gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_REALTIME),
-                               gpr_time_from_seconds(5, GPR_TIMESPAN)));
+                               gpr_time_from_seconds(60, GPR_TIMESPAN)));
   KillClient();
   server->Shutdown();
   GPR_ASSERT(HadOneBidiStream());
diff --git a/test/cpp/interop/interop_server.cc b/test/cpp/interop/interop_server.cc
index 8b50ae8..67456ce 100644
--- a/test/cpp/interop/interop_server.cc
+++ b/test/cpp/interop/interop_server.cc
@@ -344,7 +344,7 @@
   }
   std::unique_ptr<Server> server(builder.BuildAndStart());
   gpr_log(GPR_INFO, "Server listening on %s", server_address.str().c_str());
-  while (!g_got_sigint) {
+  while (!gpr_atm_no_barrier_load(&g_got_sigint)) {
     sleep(5);
   }
 }
diff --git a/test/cpp/interop/interop_server_bootstrap.cc b/test/cpp/interop/interop_server_bootstrap.cc
index 424f7ca..99518c6 100644
--- a/test/cpp/interop/interop_server_bootstrap.cc
+++ b/test/cpp/interop/interop_server_bootstrap.cc
@@ -37,10 +37,10 @@
 #include "test/cpp/interop/server_helper.h"
 #include "test/cpp/util/test_config.h"
 
-bool grpc::testing::interop::g_got_sigint = false;
+gpr_atm grpc::testing::interop::g_got_sigint;
 
 static void sigint_handler(int x) {
-  grpc::testing::interop::g_got_sigint = true;
+  gpr_atm_no_barrier_store(&grpc::testing::interop::g_got_sigint, true);
 }
 
 int main(int argc, char** argv) {
diff --git a/test/cpp/interop/interop_test.cc b/test/cpp/interop/interop_test.cc
index c066598..d400474 100644
--- a/test/cpp/interop/interop_test.cc
+++ b/test/cpp/interop/interop_test.cc
@@ -126,7 +126,7 @@
     return 1;
   }
   /* wait a little */
-  sleep(2);
+  sleep(10);
   /* start the clients */
   ret = test_client(root, "127.0.0.1", port);
   if (ret != 0) return ret;
diff --git a/test/cpp/interop/server_helper.h b/test/cpp/interop/server_helper.h
index fc4ea8b..99539ad 100644
--- a/test/cpp/interop/server_helper.h
+++ b/test/cpp/interop/server_helper.h
@@ -36,9 +36,11 @@
 
 #include <memory>
 
+#include <grpc/compression.h>
+#include <grpc/impl/codegen/atm.h>
+
 #include <grpc++/security/server_credentials.h>
 #include <grpc++/server_context.h>
-#include <grpc/compression.h>
 
 namespace grpc {
 namespace testing {
@@ -62,7 +64,7 @@
 
 namespace interop {
 
-extern bool g_got_sigint;
+extern gpr_atm g_got_sigint;
 void RunServer(std::shared_ptr<ServerCredentials> creds);
 
 }  // namespace interop
diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc
index 6cc780d..6c0bf80 100644
--- a/test/cpp/microbenchmarks/bm_fullstack.cc
+++ b/test/cpp/microbenchmarks/bm_fullstack.cc
@@ -59,7 +59,7 @@
 }
 #include "src/cpp/client/create_channel_internal.h"
 #include "src/proto/grpc/testing/echo.grpc.pb.h"
-#include "third_party/google_benchmark/include/benchmark/benchmark.h"
+#include "third_party/benchmark/include/benchmark/benchmark.h"
 
 namespace grpc {
 namespace testing {
diff --git a/test/cpp/microbenchmarks/noop-benchmark.cc b/test/cpp/microbenchmarks/noop-benchmark.cc
index 6b06c69..99fa6d5 100644
--- a/test/cpp/microbenchmarks/noop-benchmark.cc
+++ b/test/cpp/microbenchmarks/noop-benchmark.cc
@@ -31,10 +31,10 @@
  *
  */
 
-/* This benchmark exists to ensure that the google_benchmark integration is
+/* This benchmark exists to ensure that the benchmark integration is
  * working */
 
-#include "third_party/google_benchmark/include/benchmark/benchmark.h"
+#include "third_party/benchmark/include/benchmark/benchmark.h"
 
 static void BM_NoOp(benchmark::State& state) {
   while (state.KeepRunning()) {
diff --git a/test/cpp/qps/driver.cc b/test/cpp/qps/driver.cc
index ea0b38e..22b2cd0 100644
--- a/test/cpp/qps/driver.cc
+++ b/test/cpp/qps/driver.cc
@@ -101,7 +101,7 @@
 
 static deque<string> get_workers(const string& name) {
   char* env = gpr_getenv(name.c_str());
-  if (!env) return deque<string>();
+  if (!env || strlen(env) == 0) return deque<string>();
 
   deque<string> out;
   char* p = env;
diff --git a/test/cpp/qps/gen_build_yaml.py b/test/cpp/qps/gen_build_yaml.py
index 4aa58d2..188d619 100755
--- a/test/cpp/qps/gen_build_yaml.py
+++ b/test/cpp/qps/gen_build_yaml.py
@@ -91,7 +91,7 @@
       'boringssl': True,
       'defaults': 'boringssl',
       'cpu_cost': guess_cpu(scenario_json, False),
-      'exclude_configs': ['tsan'],
+      'exclude_configs': ['tsan', 'asan'],
       'timeout_seconds': 6*60
     }
     for scenario_json in scenario_config.CXXLanguage().scenarios()
@@ -99,7 +99,7 @@
   ] + [
     {
       'name': 'json_run_localhost',
-      'shortname': 'json_run_localhost:%s' % scenario_json['name'],
+      'shortname': 'json_run_localhost:%s_low_thread_count' % scenario_json['name'],
       'args': ['--scenarios_json', _scenario_json_string(scenario_json, True)],
       'ci_platforms': ['linux'],
       'platforms': ['linux'],
@@ -108,7 +108,7 @@
       'boringssl': True,
       'defaults': 'boringssl',
       'cpu_cost': guess_cpu(scenario_json, True),
-      'exclude_configs': sorted(c for c in configs_from_yaml if c != 'tsan'),
+      'exclude_configs': sorted(c for c in configs_from_yaml if c not in ('tsan', 'asan')),
       'timeout_seconds': 6*60
     }
     for scenario_json in scenario_config.CXXLanguage().scenarios()
diff --git a/third_party/google_benchmark b/third_party/benchmark
similarity index 100%
rename from third_party/google_benchmark
rename to third_party/benchmark
diff --git a/tools/buildgen/generate_build_additions.sh b/tools/buildgen/generate_build_additions.sh
index 1ea4704..53c30c7 100644
--- a/tools/buildgen/generate_build_additions.sh
+++ b/tools/buildgen/generate_build_additions.sh
@@ -30,7 +30,7 @@
 
 gen_build_yaml_dirs="  \
   src/boringssl        \
-  src/google_benchmark \
+  src/benchmark \
   src/proto            \
   src/zlib             \
   test/core/bad_client \
diff --git a/tools/gce/linux_performance_worker_init.sh b/tools/gce/linux_performance_worker_init.sh
index 523749e..ab29e01 100755
--- a/tools/gce/linux_performance_worker_init.sh
+++ b/tools/gce/linux_performance_worker_init.sh
@@ -150,3 +150,19 @@
 # Put go on the PATH, keep the usual installation dir
 sudo ln -s /usr/local/go/bin/go /usr/bin/go
 rm go$GO_VERSION.$OS-$ARCH.tar.gz
+
+# Install perf, to profile benchmarks. (need to get the right linux-tools-<> for kernel version)
+sudo apt-get install -y linux-tools-common linux-tools-generic linux-tools-`uname -r`
+# see http://unix.stackexchange.com/questions/14227/do-i-need-root-admin-permissions-to-run-userspace-perf-tool-perf-events-ar
+echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
+# see http://stackoverflow.com/questions/21284906/perf-couldnt-record-kernel-reference-relocation-symbol
+echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
+
+# qps workers under perf appear to need a lot of mmap pages under certain scenarios and perf args in
+# order to not lose perf events or time out
+echo 4096 | sudo tee /proc/sys/kernel/perf_event_mlock_kb
+
+# Fetch scripts to generate flame graphs from perf data collected
+# on benchmarks
+git clone -v https://github.com/brendangregg/FlameGraph ~/FlameGraph
+
diff --git a/tools/internal_ci/linux/run_tests.sh b/tools/internal_ci/linux/grpc_fuzzer_client.cfg
old mode 100755
new mode 100644
similarity index 82%
copy from tools/internal_ci/linux/run_tests.sh
copy to tools/internal_ci/linux/grpc_fuzzer_client.cfg
index be477c1..b1bce02
--- a/tools/internal_ci/linux/run_tests.sh
+++ b/tools/internal_ci/linux/grpc_fuzzer_client.cfg
@@ -28,18 +28,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-set -ex
+# Config file for the internal CI (in protobuf text format)
 
-# change to grpc repo root
-cd $(dirname $0)/../../..
-
-# TODO(jtattermusch): get rid of the system inspection eventually
-nproc || true
-lsb_release -dc || true
-gcc --version || true
-clang --version || true
-docker --version || true
-
-git submodule update --init
-
-tools/run_tests/run_tests.py -l c --build_only
+# Location of the continuous shell script in repository.
+build_file: "grpc/tools/internal_ci/linux/grpc_fuzzer_client.sh"
+timeout_mins: 1440  # 24 hours is the maximum allowed value
+action {
+  define_artifacts {
+    regex: "git/grpc/fuzzer_output/**"
+  }
+}
diff --git a/tools/internal_ci/linux/run_tests.sh b/tools/internal_ci/linux/grpc_fuzzer_client.sh
similarity index 87%
copy from tools/internal_ci/linux/run_tests.sh
copy to tools/internal_ci/linux/grpc_fuzzer_client.sh
index be477c1..f9ff13d 100755
--- a/tools/internal_ci/linux/run_tests.sh
+++ b/tools/internal_ci/linux/grpc_fuzzer_client.sh
@@ -33,13 +33,9 @@
 # change to grpc repo root
 cd $(dirname $0)/../../..
 
-# TODO(jtattermusch): get rid of the system inspection eventually
-nproc || true
-lsb_release -dc || true
-gcc --version || true
-clang --version || true
-docker --version || true
-
 git submodule update --init
 
-tools/run_tests/run_tests.py -l c --build_only
+# download fuzzer docker image from dockerhub
+export DOCKERHUB_ORGANIZATION=grpctesting
+# runtime 23 * 60 mins
+config=asan-trace-cmp runtime=82800 tools/jenkins/run_fuzzer.sh client_fuzzer
diff --git a/tools/internal_ci/linux/grpc_master.cfg b/tools/internal_ci/linux/grpc_master.cfg
index 1f81660..8ce2ef1 100644
--- a/tools/internal_ci/linux/grpc_master.cfg
+++ b/tools/internal_ci/linux/grpc_master.cfg
@@ -31,4 +31,10 @@
 # Config file for the internal CI (in protobuf text format)
 
 # Location of the continuous shell script in repository.
-build_file: "grpc/tools/internal_ci/linux/run_tests.sh"
+build_file: "grpc/tools/internal_ci/linux/grpc_master.sh"
+timeout_mins: 60
+action {
+  define_artifacts {
+    regex: "**/sponge_log.xml"
+  }
+}
diff --git a/tools/internal_ci/linux/run_tests.sh b/tools/internal_ci/linux/grpc_master.sh
similarity index 88%
rename from tools/internal_ci/linux/run_tests.sh
rename to tools/internal_ci/linux/grpc_master.sh
index be477c1..ea77d11 100755
--- a/tools/internal_ci/linux/run_tests.sh
+++ b/tools/internal_ci/linux/grpc_master.sh
@@ -42,4 +42,12 @@
 
 git submodule update --init
 
-tools/run_tests/run_tests.py -l c --build_only
+tools/run_tests/run_tests.py -l c -t -x sponge_log.xml || FAILED="true"
+
+# kill port_server.py to prevent the build from hanging
+ps aux | grep port_server\\.py | awk '{print $2}' | xargs kill -9
+
+if [ "$FAILED" != "" ]
+then
+  exit 1
+fi
diff --git a/tools/run_tests/build_python.sh b/tools/run_tests/build_python.sh
index fb884ad..7cac394 100755
--- a/tools/run_tests/build_python.sh
+++ b/tools/run_tests/build_python.sh
@@ -171,8 +171,7 @@
 }
 
 $VENV_PYTHON -m pip install --upgrade pip
-# TODO(https://github.com/pypa/setuptools/issues/709) get the latest setuptools
-$VENV_PYTHON -m pip install setuptools==25.1.1
+$VENV_PYTHON -m pip install setuptools
 $VENV_PYTHON -m pip install cython
 pip_install_dir $ROOT
 $VENV_PYTHON $ROOT/tools/distrib/python/make_grpcio_tools.py
diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index 2acc797..1b5d6d6 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -139,16 +139,16 @@
       if explanatory_text:
         print(explanatory_text)
       print('%s: %s' % (tag, msg))
-      return
-    sys.stdout.write('%s%s%s\x1b[%d;%dm%s\x1b[0m: %s%s' % (
-        _BEGINNING_OF_LINE,
-        _CLEAR_LINE,
-        '\n%s' % explanatory_text if explanatory_text is not None else '',
-        _COLORS[_TAG_COLOR[tag]][1],
-        _COLORS[_TAG_COLOR[tag]][0],
-        tag,
-        msg,
-        '\n' if do_newline or explanatory_text is not None else ''))
+    else:
+      sys.stdout.write('%s%s%s\x1b[%d;%dm%s\x1b[0m: %s%s' % (
+          _BEGINNING_OF_LINE,
+          _CLEAR_LINE,
+          '\n%s' % explanatory_text if explanatory_text is not None else '',
+          _COLORS[_TAG_COLOR[tag]][1],
+          _COLORS[_TAG_COLOR[tag]][0],
+          tag,
+          msg,
+          '\n' if do_newline or explanatory_text is not None else ''))
     sys.stdout.flush()
   except:
     pass
@@ -406,7 +406,7 @@
         self.resultset[job.GetSpec().shortname].append(job.result)
         self._running.remove(job)
       if dead: return
-      if (not self._travis):
+      if not self._travis and platform_string() != 'windows':
         rstr = '' if self._remaining is None else '%d queued, ' % self._remaining
         if self._remaining is not None and self._completed > 0:
           now = time.time()
diff --git a/tools/internal_ci/linux/run_tests.sh b/tools/run_tests/performance/process_local_perf_flamegraphs.sh
similarity index 79%
copy from tools/internal_ci/linux/run_tests.sh
copy to tools/run_tests/performance/process_local_perf_flamegraphs.sh
index be477c1..d15610f 100755
--- a/tools/internal_ci/linux/run_tests.sh
+++ b/tools/run_tests/performance/process_local_perf_flamegraphs.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2016, Google Inc.
+# Copyright 2015, Google Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -28,18 +28,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-set -ex
+mkdir -p $OUTPUT_DIR
 
-# change to grpc repo root
-cd $(dirname $0)/../../..
+PERF_DATA_FILE=${PERF_BASE_NAME}-perf.data
+PERF_SCRIPT_OUTPUT=${PERF_BASE_NAME}-out.perf
 
-# TODO(jtattermusch): get rid of the system inspection eventually
-nproc || true
-lsb_release -dc || true
-gcc --version || true
-clang --version || true
-docker --version || true
+# Generate Flame graphs
+echo "running perf script on $PERF_DATA_FILE"
+perf script -i $PERF_DATA_FILE > $PERF_SCRIPT_OUTPUT
 
-git submodule update --init
-
-tools/run_tests/run_tests.py -l c --build_only
+~/FlameGraph/stackcollapse-perf.pl $PERF_SCRIPT_OUTPUT | ~/FlameGraph/flamegraph.pl > ${OUTPUT_DIR}/${OUTPUT_FILENAME}.svg
diff --git a/src/google_benchmark/gen_build_yaml.py b/tools/run_tests/performance/process_remote_perf_flamegraphs.sh
similarity index 71%
copy from src/google_benchmark/gen_build_yaml.py
copy to tools/run_tests/performance/process_remote_perf_flamegraphs.sh
index 302e087..cc07535 100755
--- a/src/google_benchmark/gen_build_yaml.py
+++ b/tools/run_tests/performance/process_remote_perf_flamegraphs.sh
@@ -1,5 +1,4 @@
-#!/usr/bin/env python2.7
-
+#!/bin/bash
 # Copyright 2015, Google Inc.
 # All rights reserved.
 #
@@ -29,25 +28,17 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import sys
-import glob
-import yaml
+mkdir -p $OUTPUT_DIR
 
-os.chdir(os.path.dirname(sys.argv[0])+'/../..')
+PERF_DATA_FILE=${PERF_BASE_NAME}-perf.data
+PERF_SCRIPT_OUTPUT=${PERF_BASE_NAME}-out.perf
 
-out = {}
+# Generate Flame graphs
+echo "running perf script on $USER_AT_HOST with perf.data"
+ssh $USER_AT_HOST "cd ~/performance_workspace/grpc && perf script -i $PERF_DATA_FILE | gzip > ${PERF_SCRIPT_OUTPUT}.gz"
 
-out['libs'] = [{
-    'name': 'google_benchmark',
-    'build': 'private',
-    'language': 'c++',
-    'secure': 'no',
-    'defaults': 'google_benchmark',
-    'src': sorted(glob.glob('third_party/google_benchmark/src/*.cc')),
-    'headers': sorted(
-        glob.glob('third_party/google_benchmark/src/*.h') +
-        glob.glob('third_party/google_benchmark/include/benchmark/*.h')),
-}]
+scp $USER_AT_HOST:~/performance_workspace/grpc/$PERF_SCRIPT_OUTPUT.gz .
 
-print yaml.dump(out)
+gzip -d -f $PERF_SCRIPT_OUTPUT.gz
+
+~/FlameGraph/stackcollapse-perf.pl --kernel $PERF_SCRIPT_OUTPUT | ~/FlameGraph/flamegraph.pl --color=java --hash > ${OUTPUT_DIR}/${OUTPUT_FILENAME}.svg
diff --git a/tools/run_tests/report_utils.py b/tools/run_tests/report_utils.py
index 90055e3..5ce2a87 100644
--- a/tools/run_tests/report_utils.py
+++ b/tools/run_tests/report_utils.py
@@ -122,3 +122,10 @@
   except:
     print(exceptions.text_error_template().render())
     raise
+
+def render_perf_profiling_results(output_filepath, profile_names):
+  with open(output_filepath, 'w') as output_file:
+    output_file.write('<ul>\n')
+    for name in profile_names:
+      output_file.write('<li><a href=%s>%s</a></li>\n' % (name, name))
+    output_file.write('</ul>\n')
diff --git a/tools/run_tests/run_performance_tests.py b/tools/run_tests/run_performance_tests.py
index 1d0c98f..69ccff8 100755
--- a/tools/run_tests/run_performance_tests.py
+++ b/tools/run_tests/run_performance_tests.py
@@ -49,6 +49,7 @@
 import time
 import traceback
 import uuid
+import report_utils
 
 
 _ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
@@ -57,15 +58,18 @@
 
 _REMOTE_HOST_USERNAME = 'jenkins'
 
+_PERF_REPORT_OUTPUT_DIR = 'perf_reports'
+
 
 class QpsWorkerJob:
   """Encapsulates a qps worker server job."""
 
-  def __init__(self, spec, language, host_and_port):
+  def __init__(self, spec, language, host_and_port, perf_file_base_name=None):
     self._spec = spec
     self.language = language
     self.host_and_port = host_and_port
     self._job = None
+    self.perf_file_base_name = perf_file_base_name
 
   def start(self):
     self._job = jobset.Job(self._spec, newline_on_success=True, travis=True, add_env={})
@@ -80,24 +84,32 @@
       self._job = None
 
 
-def create_qpsworker_job(language, shortname=None,
-                         port=10000, remote_host=None):
-  cmdline = language.worker_cmdline() + ['--driver_port=%s' % port]
+def create_qpsworker_job(language, shortname=None, port=10000, remote_host=None, perf_cmd=None):
+  cmdline = (language.worker_cmdline() + ['--driver_port=%s' % port])
+
   if remote_host:
-    user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
-    cmdline = ['ssh',
-               str(user_at_host),
-               'cd ~/performance_workspace/grpc/ && %s' % ' '.join(cmdline)]
     host_and_port='%s:%s' % (remote_host, port)
   else:
     host_and_port='localhost:%s' % port
 
+  perf_file_base_name = None
+  if perf_cmd:
+    perf_file_base_name = '%s-%s' % (host_and_port, shortname)
+    # specify -o output file so perf.data gets collected when worker stopped
+    cmdline = perf_cmd + ['-o', '%s-perf.data' % perf_file_base_name] + cmdline
+
+  if remote_host:
+    user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
+    ssh_cmd = ['ssh']
+    ssh_cmd.extend([str(user_at_host), 'cd ~/performance_workspace/grpc/ && %s' % ' '.join(cmdline)])
+    cmdline = ssh_cmd
+
   jobspec = jobset.JobSpec(
       cmdline=cmdline,
       shortname=shortname,
       timeout_seconds=5*60,  # workers get restarted after each scenario
       verbose_success=True)
-  return QpsWorkerJob(jobspec, language, host_and_port)
+  return QpsWorkerJob(jobspec, language, host_and_port, perf_file_base_name)
 
 
 def create_scenario_jobspec(scenario_json, workers, remote_host=None,
@@ -259,7 +271,7 @@
     sys.exit(1)
 
 
-def create_qpsworkers(languages, worker_hosts):
+def create_qpsworkers(languages, worker_hosts, perf_cmd=None):
   """Creates QPS workers (but does not start them)."""
   if not worker_hosts:
     # run two workers locally (for each language)
@@ -275,11 +287,32 @@
                                shortname= 'qps_worker_%s_%s' % (language,
                                                                 worker_idx),
                                port=worker[1] + language.worker_port_offset(),
-                               remote_host=worker[0])
+                               remote_host=worker[0],
+                               perf_cmd=perf_cmd)
           for language in languages
           for worker_idx, worker in enumerate(workers)]
 
 
+def perf_report_processor_job(worker_host, perf_base_name, output_filename):
+  print('Creating perf report collection job for %s' % worker_host)
+  cmd = ''
+  if worker_host != 'localhost':
+    user_at_host = "%s@%s" % (_REMOTE_HOST_USERNAME, worker_host)
+    cmd = "USER_AT_HOST=%s OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s\
+         tools/run_tests/performance/process_remote_perf_flamegraphs.sh" \
+          % (user_at_host, output_filename, _PERF_REPORT_OUTPUT_DIR, perf_base_name)
+  else:
+    cmd = "OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s\
+          tools/run_tests/performance/process_local_perf_flamegraphs.sh" \
+          % (output_filename, _PERF_REPORT_OUTPUT_DIR, perf_base_name)
+
+  return jobset.JobSpec(cmdline=cmd,
+                        timeout_seconds=3*60,
+                        shell=True,
+                        verbose_success=True,
+                        shortname='process perf report')
+
+
 Scenario = collections.namedtuple('Scenario', 'jobspec workers name')
 
 
@@ -372,6 +405,31 @@
   print('All QPS workers finished.')
   return num_killed
 
+profile_output_files = []
+
+# Collect perf text reports and flamegraphs if perf_cmd was used
+# Note the base names of perf text reports are used when creating and processing
+# perf data. The scenario name uniqifies the output name in the final
+# perf reports directory. 
+# Also, the perf profiles need to be fetched and processed after each scenario
+# in order to avoid clobbering the output files.
+def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name):
+  perf_report_jobs = []
+  global profile_output_files
+  for host_and_port in hosts_and_base_names:
+    perf_base_name = hosts_and_base_names[host_and_port]
+    output_filename = '%s-%s' % (scenario_name, perf_base_name)
+    # from the base filename, create .svg output filename
+    host = host_and_port.split(':')[0]
+    profile_output_files.append('%s.svg' % output_filename)
+    perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename))
+
+  jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True)
+  failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1)
+  jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True)
+  return failures
+
+
 argp = argparse.ArgumentParser(description='Run performance tests.')
 argp.add_argument('-l', '--language',
                   choices=['all'] + sorted(scenario_config.LANGUAGES.keys()),
@@ -405,6 +463,33 @@
                   help='Run netperf benchmark as one of the scenarios.')
 argp.add_argument('-x', '--xml_report', default='report.xml', type=str,
                   help='Name of XML report file to generate.')
+argp.add_argument('--perf_args',
+                  help=('Example usage: "--perf_args=record -F 99 -g". '
+                        'Wrap QPS workers in a perf command '
+                        'with the arguments to perf specified here. '
+                        '".svg" flame graph profiles will be '
+                        'created for each Qps Worker on each scenario. '
+                        'Files will output to "<repo_root>/perf_reports" '
+                        'directory. Output files from running the worker '
+                        'under perf are saved in the repo root where its ran. '
+                        'Note that the perf "-g" flag is necessary for '
+                        'flame graphs generation to work (assuming the binary '
+                        'being profiled uses frame pointers, check out '
+                        '"--call-graph dwarf" option using libunwind otherwise.) '
+                        'Also note that the entire "--perf_args=<arg(s)>" must '
+                        'be wrapped in quotes as in the example usage. '
+                        'If the "--perf_args" is unspecified, "perf" will '
+                        'not be used at all. '
+                        'See http://www.brendangregg.com/perf.html '
+                        'for more general perf examples.'))
+argp.add_argument('--skip_generate_flamegraphs',
+                  default=False,
+                  action='store_const',
+                  const=True,
+                  help=('Turn flame graph generation off. '
+                        'May be useful if "perf_args" arguments do not make sense for '
+                        'generating flamegraphs (e.g., "--perf_args=stat ...")'))
+
 
 args = argp.parse_args()
 
@@ -435,7 +520,13 @@
 if not args.dry_run:
   build_on_remote_hosts(remote_hosts, languages=[str(l) for l in languages], build_local=build_local)
 
-qpsworker_jobs = create_qpsworkers(languages, args.remote_worker_host)
+perf_cmd = None
+if args.perf_args:
+  # Expect /usr/bin/perf to be installed here, as is usual
+  perf_cmd = ['/usr/bin/perf'] 
+  perf_cmd.extend(re.split('\s+', args.perf_args))
+
+qpsworker_jobs = create_qpsworkers(languages, args.remote_worker_host, perf_cmd=perf_cmd)
 
 # get list of worker addresses for each language.
 workers_by_lang = dict([(str(language), []) for language in languages])
@@ -457,16 +548,20 @@
 total_scenario_failures = 0
 qps_workers_killed = 0
 merged_resultset = {}
+perf_report_failures = 0
+
 for scenario in scenarios:
   if args.dry_run:
     print(scenario.name)
   else:
+    scenario_failures = 0
     try:
       for worker in scenario.workers:
         worker.start()
-      scenario_failures, resultset = jobset.run([scenario.jobspec,
-                                                create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host)],
-                                                newline_on_success=True, maxjobs=1)
+      jobs = [scenario.jobspec]
+      if scenario.workers:
+        jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host))
+      scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1)
       total_scenario_failures += scenario_failures
       merged_resultset = dict(itertools.chain(merged_resultset.iteritems(),
                                               resultset.iteritems()))
@@ -474,10 +569,27 @@
       # Consider qps workers that need to be killed as failures
       qps_workers_killed += finish_qps_workers(scenario.workers)
 
+    if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
+      workers_and_base_names = {}
+      for worker in scenario.workers:
+        if not worker.perf_file_base_name:
+          raise Exception('using perf buf perf report filename is unspecified')
+        workers_and_base_names[worker.host_and_port] = worker.perf_file_base_name
+      perf_report_failures += run_collect_perf_profile_jobs(workers_and_base_names, scenario.name)
+
+# Still write the index.html even if some scenarios failed.
+# 'profile_output_files' will only have names for scenarios that passed
+if perf_cmd and not args.skip_generate_flamegraphs:
+  # write the index file to the output dir, with all profiles from all scenarios/workers
+  report_utils.render_perf_profiling_results('%s/index.html' % _PERF_REPORT_OUTPUT_DIR, profile_output_files)
 
+# Render the JUnit XML report before any failure exit so failing runs still produce one.
 report_utils.render_junit_xml_report(merged_resultset, args.xml_report,
                                      suite_name='benchmarks')
 
 if total_scenario_failures > 0 or qps_workers_killed > 0:
-  print ("%s scenarios failed and %s qps worker jobs killed" % (total_scenario_failures, qps_workers_killed))
+  print('%s scenarios failed and %s qps worker jobs killed' % (total_scenario_failures, qps_workers_killed))
+  sys.exit(1)
+if perf_report_failures > 0:
+  print('%s perf profile collection jobs failed' % perf_report_failures)
   sys.exit(1)
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 1544ff3..c49ee4a 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -511,7 +511,7 @@
         config.run,
         timeout_seconds=5*60,
         environ=dict(list(environment.items()) +
-                     [('GRPC_PYTHON_TESTRUNNER_FILTER', suite_name)]),
+                     [('GRPC_PYTHON_TESTRUNNER_FILTER', str(suite_name))]),
         shortname='%s.test.%s' % (config.name, suite_name),)
         for suite_name in tests_json
         for config in self.pythons]
diff --git a/tools/run_tests/sanity/check_submodules.sh b/tools/run_tests/sanity/check_submodules.sh
index 6ec0786..be12f96 100755
--- a/tools/run_tests/sanity/check_submodules.sh
+++ b/tools/run_tests/sanity/check_submodules.sh
@@ -43,7 +43,7 @@
 cat << EOF | awk '{ print $1 }' | sort > $want_submodules
  c880e42ba1c8032d4cdde2aba0541d8a9d9fa2e9 third_party/boringssl (version_for_cocoapods_2.0-100-gc880e42)
  05b155ff59114735ec8cd089f669c4c3d8f59029 third_party/gflags (v2.1.0-45-g05b155f)
- 44c25c892a6229b20db7cd9dc05584ea865896de third_party/google_benchmark (v0.1.0-343-g44c25c8)
+ 44c25c892a6229b20db7cd9dc05584ea865896de third_party/benchmark (v0.1.0-343-g44c25c8)
  c99458533a9b4c743ed51537e25989ea55944908 third_party/googletest (release-1.7.0)
  a428e42072765993ff674fda72863c9f1aa2d268 third_party/protobuf (v3.1.0)
  50893291621658f355bc5b4d450a8d06a563053d third_party/zlib (v1.2.8)
diff --git a/tools/run_tests/sources_and_headers.json b/tools/run_tests/sources_and_headers.json
index 2e6877c..6ae269c 100644
--- a/tools/run_tests/sources_and_headers.json
+++ b/tools/run_tests/sources_and_headers.json
@@ -2263,7 +2263,7 @@
   }, 
   {
     "deps": [
-      "google_benchmark", 
+      "benchmark", 
       "gpr", 
       "gpr_test_util", 
       "grpc", 
@@ -2913,7 +2913,7 @@
   }, 
   {
     "deps": [
-      "google_benchmark"
+      "benchmark"
     ], 
     "headers": [], 
     "is_filegroup": false, 
@@ -6207,30 +6207,30 @@
   {
     "deps": [], 
     "headers": [
-      "third_party/google_benchmark/include/benchmark/benchmark.h", 
-      "third_party/google_benchmark/include/benchmark/benchmark_api.h", 
-      "third_party/google_benchmark/include/benchmark/macros.h", 
-      "third_party/google_benchmark/include/benchmark/reporter.h", 
-      "third_party/google_benchmark/src/arraysize.h", 
-      "third_party/google_benchmark/src/benchmark_api_internal.h", 
-      "third_party/google_benchmark/src/check.h", 
-      "third_party/google_benchmark/src/colorprint.h", 
-      "third_party/google_benchmark/src/commandlineflags.h", 
-      "third_party/google_benchmark/src/complexity.h", 
-      "third_party/google_benchmark/src/cycleclock.h", 
-      "third_party/google_benchmark/src/internal_macros.h", 
-      "third_party/google_benchmark/src/log.h", 
-      "third_party/google_benchmark/src/mutex.h", 
-      "third_party/google_benchmark/src/re.h", 
-      "third_party/google_benchmark/src/sleep.h", 
-      "third_party/google_benchmark/src/stat.h", 
-      "third_party/google_benchmark/src/string_util.h", 
-      "third_party/google_benchmark/src/sysinfo.h", 
-      "third_party/google_benchmark/src/timers.h"
+      "third_party/benchmark/include/benchmark/benchmark.h", 
+      "third_party/benchmark/include/benchmark/benchmark_api.h", 
+      "third_party/benchmark/include/benchmark/macros.h", 
+      "third_party/benchmark/include/benchmark/reporter.h", 
+      "third_party/benchmark/src/arraysize.h", 
+      "third_party/benchmark/src/benchmark_api_internal.h", 
+      "third_party/benchmark/src/check.h", 
+      "third_party/benchmark/src/colorprint.h", 
+      "third_party/benchmark/src/commandlineflags.h", 
+      "third_party/benchmark/src/complexity.h", 
+      "third_party/benchmark/src/cycleclock.h", 
+      "third_party/benchmark/src/internal_macros.h", 
+      "third_party/benchmark/src/log.h", 
+      "third_party/benchmark/src/mutex.h", 
+      "third_party/benchmark/src/re.h", 
+      "third_party/benchmark/src/sleep.h", 
+      "third_party/benchmark/src/stat.h", 
+      "third_party/benchmark/src/string_util.h", 
+      "third_party/benchmark/src/sysinfo.h", 
+      "third_party/benchmark/src/timers.h"
     ], 
     "is_filegroup": false, 
     "language": "c++", 
-    "name": "google_benchmark", 
+    "name": "benchmark", 
     "src": [], 
     "third_party": false, 
     "type": "lib"
diff --git a/tools/run_tests/tests.json b/tools/run_tests/tests.json
index c4bfd0a..b76263b 100644
--- a/tools/run_tests/tests.json
+++ b/tools/run_tests/tests.json
@@ -36765,7 +36765,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36788,7 +36789,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36811,7 +36813,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36834,7 +36837,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36857,7 +36861,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36880,7 +36885,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36903,7 +36909,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36926,7 +36933,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36949,7 +36957,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36972,7 +36981,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -36995,7 +37005,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37018,7 +37029,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37041,7 +37053,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37064,7 +37077,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37087,7 +37101,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37110,7 +37125,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37133,7 +37149,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37156,7 +37173,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37179,7 +37197,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37202,7 +37221,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37225,7 +37245,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37248,7 +37269,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37271,7 +37293,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37294,7 +37317,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37317,7 +37341,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37340,7 +37365,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37363,7 +37389,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37386,7 +37413,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37409,7 +37437,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37432,7 +37461,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37455,7 +37485,8 @@
     "cpu_cost": 1024, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37478,7 +37509,8 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37501,7 +37533,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37524,7 +37557,8 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "tsan"
+      "tsan", 
+      "asan"
     ], 
     "flaky": false, 
     "language": "c++", 
@@ -37547,7 +37581,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37570,7 +37603,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_ping_pong_secure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_ping_pong_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37585,7 +37618,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37608,7 +37640,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37623,7 +37655,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37646,7 +37677,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_one_server_core_secure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_one_server_core_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37661,7 +37692,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37684,7 +37714,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_unary_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_unary_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37699,7 +37729,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37722,7 +37751,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_streaming_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_streaming_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37737,7 +37766,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37760,7 +37788,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_ping_pong_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_ping_pong_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37775,7 +37803,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37798,7 +37825,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37813,7 +37840,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37836,7 +37862,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_secure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_secure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37851,7 +37877,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37874,7 +37899,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_ping_pong_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_ping_pong_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37889,7 +37914,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37912,7 +37936,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37927,7 +37951,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37950,7 +37973,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_secure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_secure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -37965,7 +37988,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -37988,7 +38010,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_ping_pong_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_ping_pong_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38003,7 +38025,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38026,7 +38047,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38041,7 +38062,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38064,7 +38084,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_secure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_secure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38079,7 +38099,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38102,7 +38121,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_ping_pong_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_ping_pong_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38117,7 +38136,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38140,7 +38158,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_secure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_secure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38155,7 +38173,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38178,7 +38195,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_secure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_secure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38193,7 +38210,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38216,7 +38232,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_ping_pong_insecure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_ping_pong_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38231,7 +38247,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38254,7 +38269,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38269,7 +38284,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38292,7 +38306,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_one_server_core_insecure", 
+    "shortname": "json_run_localhost:cpp_generic_async_streaming_qps_one_server_core_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38307,7 +38321,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38330,7 +38343,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_unary_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_unary_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38345,7 +38358,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38368,7 +38380,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_streaming_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_client_sync_server_streaming_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38383,7 +38395,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38406,7 +38417,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_ping_pong_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_ping_pong_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38421,7 +38432,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38444,7 +38454,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38459,7 +38469,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38482,7 +38491,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_insecure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_unary_qps_unconstrained_insecure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38497,7 +38506,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38520,7 +38528,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_ping_pong_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_ping_pong_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38535,7 +38543,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38558,7 +38565,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38573,7 +38580,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38596,7 +38602,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_insecure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_unary_qps_unconstrained_insecure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38611,7 +38617,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38634,7 +38639,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_ping_pong_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_ping_pong_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38649,7 +38654,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38672,7 +38676,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38687,7 +38691,6 @@
     "cpu_cost": 64, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38710,7 +38713,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_insecure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_sync_streaming_qps_unconstrained_insecure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38725,7 +38728,6 @@
     "cpu_cost": 2, 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38748,7 +38750,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_ping_pong_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_ping_pong_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38763,7 +38765,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38786,7 +38787,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_insecure", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_insecure_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
@@ -38801,7 +38802,6 @@
     "cpu_cost": "capacity", 
     "defaults": "boringssl", 
     "exclude_configs": [
-      "asan", 
       "asan-noleaks", 
       "asan-trace-cmp", 
       "basicprof", 
@@ -38824,7 +38824,7 @@
     "platforms": [
       "linux"
     ], 
-    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_insecure_500kib_resource_quota", 
+    "shortname": "json_run_localhost:cpp_protobuf_async_streaming_qps_unconstrained_insecure_500kib_resource_quota_low_thread_count", 
     "timeout_seconds": 360
   }, 
   {
diff --git a/vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj b/vsprojects/vcxproj/benchmark/benchmark.vcxproj
similarity index 70%
rename from vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj
rename to vsprojects/vcxproj/benchmark/benchmark.vcxproj
index 52774e0..9f262b3 100644
--- a/vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj
+++ b/vsprojects/vcxproj/benchmark/benchmark.vcxproj
@@ -19,7 +19,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
-    <ProjectGuid>{AAD4AEF3-DF1E-7A6D-EC35-233BD1031BF4}</ProjectGuid>
+    <ProjectGuid>{07978586-E47C-8709-A63E-895FBF3C3C7D}</ProjectGuid>
     <IgnoreWarnIntDirInTempDetected>true</IgnoreWarnIntDirInTempDetected>
     <IntDir>$(SolutionDir)IntDir\$(MSBuildProjectName)\</IntDir>
   </PropertyGroup>
@@ -57,10 +57,10 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)'=='Debug'">
-    <TargetName>google_benchmark</TargetName>
+    <TargetName>benchmark</TargetName>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)'=='Release'">
-    <TargetName>google_benchmark</TargetName>
+    <TargetName>benchmark</TargetName>
   </PropertyGroup>
     <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
@@ -147,53 +147,53 @@
   </ItemDefinitionGroup>
 
   <ItemGroup>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\benchmark.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\benchmark_api.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\macros.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\reporter.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\arraysize.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark_api_internal.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\check.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\colorprint.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\commandlineflags.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\complexity.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\cycleclock.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\internal_macros.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\log.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\mutex.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\re.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\sleep.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\stat.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\string_util.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\sysinfo.h" />
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\timers.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\benchmark.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\benchmark_api.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\macros.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\reporter.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\arraysize.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark_api_internal.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\check.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\colorprint.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\commandlineflags.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\complexity.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\cycleclock.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\internal_macros.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\log.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\mutex.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\re.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\sleep.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\stat.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\string_util.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\sysinfo.h" />
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\timers.h" />
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark_register.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark_register.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\colorprint.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\colorprint.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\commandlineflags.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\commandlineflags.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\complexity.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\complexity.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\console_reporter.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\console_reporter.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\csv_reporter.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\csv_reporter.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\json_reporter.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\json_reporter.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\reporter.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\reporter.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\sleep.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\sleep.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\string_util.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\string_util.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\sysinfo.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\sysinfo.cc">
     </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\timers.cc">
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\timers.cc">
     </ClCompile>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/vsprojects/vcxproj/benchmark/benchmark.vcxproj.filters b/vsprojects/vcxproj/benchmark/benchmark.vcxproj.filters
new file mode 100644
index 0000000..ccc9ca2
--- /dev/null
+++ b/vsprojects/vcxproj/benchmark/benchmark.vcxproj.filters
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark_register.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\colorprint.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\commandlineflags.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\complexity.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\console_reporter.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\csv_reporter.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\json_reporter.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\reporter.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\sleep.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\string_util.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\sysinfo.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SolutionDir)\..\third_party\benchmark\src\timers.cc">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\benchmark.h">
+      <Filter>third_party\benchmark\include\benchmark</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\benchmark_api.h">
+      <Filter>third_party\benchmark\include\benchmark</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\macros.h">
+      <Filter>third_party\benchmark\include\benchmark</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\include\benchmark\reporter.h">
+      <Filter>third_party\benchmark\include\benchmark</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\arraysize.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\benchmark_api_internal.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\check.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\colorprint.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\commandlineflags.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\complexity.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\cycleclock.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\internal_macros.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\log.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\mutex.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\re.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\sleep.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\stat.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\string_util.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\sysinfo.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SolutionDir)\..\third_party\benchmark\src\timers.h">
+      <Filter>third_party\benchmark\src</Filter>
+    </ClInclude>
+  </ItemGroup>
+
+  <ItemGroup>
+    <Filter Include="third_party">
+      <UniqueIdentifier>{7b593518-9fee-107e-6b64-24bdce73f939}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="third_party\benchmark">
+      <UniqueIdentifier>{f0d35de1-6b41-778d-0ba0-faad514fb0f4}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="third_party\benchmark\include">
+      <UniqueIdentifier>{cbc02dfa-face-8cc6-0efb-efacc0c3369c}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="third_party\benchmark\include\benchmark">
+      <UniqueIdentifier>{4f2f03fc-b82d-df33-63ee-bedebeb2c0ee}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="third_party\benchmark\src">
+      <UniqueIdentifier>{f42a8e0a-5a76-0e6f-d708-f0306858f673}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+</Project>
+
diff --git a/vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj.filters b/vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj.filters
deleted file mode 100644
index 9db6ed4..0000000
--- a/vsprojects/vcxproj/google_benchmark/google_benchmark.vcxproj.filters
+++ /dev/null
@@ -1,125 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark_register.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\colorprint.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\commandlineflags.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\complexity.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\console_reporter.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\csv_reporter.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\json_reporter.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\reporter.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\sleep.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\string_util.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\sysinfo.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-    <ClCompile Include="$(SolutionDir)\..\third_party\google_benchmark\src\timers.cc">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\benchmark.h">
-      <Filter>third_party\google_benchmark\include\benchmark</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\benchmark_api.h">
-      <Filter>third_party\google_benchmark\include\benchmark</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\macros.h">
-      <Filter>third_party\google_benchmark\include\benchmark</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\include\benchmark\reporter.h">
-      <Filter>third_party\google_benchmark\include\benchmark</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\arraysize.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\benchmark_api_internal.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\check.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\colorprint.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\commandlineflags.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\complexity.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\cycleclock.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\internal_macros.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\log.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\mutex.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\re.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\sleep.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\stat.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\string_util.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\sysinfo.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-    <ClInclude Include="$(SolutionDir)\..\third_party\google_benchmark\src\timers.h">
-      <Filter>third_party\google_benchmark\src</Filter>
-    </ClInclude>
-  </ItemGroup>
-
-  <ItemGroup>
-    <Filter Include="third_party">
-      <UniqueIdentifier>{7458b63d-7ba4-103d-2bed-3e3ad30d8237}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="third_party\google_benchmark">
-      <UniqueIdentifier>{54a154e8-669b-a7c1-9b6e-bd1aab2f86e3}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="third_party\google_benchmark\include">
-      <UniqueIdentifier>{f54c3cb1-ec20-a651-6956-78379b51e1a5}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="third_party\google_benchmark\include\benchmark">
-      <UniqueIdentifier>{0483a457-8050-4565-bc15-09695bf7b822}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="third_party\google_benchmark\src">
-      <UniqueIdentifier>{c39ff2d1-691e-4614-4d75-4bc20db05e09}</UniqueIdentifier>
-    </Filter>
-  </ItemGroup>
-</Project>
-
diff --git a/vsprojects/vcxproj/test/bm_fullstack/bm_fullstack.vcxproj b/vsprojects/vcxproj/test/bm_fullstack/bm_fullstack.vcxproj
index 1ce993e..3809beb 100644
--- a/vsprojects/vcxproj/test/bm_fullstack/bm_fullstack.vcxproj
+++ b/vsprojects/vcxproj/test/bm_fullstack/bm_fullstack.vcxproj
@@ -164,8 +164,8 @@
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="$(SolutionDir)\..\vsprojects\vcxproj\.\google_benchmark\google_benchmark.vcxproj">
-      <Project>{AAD4AEF3-DF1E-7A6D-EC35-233BD1031BF4}</Project>
+    <ProjectReference Include="$(SolutionDir)\..\vsprojects\vcxproj\.\benchmark\benchmark.vcxproj">
+      <Project>{07978586-E47C-8709-A63E-895FBF3C3C7D}</Project>
     </ProjectReference>
     <ProjectReference Include="$(SolutionDir)\..\vsprojects\vcxproj\.\grpc++_test_util\grpc++_test_util.vcxproj">
       <Project>{0BE77741-552A-929B-A497-4EF7ECE17A64}</Project>
diff --git a/vsprojects/vcxproj/test/noop-benchmark/noop-benchmark.vcxproj b/vsprojects/vcxproj/test/noop-benchmark/noop-benchmark.vcxproj
index 99f33b2..15a82c0 100644
--- a/vsprojects/vcxproj/test/noop-benchmark/noop-benchmark.vcxproj
+++ b/vsprojects/vcxproj/test/noop-benchmark/noop-benchmark.vcxproj
@@ -164,8 +164,8 @@
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="$(SolutionDir)\..\vsprojects\vcxproj\.\google_benchmark\google_benchmark.vcxproj">
-      <Project>{AAD4AEF3-DF1E-7A6D-EC35-233BD1031BF4}</Project>
+    <ProjectReference Include="$(SolutionDir)\..\vsprojects\vcxproj\.\benchmark\benchmark.vcxproj">
+      <Project>{07978586-E47C-8709-A63E-895FBF3C3C7D}</Project>
     </ProjectReference>
   </ItemGroup>
   <ItemGroup>