Merge pull request #9737 from ctiller/bm_call_create

Call creation/destruction benchmark
diff --git a/WORKSPACE b/WORKSPACE
index 4f90f06..9b79d04 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -33,26 +33,36 @@
     actual = "@submodule_gtest//:gtest",
 )
 
+bind(
+    name = "gflags",
+    actual = "@com_github_gflags_gflags//:gflags",
+)
+
 new_local_repository(
     name = "submodule_boringssl",
-    path = "third_party/boringssl-with-bazel",
     build_file = "third_party/boringssl-with-bazel/BUILD",
+    path = "third_party/boringssl-with-bazel",
 )
 
 new_local_repository(
     name = "submodule_zlib",
-    path = "third_party/zlib",
     build_file = "third_party/zlib.BUILD",
+    path = "third_party/zlib",
 )
 
 new_local_repository(
     name = "submodule_protobuf",
-    path = "third_party/protobuf",
     build_file = "third_party/protobuf/BUILD",
+    path = "third_party/protobuf",
 )
 
 new_local_repository(
     name = "submodule_gtest",
-    path = "third_party/googletest",
     build_file = "third_party/gtest.BUILD",
+    path = "third_party/googletest",
+)
+
+local_repository(
+    name = "com_github_gflags_gflags",
+    path = "third_party/gflags",
 )
diff --git a/doc/service_config.md b/doc/service_config.md
index 2dabb83..ecc2381 100644
--- a/doc/service_config.md
+++ b/doc/service_config.md
@@ -12,105 +12,105 @@
 
 ```
 {
-  # Load balancing policy name.
-  # Supported values are 'round_robin' and 'grpclb'.
-  # Optional; if unset, the default behavior is pick the first available
-  # backend.
-  # Note that if the resolver returns only balancer addresses and no
-  # backend addresses, gRPC will always use the 'grpclb' policy,
-  # regardless of what this field is set to.
+  // Load balancing policy name.
+  // Supported values are 'round_robin' and 'grpclb'.
+  // Optional; if unset, the default behavior is to pick the first available
+  // backend.
+  // Note that if the resolver returns only balancer addresses and no
+  // backend addresses, gRPC will always use the 'grpclb' policy,
+  // regardless of what this field is set to.
   'loadBalancingPolicy': string,
 
-  # Per-method configuration.  Optional.
+  // Per-method configuration.  Optional.
   'methodConfig': [
     {
-      # The names of the methods to which this method config applies. There
-      # must be at least one name. Each name entry must be unique across the
-      # entire service config. If the 'method' field is empty, then this
-      # method config specifies the defaults for all methods for the specified
-      # service.
-      #
-      # For example, let's say that the service config contains the following
-      # method config entries:
-      #
-      # 'methodConfig': [
-      #   { 'name': [ { 'service': 'MyService' } ] ... },
-      #   { 'name': [ { 'service': 'MyService', 'method': 'Foo' } ] ... }
-      # ]
-      #
-      # For a request for MyService/Foo, we will use the second entry, because
-      # it exactly matches the service and method name.
-      # For a request for MyService/Bar, we will use the first entry, because
-      # it provides the default for all methods of MyService.
+      // The names of the methods to which this method config applies. There
+      // must be at least one name. Each name entry must be unique across the
+      // entire service config. If the 'method' field is empty, then this
+      // method config specifies the defaults for all methods for the specified
+      // service.
+      //
+      // For example, let's say that the service config contains the following
+      // method config entries:
+      //
+      // 'methodConfig': [
+      //   { 'name': [ { 'service': 'MyService' } ] ... },
+      //   { 'name': [ { 'service': 'MyService', 'method': 'Foo' } ] ... }
+      // ]
+      //
+      // For a request for MyService/Foo, we will use the second entry, because
+      // it exactly matches the service and method name.
+      // For a request for MyService/Bar, we will use the first entry, because
+      // it provides the default for all methods of MyService.
       'name': [
         {
-          # RPC service name.  Required.
-          # If using gRPC with protobuf as the IDL, then this will be of
-          # the form "pkg.service_name", where "pkg" is the package name
-          # defined in the proto file.
+          // RPC service name.  Required.
+          // If using gRPC with protobuf as the IDL, then this will be of
+          // the form "pkg.service_name", where "pkg" is the package name
+          // defined in the proto file.
           'service': string,
 
-          # RPC method name.  Optional (see above).
+          // RPC method name.  Optional (see above).
           'method': string,
         }
       ],
 
-      # Whether RPCs sent to this method should wait until the connection is
-      # ready by default. If false, the RPC will abort immediately if there
-      # is a transient failure connecting to the server. Otherwise, gRPC will
-      # attempt to connect until the deadline is exceeded.
-      #
-      # The value specified via the gRPC client API will override the value
-      # set here. However, note that setting the value in the client API will
-      # also affect transient errors encountered during name resolution,
-      # which cannot be caught by the value here, since the service config
-      # is obtained by the gRPC client via name resolution.
+      // Whether RPCs sent to this method should wait until the connection is
+      // ready by default. If false, the RPC will abort immediately if there
+      // is a transient failure connecting to the server. Otherwise, gRPC will
+      // attempt to connect until the deadline is exceeded.
+      //
+      // The value specified via the gRPC client API will override the value
+      // set here. However, note that setting the value in the client API will
+      // also affect transient errors encountered during name resolution,
+      // which cannot be caught by the value here, since the service config
+      // is obtained by the gRPC client via name resolution.
       'waitForReady': bool,
 
-      # The default timeout in seconds for RPCs sent to this method. This can
-      # be overridden in code. If no reply is received in the specified amount
-      # of time, the request is aborted and a deadline-exceeded error status
-      # is returned to the caller.
-      #
-      # The actual deadline used will be the minimum of the value specified
-      # here and the value set by the application via the gRPC client API.
-      # If either one is not set, then the other will be used.
-      # If neither is set, then the request has no deadline.
-      #
-      # The format of the value is that of the 'Duration' type defined here:
-      # https://developers.google.com/protocol-buffers/docs/proto3#json
+      // The default timeout in seconds for RPCs sent to this method. This can
+      // be overridden in code. If no reply is received in the specified amount
+      // of time, the request is aborted and a deadline-exceeded error status
+      // is returned to the caller.
+      //
+      // The actual deadline used will be the minimum of the value specified
+      // here and the value set by the application via the gRPC client API.
+      // If either one is not set, then the other will be used.
+      // If neither is set, then the request has no deadline.
+      //
+      // The format of the value is that of the 'Duration' type defined here:
+      // https://developers.google.com/protocol-buffers/docs/proto3#json
       'timeout': string,
 
-      # The maximum allowed payload size for an individual request or object
-      # in a stream (client->server) in bytes. The size which is measured is
-      # the serialized, uncompressed payload in bytes. This applies both
-      # to streaming and non-streaming requests.
-      #
-      # The actual value used is the minimum of the value specified here and
-      # the value set by the application via the gRPC client API.
-      # If either one is not set, then the other will be used.
-      # If neither is set, then the built-in default is used.
-      #
-      # If a client attempts to send an object larger than this value, it
-      # will not be sent and the client will see an error.
-      # Note that 0 is a valid value, meaning that the request message must
-      # be empty.
+      // The maximum allowed payload size for an individual request or object
+      // in a stream (client->server) in bytes. The size which is measured is
+      // the serialized, uncompressed payload in bytes. This applies both
+      // to streaming and non-streaming requests.
+      //
+      // The actual value used is the minimum of the value specified here and
+      // the value set by the application via the gRPC client API.
+      // If either one is not set, then the other will be used.
+      // If neither is set, then the built-in default is used.
+      //
+      // If a client attempts to send an object larger than this value, it
+      // will not be sent and the client will see an error.
+      // Note that 0 is a valid value, meaning that the request message must
+      // be empty.
       'maxRequestMessageBytes': number,
 
-      # The maximum allowed payload size for an individual response or object
-      # in a stream (server->client) in bytes. The size which is measured is
-      # the serialized, uncompressed payload in bytes. This applies both
-      # to streaming and non-streaming requests.
-      #
-      # The actual value used is the minimum of the value specified here and
-      # the value set by the application via the gRPC client API.
-      # If either one is not set, then the other will be used.
-      # If neither is set, then the built-in default is used.
-      #
-      # If a server attempts to send an object larger than this value, it
-      # will not be sent, and the client will see an error.
-      # Note that 0 is a valid value, meaning that the response message must
-      # be empty.
+      // The maximum allowed payload size for an individual response or object
+      // in a stream (server->client) in bytes. The size which is measured is
+      // the serialized, uncompressed payload in bytes. This applies both
+      // to streaming and non-streaming requests.
+      //
+      // The actual value used is the minimum of the value specified here and
+      // the value set by the application via the gRPC client API.
+      // If either one is not set, then the other will be used.
+      // If neither is set, then the built-in default is used.
+      //
+      // If a server attempts to send an object larger than this value, it
+      // will not be sent, and the client will see an error.
+      // Note that 0 is a valid value, meaning that the response message must
+      // be empty.
       'maxResponseMessageBytes': number
     }
   ]
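
As a concrete illustration of the schema documented above, here is a minimal sketch of a service config written as a Python dict and serialized to JSON. The service/method names and the commented-out 'grpc.service_config' channel-argument usage are assumptions for illustration only, not part of this change.

```
import json

# Hypothetical service config following the schema documented above.
service_config = {
    'loadBalancingPolicy': 'round_robin',
    'methodConfig': [
        {
            # Defaults for every method of example.MyService.
            'name': [{'service': 'example.MyService'}],
            'waitForReady': True,
            'timeout': '5s',  # 'Duration' JSON format
            'maxRequestMessageBytes': 1048576,
            'maxResponseMessageBytes': 1048576,
        },
        {
            # An exact service/method match takes precedence over the entry above.
            'name': [{'service': 'example.MyService', 'method': 'Foo'}],
            'timeout': '1s',
        },
    ],
}

print(json.dumps(service_config, indent=2))
# One possible (assumed) way to supply it from a Python client:
#   channel = grpc.insecure_channel(
#       'localhost:50051',
#       options=[('grpc.service_config', json.dumps(service_config))])
```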
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.c b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
index d1fab25..28a3166 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.c
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
@@ -1114,8 +1114,11 @@
             grpc_chttp2_list_add_waiting_for_concurrency(t, s);
             maybe_start_some_streams(exec_ctx, t);
           } else {
-            grpc_chttp2_cancel_stream(exec_ctx, t, s,
-                                      GRPC_ERROR_CREATE("Transport closed"));
+            grpc_chttp2_cancel_stream(
+                exec_ctx, t, s,
+                grpc_error_set_int(GRPC_ERROR_CREATE("Transport closed"),
+                                   GRPC_ERROR_INT_GRPC_STATUS,
+                                   GRPC_STATUS_UNAVAILABLE));
           }
         } else {
           GPR_ASSERT(s->id != 0);
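
The net effect of the chttp2_transport.c change above is that a call cancelled because the transport is already closed now carries GRPC_STATUS_UNAVAILABLE rather than an unspecified status. Below is a minimal client-side sketch of how that might be observed, assuming the Python API surfaces it as StatusCode.UNAVAILABLE and that the raised error exposes .code(); the stub and method name are hypothetical.

```
import grpc


def call_with_unavailable_handling(stub, request):
    """Invoke a hypothetical unary RPC and flag UNAVAILABLE failures."""
    try:
        return stub.Foo(request, timeout=5)
    except grpc.RpcError as err:
        # With the change above, a closed transport should map to UNAVAILABLE
        # instead of an unset status (assumption about the Python mapping).
        if err.code() == grpc.StatusCode.UNAVAILABLE:
            print('transport closed / server unavailable; caller may retry')
        raise
```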
diff --git a/src/objective-c/tests/CronetUnitTests/CronetUnitTests.m b/src/objective-c/tests/CronetUnitTests/CronetUnitTests.m
index 5e3c59f..e97f3d2 100644
--- a/src/objective-c/tests/CronetUnitTests/CronetUnitTests.m
+++ b/src/objective-c/tests/CronetUnitTests/CronetUnitTests.m
@@ -321,44 +321,8 @@
   grpc_metadata_array_init(&request_metadata_recv);
   grpc_call_details_init(&call_details);
 
-  memset(ops, 0, sizeof(ops));
-  op = ops;
-  op->op = GRPC_OP_SEND_INITIAL_METADATA;
-  op->data.send_initial_metadata.count = 2;
-  op->data.send_initial_metadata.metadata = meta_c;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  op->op = GRPC_OP_SEND_MESSAGE;
-  op->data.send_message.send_message = request_payload;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  op->op = GRPC_OP_RECV_INITIAL_METADATA;
-  op->data.recv_initial_metadata.recv_initial_metadata = &initial_metadata_recv;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  op->op = GRPC_OP_RECV_MESSAGE;
-  op->data.recv_message.recv_message = &response_payload_recv;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
-  op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv;
-  op->data.recv_status_on_client.status = &status;
-  op->data.recv_status_on_client.status_details = &details;
-  op->flags = 0;
-  op->reserved = NULL;
-  op++;
-  error = grpc_call_start_batch(c, ops, (size_t)(op - ops), (void *)1, NULL);
-  GPR_ASSERT(GRPC_CALL_OK == error);
-
-  __weak XCTestExpectation *expectation = [self expectationWithDescription:@"Coalescing"];
+  __weak XCTestExpectation *expectation =
+      [self expectationWithDescription:@"Coalescing"];
 
   dispatch_async(
       dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
@@ -425,6 +389,46 @@
         [expectation fulfill];
       });
 
+  // Guarantees that the server is listening on the port before the client connects.
+  sleep(1);
+
+  memset(ops, 0, sizeof(ops));
+  op = ops;
+  op->op = GRPC_OP_SEND_INITIAL_METADATA;
+  op->data.send_initial_metadata.count = 2;
+  op->data.send_initial_metadata.metadata = meta_c;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  op->op = GRPC_OP_SEND_MESSAGE;
+  op->data.send_message.send_message = request_payload;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  op->op = GRPC_OP_RECV_INITIAL_METADATA;
+  op->data.recv_initial_metadata.recv_initial_metadata = &initial_metadata_recv;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  op->op = GRPC_OP_RECV_MESSAGE;
+  op->data.recv_message.recv_message = &response_payload_recv;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
+  op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv;
+  op->data.recv_status_on_client.status = &status;
+  op->data.recv_status_on_client.status_details = &details;
+  op->flags = 0;
+  op->reserved = NULL;
+  op++;
+  error = grpc_call_start_batch(c, ops, (size_t)(op - ops), (void *)1, NULL);
+  GPR_ASSERT(GRPC_CALL_OK == error);
+
   CQ_EXPECT_COMPLETION(cqv, (void *)1, 1);
   cq_verify(cqv);
 
@@ -445,7 +449,7 @@
   grpc_completion_queue_shutdown(cq);
   drain_cq(cq);
   grpc_completion_queue_destroy(cq);
-  
+
   [self waitForExpectationsWithTimeout:4 handler:nil];
 }
 
diff --git a/test/cpp/util/BUILD b/test/cpp/util/BUILD
index 78eca8c..f3cdc58 100644
--- a/test/cpp/util/BUILD
+++ b/test/cpp/util/BUILD
@@ -37,14 +37,17 @@
     hdrs = [
         "test_config.h",
     ],
-    deps = ["//:gpr"],
     visibility = ["//test:__subpackages__"],
+    deps = [
+        "//:gpr",
+        "//external:gflags",
+    ],
 )
 
 cc_library(
     name = "test_util",
     srcs = [
-#        "test/cpp/end2end/test_service_impl.cc",
+        #        "test/cpp/end2end/test_service_impl.cc",
         "byte_buffer_proto_helper.cc",
         "create_test_channel.cc",
         "string_ref_helper.cc",
@@ -58,6 +61,10 @@
         "subprocess.h",
         "test_credentials_provider.h",
     ],
-    deps = ["//test/core/util:gpr_test_util", "//:grpc++", "//test/core/end2end:ssl_test_data"],
     visibility = ["//test:__subpackages__"],
+    deps = [
+        "//:grpc++",
+        "//test/core/end2end:ssl_test_data",
+        "//test/core/util:gpr_test_util",
+    ],
 )
diff --git a/third_party/gflags b/third_party/gflags
index f8a0efe..30dbc81 160000
--- a/third_party/gflags
+++ b/third_party/gflags
@@ -1 +1 @@
-Subproject commit f8a0efe03aa69b3336d8e228b37d4ccb17324b88
+Subproject commit 30dbc81fb5ffdc98ea9b14b1918bfe4e8779b26e
diff --git a/tools/internal_ci/linux/grpc_interop.cfg b/tools/internal_ci/linux/grpc_interop_badserver_java.cfg
similarity index 94%
copy from tools/internal_ci/linux/grpc_interop.cfg
copy to tools/internal_ci/linux/grpc_interop_badserver_java.cfg
index 9259faf..e521b08 100644
--- a/tools/internal_ci/linux/grpc_interop.cfg
+++ b/tools/internal_ci/linux/grpc_interop_badserver_java.cfg
@@ -1,4 +1,3 @@
-#!/bin/bash
 # Copyright 2017, Google Inc.
 # All rights reserved.
 #
@@ -31,11 +30,11 @@
 # Config file for the internal CI (in protobuf text format)
 
 # Location of the continuous shell script in repository.
-build_file: "grpc/tools/internal_ci/linux/grpc_interop.sh"
+build_file: "grpc/tools/internal_ci/linux/grpc_interop_badserver_java.sh"
 # grpc_interop tests can take 6+ hours to complete.
 timeout_mins: 480
 action {
   define_artifacts {
-    regex: "**/sponge_log.xml"
+    regex: "**/report.xml"
   }
 }
diff --git a/tools/internal_ci/linux/grpc_interop.sh b/tools/internal_ci/linux/grpc_interop_badserver_java.sh
similarity index 85%
rename from tools/internal_ci/linux/grpc_interop.sh
rename to tools/internal_ci/linux/grpc_interop_badserver_java.sh
index 68bb419..0985e65 100755
--- a/tools/internal_ci/linux/grpc_interop.sh
+++ b/tools/internal_ci/linux/grpc_interop_badserver_java.sh
@@ -37,6 +37,5 @@
 
 git submodule update --init
 
-tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker --http2_interop -t -j 12 $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l java --use_docker --http2_badserver_interop $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l python --use_docker --http2_badserver_interop $@ || FAILED="true"
+tools/run_tests/run_interop_tests.py -l java --use_docker --http2_badserver_interop $@
+
diff --git a/tools/internal_ci/linux/grpc_interop.cfg b/tools/internal_ci/linux/grpc_interop_badserver_python.cfg
similarity index 94%
rename from tools/internal_ci/linux/grpc_interop.cfg
rename to tools/internal_ci/linux/grpc_interop_badserver_python.cfg
index 9259faf..940f760 100644
--- a/tools/internal_ci/linux/grpc_interop.cfg
+++ b/tools/internal_ci/linux/grpc_interop_badserver_python.cfg
@@ -1,4 +1,3 @@
-#!/bin/bash
 # Copyright 2017, Google Inc.
 # All rights reserved.
 #
@@ -31,11 +30,11 @@
 # Config file for the internal CI (in protobuf text format)
 
 # Location of the continuous shell script in repository.
-build_file: "grpc/tools/internal_ci/linux/grpc_interop.sh"
+build_file: "grpc/tools/internal_ci/linux/grpc_interop_badserver_python.sh"
 # grpc_interop tests can take 6+ hours to complete.
 timeout_mins: 480
 action {
   define_artifacts {
-    regex: "**/sponge_log.xml"
+    regex: "**/report.xml"
   }
 }
diff --git a/tools/internal_ci/linux/grpc_interop.sh b/tools/internal_ci/linux/grpc_interop_badserver_python.sh
similarity index 85%
copy from tools/internal_ci/linux/grpc_interop.sh
copy to tools/internal_ci/linux/grpc_interop_badserver_python.sh
index 68bb419..3fff537 100755
--- a/tools/internal_ci/linux/grpc_interop.sh
+++ b/tools/internal_ci/linux/grpc_interop_badserver_python.sh
@@ -37,6 +37,5 @@
 
 git submodule update --init
 
-tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker --http2_interop -t -j 12 $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l java --use_docker --http2_badserver_interop $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l python --use_docker --http2_badserver_interop $@ || FAILED="true"
+tools/run_tests/run_interop_tests.py -l python --use_docker --http2_badserver_interop $@
+
diff --git a/tools/internal_ci/linux/grpc_interop.cfg b/tools/internal_ci/linux/grpc_interop_tocloud.cfg
similarity index 94%
copy from tools/internal_ci/linux/grpc_interop.cfg
copy to tools/internal_ci/linux/grpc_interop_tocloud.cfg
index 9259faf..2b53644 100644
--- a/tools/internal_ci/linux/grpc_interop.cfg
+++ b/tools/internal_ci/linux/grpc_interop_tocloud.cfg
@@ -1,4 +1,3 @@
-#!/bin/bash
 # Copyright 2017, Google Inc.
 # All rights reserved.
 #
@@ -31,11 +30,11 @@
 # Config file for the internal CI (in protobuf text format)
 
 # Location of the continuous shell script in repository.
-build_file: "grpc/tools/internal_ci/linux/grpc_interop.sh"
+build_file: "grpc/tools/internal_ci/linux/grpc_interop_tocloud.sh"
 # grpc_interop tests can take 6+ hours to complete.
 timeout_mins: 480
 action {
   define_artifacts {
-    regex: "**/sponge_log.xml"
+    regex: "**/report.xml"
   }
 }
diff --git a/tools/internal_ci/linux/grpc_interop.sh b/tools/internal_ci/linux/grpc_interop_tocloud.sh
similarity index 82%
copy from tools/internal_ci/linux/grpc_interop.sh
copy to tools/internal_ci/linux/grpc_interop_tocloud.sh
index 68bb419..572001d 100755
--- a/tools/internal_ci/linux/grpc_interop.sh
+++ b/tools/internal_ci/linux/grpc_interop_tocloud.sh
@@ -37,6 +37,4 @@
 
 git submodule update --init
 
-tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker --http2_interop -t -j 12 $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l java --use_docker --http2_badserver_interop $@ || FAILED="true"
-tools/run_tests/run_interop_tests.py -l python --use_docker --http2_badserver_interop $@ || FAILED="true"
+tools/run_tests/run_interop_tests.py -l all -s all --use_docker --http2_interop -t -j 12 $@
diff --git a/tools/internal_ci/linux/grpc_master.cfg b/tools/internal_ci/linux/grpc_master.cfg
index 8ce2ef1..d5901bb 100644
--- a/tools/internal_ci/linux/grpc_master.cfg
+++ b/tools/internal_ci/linux/grpc_master.cfg
@@ -32,7 +32,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_master.sh"
-timeout_mins: 60
+timeout_mins: 240
 action {
   define_artifacts {
     regex: "**/sponge_log.xml"
diff --git a/tools/run_tests/python_utils/start_port_server.py b/tools/run_tests/python_utils/start_port_server.py
index 8ee7080..4c9f6aa 100644
--- a/tools/run_tests/python_utils/start_port_server.py
+++ b/tools/run_tests/python_utils/start_port_server.py
@@ -30,101 +30,109 @@
 from __future__ import print_function
 
 from six.moves import urllib
-import os
-import subprocess
-import tempfile
-import sys
-import time
 import jobset
+import logging
+import os
 import socket
+import subprocess
+import sys
+import tempfile
+import time
+
 
 def start_port_server(port_server_port):
-  # check if a compatible port server is running
-  # if incompatible (version mismatch) ==> start a new one
-  # if not running ==> start a new one
-  # otherwise, leave it up
-  try:
-    version = int(urllib.request.urlopen(
-        'http://localhost:%d/version_number' % port_server_port,
-        timeout=10).read())
-    print('detected port server running version %d' % version)
-    running = True
-  except Exception as e:
-    print('failed to detect port server: %s' % sys.exc_info()[0])
-    print(e.strerror)
-    running = False
-  if running:
-    current_version = int(subprocess.check_output(
-        [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-         'dump_version']))
-    print('my port server is version %d' % current_version)
-    running = (version >= current_version)
+    # check if a compatible port server is running
+    # if incompatible (version mismatch) ==> start a new one
+    # if not running ==> start a new one
+    # otherwise, leave it up
+    try:
+        version = int(
+            urllib.request.urlopen(
+                'http://localhost:%d/version_number' % port_server_port,
+                timeout=10).read())
+        logging.info('detected port server running version %d', version)
+        running = True
+    except Exception as e:
+        logging.exception('failed to detect port server')
+        running = False
+    if running:
+        current_version = int(
+            subprocess.check_output([
+                sys.executable, os.path.abspath(
+                    'tools/run_tests/python_utils/port_server.py'),
+                'dump_version'
+            ]))
+        logging.info('my port server is version %d', current_version)
+        running = (version >= current_version)
+        if not running:
+            logging.info('port_server version mismatch: killing the old one')
+            urllib.request.urlopen('http://localhost:%d/quitquitquit' %
+                                   port_server_port).read()
+            time.sleep(1)
     if not running:
-      print('port_server version mismatch: killing the old one')
-      urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
-      time.sleep(1)
-  if not running:
-    fd, logfile = tempfile.mkstemp()
-    os.close(fd)
-    print('starting port_server, with log file %s' % logfile)
-    args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-            '-p', '%d' % port_server_port, '-l', logfile]
-    env = dict(os.environ)
-    env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
-    if jobset.platform_string() == 'windows':
-      # Working directory of port server needs to be outside of Jenkins
-      # workspace to prevent file lock issues.
-      tempdir = tempfile.mkdtemp()
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          cwd=tempdir,
-          creationflags = 0x00000008, # detached process
-          close_fds=True)
-    else:
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          preexec_fn=os.setsid,
-          close_fds=True)
-    time.sleep(1)
-    # ensure port server is up
-    waits = 0
-    while True:
-      if waits > 10:
-        print('killing port server due to excessive start up waits')
-        port_server.kill()
-      if port_server.poll() is not None:
-        print('port_server failed to start')
-        # try one final time: maybe another build managed to start one
+        fd, logfile = tempfile.mkstemp()
+        os.close(fd)
+        logging.info('starting port_server, with log file %s', logfile)
+        args = [
+            sys.executable,
+            os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+            '-p', '%d' % port_server_port, '-l', logfile
+        ]
+        env = dict(os.environ)
+        env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
+        if jobset.platform_string() == 'windows':
+            # Working directory of port server needs to be outside of Jenkins
+            # workspace to prevent file lock issues.
+            tempdir = tempfile.mkdtemp()
+            port_server = subprocess.Popen(
+                args,
+                env=env,
+                cwd=tempdir,
+                creationflags=0x00000008,  # detached process
+                close_fds=True)
+        else:
+            port_server = subprocess.Popen(
+                args, env=env, preexec_fn=os.setsid, close_fds=True)
         time.sleep(1)
-        try:
-          urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
-                          timeout=1).read()
-          print('last ditch attempt to contact port server succeeded')
-          break
-        except:
-          traceback.print_exc()
-          port_log = open(logfile, 'r').read()
-          print(port_log)
-          sys.exit(1)
-      try:
-        urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+        # ensure port server is up
+        waits = 0
+        while True:
+            if waits > 10:
+                logging.warning(
+                    'killing port server due to excessive start up waits')
+                port_server.kill()
+            if port_server.poll() is not None:
+                logging.error('port_server failed to start')
+                # try one final time: maybe another build managed to start one
+                time.sleep(1)
+                try:
+                    urllib.request.urlopen(
+                        'http://localhost:%d/get' % port_server_port,
                         timeout=1).read()
-        print('port server is up and ready')
-        break
-      except socket.timeout:
-        print('waiting for port_server: timeout')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except urllib.error.URLError:
-        print('waiting for port_server: urlerror')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except:
-        traceback.print_exc()
-        port_server.kill()
-        raise
-
+                    logging.info(
+                        'last ditch attempt to contact port server succeeded')
+                    break
+                except:
+                    logging.exception(
+                        'final attempt to contact port server failed')
+                    port_log = open(logfile, 'r').read()
+                    print(port_log)
+                    sys.exit(1)
+            try:
+                port_server_url = 'http://localhost:%d/get' % port_server_port
+                urllib.request.urlopen(port_server_url, timeout=1).read()
+                logging.info('port server is up and ready')
+                break
+            except socket.timeout:
+                logging.exception('while waiting for port_server')
+                time.sleep(1)
+                waits += 1
+            except urllib.error.URLError:
+                logging.exception('while waiting for port_server')
+                time.sleep(1)
+                waits += 1
+            except:
+                logging.exception('error while contacting port server at "%s". '
+                                  'Will try killing it.', port_server_url)
+                port_server.kill()
+                raise
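
The reworked start_port_server.py above drives the port server entirely over plain HTTP (/version_number, /quitquitquit, /get). As a small companion sketch, here is a hypothetical helper that asks a running port server for a port, assuming the /get endpoint returns the allocated port number as plain text:

```
from six.moves import urllib


def get_port(port_server_port, timeout=5):
    """Hypothetical helper: request a free port from the local port server."""
    url = 'http://localhost:%d/get' % port_server_port
    # Assumes /get responds with the port number as a plain-text body.
    body = urllib.request.urlopen(url, timeout=timeout).read()
    return int(body.decode('ascii').strip())


if __name__ == '__main__':
    # 32766 is only an example value; the harness supplies the real port.
    print('allocated port: %d' % get_port(32766))
```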
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index cfc2b04..2d9eb29 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -38,6 +38,7 @@
 import glob
 import itertools
 import json
+import logging
 import multiprocessing
 import os
 import os.path
@@ -84,8 +85,8 @@
   try:
     subprocess.check_output(cmd, shell=True, env=env, cwd=cwd)
   except subprocess.CalledProcessError as e:
-    print("Error while running command '%s'. Exit status %d. Output:\n%s",
-          e.cmd, e.returncode, e.output)
+    logging.exception("Error while running command '%s'. Exit status %d. Output:\n%s",
+                      e.cmd, e.returncode, e.output)
     raise
 
 # SimpleConfig: just compile with CONFIG=config, and run the binary to test
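
The run_tests.py change above replaces a print() call that had been handed printf-style arguments (which print() does not interpolate) with logging.exception, which both formats the message and records the active traceback. A minimal sketch of the difference, using a deliberately failing command as a stand-in:

```
import logging
import subprocess

logging.basicConfig(level=logging.INFO)

try:
    subprocess.check_output('false', shell=True)  # stand-in failing command
except subprocess.CalledProcessError as e:
    # print("...%s...", e.cmd, ...) would emit the raw format string followed
    # by the arguments; logging.exception interpolates them and appends the
    # traceback of the active exception.
    logging.exception("Error while running command '%s'. Exit status %d. Output:\n%s",
                      e.cmd, e.returncode, e.output)
```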
diff --git a/tools/run_tests/sanity/check_submodules.sh b/tools/run_tests/sanity/check_submodules.sh
index 0b68319..cfe4e27 100755
--- a/tools/run_tests/sanity/check_submodules.sh
+++ b/tools/run_tests/sanity/check_submodules.sh
@@ -44,7 +44,7 @@
  44c25c892a6229b20db7cd9dc05584ea865896de third_party/benchmark (v0.1.0-343-g44c25c8)
  78684e5b222645828ca302e56b40b9daff2b2d27 third_party/boringssl (78684e5)
  886e7d75368e3f4fab3f4d0d3584e4abfc557755 third_party/boringssl-with-bazel (version_for_cocoapods_7.0-857-g886e7d7)
- f8a0efe03aa69b3336d8e228b37d4ccb17324b88 third_party/gflags (v2.2.0)
+ 30dbc81fb5ffdc98ea9b14b1918bfe4e8779b26e third_party/gflags (v2.2.0)
  c99458533a9b4c743ed51537e25989ea55944908 third_party/googletest (release-1.7.0)
  a428e42072765993ff674fda72863c9f1aa2d268 third_party/protobuf (v3.1.0-alpha-1)
  bcad91771b7f0bff28a1cac1981d7ef2b9bcef3c third_party/thrift (bcad917)