Merge pull request #13341 from kpayson64/fix_loon

Default to 1 cq per sync server
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
index 01b243b..6675cac 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
@@ -175,6 +175,10 @@
   /* The RR instance related to the closure */
   grpc_lb_policy* rr_policy;
 
+  /* The grpclb instance that created the wrapping. This instance is not owned,
+   * reference counts are untouched. It's used only for logging purposes. */
+  grpc_lb_policy* glb_policy;
+
   /* heap memory to be freed upon closure execution. */
   void* free_when_done;
 } wrapped_rr_closure_arg;
@@ -199,10 +203,11 @@
                                       wc_arg->lb_token_mdelem_storage,
                                       GRPC_MDELEM_REF(wc_arg->lb_token));
       } else {
-        gpr_log(GPR_ERROR,
-                "No LB token for connected subchannel pick %p (from RR "
-                "instance %p).",
-                (void*)*wc_arg->target, (void*)wc_arg->rr_policy);
+        gpr_log(
+            GPR_ERROR,
+            "[grpclb %p] No LB token for connected subchannel pick %p (from RR "
+            "instance %p).",
+            wc_arg->glb_policy, *wc_arg->target, wc_arg->rr_policy);
         abort();
       }
       // Pass on client stats via context. Passes ownership of the reference.
@@ -213,7 +218,8 @@
       grpc_grpclb_client_stats_unref(wc_arg->client_stats);
     }
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO, "Unreffing RR %p", (void*)wc_arg->rr_policy);
+      gpr_log(GPR_INFO, "[grpclb %p] Unreffing RR %p", wc_arg->glb_policy,
+              wc_arg->rr_policy);
     }
     GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
   }
@@ -619,8 +625,10 @@
 
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
     gpr_log(
-        GPR_INFO, "Setting grpclb's state to %s from new RR policy %p state.",
-        grpc_connectivity_state_name(rr_state), (void*)glb_policy->rr_policy);
+        GPR_INFO,
+        "[grpclb %p] Setting grpclb's state to %s from new RR policy %p state.",
+        glb_policy, grpc_connectivity_state_name(rr_state),
+        glb_policy->rr_policy);
   }
   grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, rr_state,
                               rr_state_error,
@@ -647,8 +655,8 @@
     if (server->drop) {
       // Not using the RR policy, so unref it.
       if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-        gpr_log(GPR_INFO, "Unreffing RR for drop (0x%" PRIxPTR ")",
-                (intptr_t)wc_arg->rr_policy);
+        gpr_log(GPR_INFO, "[grpclb %p] Unreffing RR %p for drop", glb_policy,
+                wc_arg->rr_policy);
       }
       GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "glb_pick_sync");
       // Update client load reporting stats to indicate the number of
@@ -656,6 +664,7 @@
       // the client_load_reporting filter, because we do not create a
       // subchannel call (and therefore no client_load_reporting filter)
       // for dropped calls.
+      GPR_ASSERT(wc_arg->client_stats != NULL);
       grpc_grpclb_client_stats_add_call_dropped_locked(
           server->load_balance_token, wc_arg->client_stats);
       grpc_grpclb_client_stats_unref(wc_arg->client_stats);
@@ -676,8 +685,8 @@
   if (pick_done) {
     /* synchronous grpc_lb_policy_pick call. Unref the RR policy. */
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
-              (intptr_t)wc_arg->rr_policy);
+      gpr_log(GPR_INFO, "[grpclb %p] Unreffing RR %p", glb_policy,
+              wc_arg->rr_policy);
     }
     GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "glb_pick_sync");
     /* add the load reporting initial metadata */
@@ -748,12 +757,13 @@
       grpc_lb_policy_create(exec_ctx, "round_robin", args);
   if (new_rr_policy == NULL) {
     gpr_log(GPR_ERROR,
-            "Failure creating a RoundRobin policy for serverlist update with "
-            "%lu entries. The previous RR instance (%p), if any, will continue "
-            "to be used. Future updates from the LB will attempt to create new "
+            "[grpclb %p] Failure creating a RoundRobin policy for serverlist "
+            "update with %" PRIuPTR
+            " entries. The previous RR instance (%p), if any, will continue to "
+            "be used. Future updates from the LB will attempt to create new "
             "instances.",
-            (unsigned long)glb_policy->serverlist->num_servers,
-            (void*)glb_policy->rr_policy);
+            glb_policy, glb_policy->serverlist->num_servers,
+            glb_policy->rr_policy);
     return;
   }
   glb_policy->rr_policy = new_rr_policy;
@@ -797,8 +807,9 @@
     pp->wrapped_on_complete_arg.client_stats =
         grpc_grpclb_client_stats_ref(glb_policy->client_stats);
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO, "Pending pick about to (async) PICK from %p",
-              (void*)glb_policy->rr_policy);
+      gpr_log(GPR_INFO,
+              "[grpclb %p] Pending pick about to (async) PICK from RR %p",
+              glb_policy, glb_policy->rr_policy);
     }
     pick_from_internal_rr_locked(exec_ctx, glb_policy, &pp->pick_args,
                                  true /* force_async */, pp->target,
@@ -811,8 +822,8 @@
     GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_ping");
     pping->wrapped_notify_arg.rr_policy = glb_policy->rr_policy;
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO, "Pending ping about to PING from 0x%" PRIxPTR "",
-              (intptr_t)glb_policy->rr_policy);
+      gpr_log(GPR_INFO, "[grpclb %p] Pending ping about to PING from RR %p",
+              glb_policy, glb_policy->rr_policy);
     }
     grpc_lb_policy_ping_one_locked(exec_ctx, glb_policy->rr_policy,
                                    &pping->wrapped_notify_arg.wrapper_closure);
@@ -827,15 +838,15 @@
   GPR_ASSERT(args != NULL);
   if (glb_policy->rr_policy != NULL) {
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_DEBUG, "Updating Round Robin policy (%p)",
-              (void*)glb_policy->rr_policy);
+      gpr_log(GPR_DEBUG, "[grpclb %p] Updating RR policy %p", glb_policy,
+              glb_policy->rr_policy);
     }
     grpc_lb_policy_update_locked(exec_ctx, glb_policy->rr_policy, args);
   } else {
     create_rr_locked(exec_ctx, glb_policy, args);
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_DEBUG, "Created new Round Robin policy (%p)",
-              (void*)glb_policy->rr_policy);
+      gpr_log(GPR_DEBUG, "[grpclb %p] Created new RR policy %p", glb_policy,
+              glb_policy->rr_policy);
     }
   }
   lb_policy_args_destroy(exec_ctx, args);
@@ -1177,8 +1188,8 @@
     if (rr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) {
       if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
         gpr_log(GPR_INFO,
-                "grpclb %p NOT picking from from RR %p: RR conn state=%s",
-                (void*)glb_policy, (void*)glb_policy->rr_policy,
+                "[grpclb %p] NOT picking from from RR %p: RR conn state=%s",
+                glb_policy, glb_policy->rr_policy,
                 grpc_connectivity_state_name(rr_connectivity_state));
       }
       add_pending_pick(&glb_policy->pending_picks, pick_args, target, context,
@@ -1186,8 +1197,8 @@
       pick_done = false;
     } else {  // RR not in shutdown
       if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-        gpr_log(GPR_INFO, "grpclb %p about to PICK from RR %p",
-                (void*)glb_policy, (void*)glb_policy->rr_policy);
+        gpr_log(GPR_INFO, "[grpclb %p] about to PICK from RR %p", glb_policy,
+                glb_policy->rr_policy);
       }
       GRPC_LB_POLICY_REF(glb_policy->rr_policy, "glb_pick");
       wrapped_rr_closure_arg* wc_arg =
@@ -1204,6 +1215,7 @@
       wc_arg->lb_token_mdelem_storage = pick_args->lb_token_mdelem_storage;
       wc_arg->initial_metadata = pick_args->initial_metadata;
       wc_arg->free_when_done = wc_arg;
+      wc_arg->glb_policy = pol;
       pick_done =
           pick_from_internal_rr_locked(exec_ctx, glb_policy, pick_args,
                                        false /* force_async */, target, wc_arg);
@@ -1211,9 +1223,8 @@
   } else {  // glb_policy->rr_policy == NULL
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
       gpr_log(GPR_DEBUG,
-              "No RR policy in grpclb instance %p. Adding to grpclb's pending "
-              "picks",
-              (void*)(glb_policy));
+              "[grpclb %p] No RR policy. Adding to grpclb's pending picks",
+              glb_policy);
     }
     add_pending_pick(&glb_policy->pending_picks, pick_args, target, context,
                      on_complete);
@@ -1259,12 +1270,11 @@
                                           grpc_error* error) {
   glb_lb_policy* glb_policy = (glb_lb_policy*)arg;
   glb_policy->retry_timer_active = false;
-  if (!glb_policy->shutting_down && error == GRPC_ERROR_NONE) {
+  if (!glb_policy->shutting_down && glb_policy->lb_call == NULL &&
+      error == GRPC_ERROR_NONE) {
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO, "Restaring call to LB server (grpclb %p)",
-              (void*)glb_policy);
+      gpr_log(GPR_INFO, "[grpclb %p] Restarting call to LB server", glb_policy);
     }
-    GPR_ASSERT(glb_policy->lb_call == NULL);
     query_for_backends_locked(exec_ctx, glb_policy);
   }
   GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base, "grpclb_retry_timer");
@@ -1284,14 +1294,16 @@
         grpc_backoff_step(exec_ctx, &glb_policy->lb_call_backoff_state)
             .next_attempt_start_time;
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_DEBUG, "Connection to LB server lost (grpclb: %p)...",
-              (void*)glb_policy);
+      gpr_log(GPR_DEBUG, "[grpclb %p] Connection to LB server lost...",
+              glb_policy);
       grpc_millis timeout = next_try - grpc_exec_ctx_now(exec_ctx);
       if (timeout > 0) {
-        gpr_log(GPR_DEBUG, "... retry_timer_active in %" PRIdPTR "ms.",
-                timeout);
+        gpr_log(GPR_DEBUG,
+                "[grpclb %p] ... retry_timer_active in %" PRIuPTR "ms.",
+                glb_policy, timeout);
       } else {
-        gpr_log(GPR_DEBUG, "... retry_timer_active immediately.");
+        gpr_log(GPR_DEBUG, "[grpclb %p] ... retry_timer_active immediately.",
+                glb_policy);
       }
     }
     GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "grpclb_retry_timer");
@@ -1392,7 +1404,7 @@
       exec_ctx, glb_policy->lb_call, &op, 1,
       &glb_policy->client_load_report_closure);
   if (call_error != GRPC_CALL_OK) {
-    gpr_log(GPR_ERROR, "call_error=%d", call_error);
+    gpr_log(GPR_ERROR, "[grpclb %p] call_error=%d", glb_policy, call_error);
     GPR_ASSERT(GRPC_CALL_OK == call_error);
   }
 }
@@ -1486,9 +1498,8 @@
 
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
     gpr_log(GPR_INFO,
-            "Query for backends (grpclb: %p, lb_channel: %p, lb_call: %p)",
-            (void*)glb_policy, (void*)glb_policy->lb_channel,
-            (void*)glb_policy->lb_call);
+            "[grpclb %p] Query for backends (lb_channel: %p, lb_call: %p)",
+            glb_policy, glb_policy->lb_channel, glb_policy->lb_call);
   }
   GPR_ASSERT(glb_policy->lb_call != NULL);
 
@@ -1578,9 +1589,9 @@
                                 &response->client_stats_report_interval));
         if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
           gpr_log(GPR_INFO,
-                  "received initial LB response message; "
+                  "[grpclb %p] Received initial LB response message; "
                   "client load reporting interval = %" PRIdPTR " milliseconds",
-                  glb_policy->client_stats_report_interval);
+                  glb_policy, glb_policy->client_stats_report_interval);
         }
         /* take a weak ref (won't prevent calling of \a glb_shutdown() if the
          * strong ref count goes to zero) to be unref'd in
@@ -1590,8 +1601,9 @@
         schedule_next_client_load_report(exec_ctx, glb_policy);
       } else if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
         gpr_log(GPR_INFO,
-                "received initial LB response message; "
-                "client load reporting NOT enabled");
+                "[grpclb %p] Received initial LB response message; client load "
+                "reporting NOT enabled",
+                glb_policy);
       }
       grpc_grpclb_initial_response_destroy(response);
       glb_policy->seen_initial_response = true;
@@ -1601,14 +1613,16 @@
       if (serverlist != NULL) {
         GPR_ASSERT(glb_policy->lb_call != NULL);
         if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-          gpr_log(GPR_INFO, "Serverlist with %lu servers received",
-                  (unsigned long)serverlist->num_servers);
+          gpr_log(GPR_INFO,
+                  "[grpclb %p] Serverlist with %" PRIuPTR " servers received",
+                  glb_policy, serverlist->num_servers);
           for (size_t i = 0; i < serverlist->num_servers; ++i) {
             grpc_resolved_address addr;
             parse_server(serverlist->servers[i], &addr);
             char* ipport;
             grpc_sockaddr_to_string(&ipport, &addr, false);
-            gpr_log(GPR_INFO, "Serverlist[%lu]: %s", (unsigned long)i, ipport);
+            gpr_log(GPR_INFO, "[grpclb %p] Serverlist[%" PRIuPTR "]: %s",
+                    glb_policy, i, ipport);
             gpr_free(ipport);
           }
         }
@@ -1618,7 +1632,9 @@
                                             serverlist)) {
             if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
               gpr_log(GPR_INFO,
-                      "Incoming server list identical to current, ignoring.");
+                      "[grpclb %p] Incoming server list identical to current, "
+                      "ignoring.",
+                      glb_policy);
             }
             grpc_grpclb_destroy_serverlist(serverlist);
           } else { /* new serverlist */
@@ -1644,12 +1660,16 @@
           }
         } else {
           if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-            gpr_log(GPR_INFO, "Received empty server list, ignoring.");
+            gpr_log(GPR_INFO,
+                    "[grpclb %p] Received empty server list, ignoring.",
+                    glb_policy);
           }
           grpc_grpclb_destroy_serverlist(serverlist);
         }
       } else { /* serverlist == NULL */
-        gpr_log(GPR_ERROR, "Invalid LB response received: '%s'. Ignoring.",
+        gpr_log(GPR_ERROR,
+                "[grpclb %p] Invalid LB response received: '%s'. Ignoring.",
+                glb_policy,
                 grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX));
       }
     }
@@ -1689,8 +1709,8 @@
     if (!glb_policy->shutting_down && error == GRPC_ERROR_NONE) {
       if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
         gpr_log(GPR_INFO,
-                "Falling back to use backends from resolver (grpclb %p)",
-                (void*)glb_policy);
+                "[grpclb %p] Falling back to use backends from resolver",
+                glb_policy);
       }
       GPR_ASSERT(glb_policy->fallback_backend_addresses != NULL);
       rr_handover_locked(exec_ctx, glb_policy);
@@ -1708,10 +1728,10 @@
     char* status_details =
         grpc_slice_to_c_string(glb_policy->lb_call_status_details);
     gpr_log(GPR_INFO,
-            "Status from LB server received. Status = %d, Details = '%s', "
-            "(call: %p), error %p",
-            glb_policy->lb_call_status, status_details,
-            (void*)glb_policy->lb_call, (void*)error);
+            "[grpclb %p] Status from LB server received. Status = %d, Details "
+            "= '%s', (call: %p), error '%s'",
+            glb_policy, glb_policy->lb_call_status, status_details,
+            glb_policy->lb_call, grpc_error_string(error));
     gpr_free(status_details);
   }
   /* We need to perform cleanups no matter what. */
@@ -1752,10 +1772,10 @@
           "glb_update_missing");
     } else {
       // otherwise, keep using the current LB channel (ignore this update).
-      gpr_log(GPR_ERROR,
-              "No valid LB addresses channel arg for grpclb %p update, "
-              "ignoring.",
-              (void*)glb_policy);
+      gpr_log(
+          GPR_ERROR,
+          "[grpclb %p] No valid LB addresses channel arg in update, ignoring.",
+          glb_policy);
     }
     return;
   }
@@ -1887,8 +1907,9 @@
   glb_policy->server_name =
       gpr_strdup(uri->path[0] == '/' ? uri->path + 1 : uri->path);
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-    gpr_log(GPR_INFO, "Will use '%s' as the server name for LB request.",
-            glb_policy->server_name);
+    gpr_log(GPR_INFO,
+            "[grpclb %p] Will use '%s' as the server name for LB request.",
+            glb_policy, glb_policy->server_name);
   }
   grpc_uri_destroy(uri);
 
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
index 125a418..c79ee56 100644
--- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
@@ -440,128 +440,111 @@
   //    for a subchannel in p->latest_pending_subchannel_list.  The
   //    goal here is to find a subchannel from the update that we can
   //    select in place of the current one.
-  if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE ||
-      sd->curr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) {
-    grpc_lb_subchannel_data_stop_connectivity_watch(exec_ctx, sd);
-  }
-  while (true) {
-    switch (sd->curr_connectivity_state) {
-      case GRPC_CHANNEL_READY: {
-        // Case 2.  Promote p->latest_pending_subchannel_list to
-        // p->subchannel_list.
-        if (sd->subchannel_list == p->latest_pending_subchannel_list) {
-          GPR_ASSERT(p->subchannel_list != NULL);
-          grpc_lb_subchannel_list_shutdown_and_unref(
-              exec_ctx, p->subchannel_list, "finish_update");
-          p->subchannel_list = p->latest_pending_subchannel_list;
-          p->latest_pending_subchannel_list = NULL;
-        }
-        // Cases 1 and 2.
-        grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
-                                    GRPC_CHANNEL_READY, GRPC_ERROR_NONE,
-                                    "connecting_ready");
-        sd->connected_subchannel = GRPC_CONNECTED_SUBCHANNEL_REF(
-            grpc_subchannel_get_connected_subchannel(sd->subchannel),
-            "connected");
-        p->selected = sd;
+  switch (sd->curr_connectivity_state) {
+    case GRPC_CHANNEL_READY: {
+      // Case 2.  Promote p->latest_pending_subchannel_list to
+      // p->subchannel_list.
+      if (sd->subchannel_list == p->latest_pending_subchannel_list) {
+        GPR_ASSERT(p->subchannel_list != NULL);
+        grpc_lb_subchannel_list_shutdown_and_unref(exec_ctx, p->subchannel_list,
+                                                   "finish_update");
+        p->subchannel_list = p->latest_pending_subchannel_list;
+        p->latest_pending_subchannel_list = NULL;
+      }
+      // Cases 1 and 2.
+      grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
+                                  GRPC_CHANNEL_READY, GRPC_ERROR_NONE,
+                                  "connecting_ready");
+      sd->connected_subchannel = GRPC_CONNECTED_SUBCHANNEL_REF(
+          grpc_subchannel_get_connected_subchannel(sd->subchannel),
+          "connected");
+      p->selected = sd;
+      if (GRPC_TRACER_ON(grpc_lb_pick_first_trace)) {
+        gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", (void*)p,
+                (void*)sd->subchannel);
+      }
+      // Drop all other subchannels, since we are now connected.
+      destroy_unselected_subchannels_locked(exec_ctx, p);
+      // Update any calls that were waiting for a pick.
+      pending_pick* pp;
+      while ((pp = p->pending_picks)) {
+        p->pending_picks = pp->next;
+        *pp->target = GRPC_CONNECTED_SUBCHANNEL_REF(
+            p->selected->connected_subchannel, "picked");
         if (GRPC_TRACER_ON(grpc_lb_pick_first_trace)) {
-          gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", (void*)p,
-                  (void*)sd->subchannel);
+          gpr_log(GPR_INFO,
+                  "Servicing pending pick with selected subchannel %p",
+                  (void*)p->selected);
         }
-        // Drop all other subchannels, since we are now connected.
-        destroy_unselected_subchannels_locked(exec_ctx, p);
-        // Update any calls that were waiting for a pick.
-        pending_pick* pp;
-        while ((pp = p->pending_picks)) {
-          p->pending_picks = pp->next;
-          *pp->target = GRPC_CONNECTED_SUBCHANNEL_REF(
-              p->selected->connected_subchannel, "picked");
-          if (GRPC_TRACER_ON(grpc_lb_pick_first_trace)) {
-            gpr_log(GPR_INFO,
-                    "Servicing pending pick with selected subchannel %p",
-                    (void*)p->selected);
-          }
-          GRPC_CLOSURE_SCHED(exec_ctx, pp->on_complete, GRPC_ERROR_NONE);
-          gpr_free(pp);
-        }
-        // Renew notification.
-        grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
-        return;
+        GRPC_CLOSURE_SCHED(exec_ctx, pp->on_complete, GRPC_ERROR_NONE);
+        gpr_free(pp);
       }
-      case GRPC_CHANNEL_TRANSIENT_FAILURE: {
-        do {
-          sd->subchannel_list->checking_subchannel =
-              (sd->subchannel_list->checking_subchannel + 1) %
-              sd->subchannel_list->num_subchannels;
-          sd = &sd->subchannel_list
-                    ->subchannels[sd->subchannel_list->checking_subchannel];
-        } while (sd->subchannel == NULL);
-        // Case 1: Only set state to TRANSIENT_FAILURE if we've tried
-        // all subchannels.
-        if (sd->subchannel_list->checking_subchannel == 0 &&
-            sd->subchannel_list == p->subchannel_list) {
-          grpc_connectivity_state_set(
-              exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
-              GRPC_ERROR_REF(error), "connecting_transient_failure");
-        }
-        sd->curr_connectivity_state =
-            grpc_subchannel_check_connectivity(sd->subchannel, &error);
-        GRPC_ERROR_UNREF(error);
-        if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
-          // Reuses the connectivity refs from the previous watch.
-          grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
-          return;
-        }
-        break;  // Go back to top of loop.
+      // Renew notification.
+      grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
+      break;
+    }
+    case GRPC_CHANNEL_TRANSIENT_FAILURE: {
+      grpc_lb_subchannel_data_stop_connectivity_watch(exec_ctx, sd);
+      do {
+        sd->subchannel_list->checking_subchannel =
+            (sd->subchannel_list->checking_subchannel + 1) %
+            sd->subchannel_list->num_subchannels;
+        sd = &sd->subchannel_list
+                  ->subchannels[sd->subchannel_list->checking_subchannel];
+      } while (sd->subchannel == NULL);
+      // Case 1: Only set state to TRANSIENT_FAILURE if we've tried
+      // all subchannels.
+      if (sd->subchannel_list->checking_subchannel == 0 &&
+          sd->subchannel_list == p->subchannel_list) {
+        grpc_connectivity_state_set(
+            exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
+            GRPC_ERROR_REF(error), "connecting_transient_failure");
       }
-      case GRPC_CHANNEL_CONNECTING:
-      case GRPC_CHANNEL_IDLE: {
-        // Only update connectivity state in case 1.
-        if (sd->subchannel_list == p->subchannel_list) {
-          grpc_connectivity_state_set(
-              exec_ctx, &p->state_tracker, GRPC_CHANNEL_CONNECTING,
-              GRPC_ERROR_REF(error), "connecting_changed");
-        }
-        // Renew notification.
-        grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
-        return;
+      // Reuses the connectivity refs from the previous watch.
+      grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
+      break;
+    }
+    case GRPC_CHANNEL_CONNECTING:
+    case GRPC_CHANNEL_IDLE: {
+      // Only update connectivity state in case 1.
+      if (sd->subchannel_list == p->subchannel_list) {
+        grpc_connectivity_state_set(
+            exec_ctx, &p->state_tracker, GRPC_CHANNEL_CONNECTING,
+            GRPC_ERROR_REF(error), "connecting_changed");
       }
-      case GRPC_CHANNEL_SHUTDOWN: {
-        grpc_lb_subchannel_data_unref_subchannel(exec_ctx, sd,
-                                                 "pf_candidate_shutdown");
-        // Advance to next subchannel and check its state.
-        grpc_lb_subchannel_data* original_sd = sd;
-        do {
-          sd->subchannel_list->checking_subchannel =
-              (sd->subchannel_list->checking_subchannel + 1) %
-              sd->subchannel_list->num_subchannels;
-          sd = &sd->subchannel_list
-                    ->subchannels[sd->subchannel_list->checking_subchannel];
-        } while (sd->subchannel == NULL && sd != original_sd);
-        if (sd == original_sd) {
-          grpc_lb_subchannel_list_unref_for_connectivity_watch(
-              exec_ctx, sd->subchannel_list, "pf_candidate_shutdown");
-          shutdown_locked(exec_ctx, p,
-                          GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
-                              "Pick first exhausted channels", &error, 1));
-          return;
-        }
-        if (sd->subchannel_list == p->subchannel_list) {
-          grpc_connectivity_state_set(
-              exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
-              GRPC_ERROR_REF(error), "subchannel_failed");
-        }
-        sd->curr_connectivity_state =
-            grpc_subchannel_check_connectivity(sd->subchannel, &error);
-        GRPC_ERROR_UNREF(error);
-        if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
-          // Reuses the connectivity refs from the previous watch.
-          grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
-          return;
-        }
-        // For any other state, go back to top of loop.
-        // We will reuse the connectivity refs from the previous watch.
+      // Renew notification.
+      grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
+      break;
+    }
+    case GRPC_CHANNEL_SHUTDOWN: {
+      grpc_lb_subchannel_data_stop_connectivity_watch(exec_ctx, sd);
+      grpc_lb_subchannel_data_unref_subchannel(exec_ctx, sd,
+                                               "pf_candidate_shutdown");
+      // Advance to next subchannel and check its state.
+      grpc_lb_subchannel_data* original_sd = sd;
+      do {
+        sd->subchannel_list->checking_subchannel =
+            (sd->subchannel_list->checking_subchannel + 1) %
+            sd->subchannel_list->num_subchannels;
+        sd = &sd->subchannel_list
+                  ->subchannels[sd->subchannel_list->checking_subchannel];
+      } while (sd->subchannel == NULL && sd != original_sd);
+      if (sd == original_sd) {
+        grpc_lb_subchannel_list_unref_for_connectivity_watch(
+            exec_ctx, sd->subchannel_list, "pf_candidate_shutdown");
+        shutdown_locked(exec_ctx, p,
+                        GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+                            "Pick first exhausted channels", &error, 1));
+        break;
       }
+      if (sd->subchannel_list == p->subchannel_list) {
+        grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
+                                    GRPC_CHANNEL_TRANSIENT_FAILURE,
+                                    GRPC_ERROR_REF(error), "subchannel_failed");
+      }
+      // Reuses the connectivity refs from the previous watch.
+      grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd);
+      break;
     }
   }
 }
diff --git a/tools/interop_matrix/client_matrix.py b/tools/interop_matrix/client_matrix.py
index 51f04ea..6bd7ff6 100644
--- a/tools/interop_matrix/client_matrix.py
+++ b/tools/interop_matrix/client_matrix.py
@@ -27,7 +27,7 @@
     'cxx': ['cxx'],             # This is actually debian8.
     'go': ['go1.7', 'go1.8'],
     'java': ['java_oracle8'],
-    'python': ['python'],
+    #'python': ['python'],  # All python versions fail the tests due to timeout.
     'node': ['node'],
     'ruby': ['ruby'],
     'php': ['php', 'php7'],
@@ -64,14 +64,14 @@
         'v1.6.1',
         'v1.7.0',
     ],
-    'python': [
+    #'python': [
         #'v1.0.x',  #Fail to run the test. #13230.
-        'v1.1.4',
-        'v1.2.5',
-        'v1.3.9',
-        'v1.4.2',
-        'v1.6.6',
-    ],
+    #    'v1.1.4',
+    #    'v1.2.5',
+    #    'v1.3.9',
+    #    'v1.4.2',
+    #    'v1.6.6',
+    #],
     'node': [
         'v1.0.1',
         'v1.1.4',
@@ -97,11 +97,10 @@
         'v1.6.6',
     ],
    'csharp': [
-        # Fail to build images due to #13278.
         #'v1.0.1',
-        #'v1.1.4',
-        #'v1.2.5',
-        #'v1.3.9',
+        #'v1.1.4',  Fail to build.
+        #'v1.2.5',  Fail to run test with csharp image.
+        'v1.3.9',
         'v1.4.2',
         'v1.6.6',
     ],
diff --git a/tools/interop_matrix/run_interop_matrix_tests.py b/tools/interop_matrix/run_interop_matrix_tests.py
index 491438a..dce1033 100755
--- a/tools/interop_matrix/run_interop_matrix_tests.py
+++ b/tools/interop_matrix/run_interop_matrix_tests.py
@@ -83,6 +83,8 @@
 
 args = argp.parse_args()
 
+print(str(args))
+
 
 def find_all_images_for_lang(lang):
   """Find docker images for a language across releases and runtimes.
@@ -170,7 +172,6 @@
     jobset.message('START', 'Testing %s' % image, do_newline=True)
     # Download the docker image before running each test case.
     subprocess.check_call(['gcloud', 'docker', '--', 'pull', image])
-    _docker_images_cleanup.append(image)
     suite_name = '%s__%s_%s' % (lang, runtime, release)
     job_spec_list = find_test_cases(lang, runtime, release, suite_name)
     
@@ -197,17 +198,17 @@
         'grpc_interop_matrix',
         suite_name,
         str(uuid.uuid4()))
+
+    if not args.keep:
+      cleanup(image)
   
   return total_num_failures
 
 
-_docker_images_cleanup = []
-def cleanup():
-  if not args.keep:
-    for image in _docker_images_cleanup:
-      dockerjob.remove_image(image, skip_nonexistent=True)
+def cleanup(image):
+  jobset.message('START', 'Cleanup docker image %s' % image, do_newline=True)
+  dockerjob.remove_image(image, skip_nonexistent=True)
 
-atexit.register(cleanup)
 
 languages = args.language if args.language != ['all'] else _LANGUAGES
 total_num_failures = 0
diff --git a/tools/jenkins/run_full_performance.sh b/tools/jenkins/run_full_performance.sh
index 195cc68..8657cc9 100755
--- a/tools/jenkins/run_full_performance.sh
+++ b/tools/jenkins/run_full_performance.sh
@@ -21,7 +21,7 @@
 
 # run 8core client vs 8core server
 tools/run_tests/run_performance_tests.py \
-    -l c++ csharp node ruby java python go node_express php7 php7_protobuf_c \
+    -l c++ csharp ruby java python go php7 php7_protobuf_c \
     --netperf \
     --category scalable \
     --bq_result_table performance_test.performance_experiment \