blob: 5006e7e3f0addec3536714ec58455d520fe95f74 [file] [log] [blame]
/*
*
* Copyright 2016, Google Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
/** Implementation of the gRPC LB policy.
*
* This policy takes as input a set of resolved addresses {a1..an} for which the
* LB set was set (it's the resolver's responsibility to ensure this). That is
* to say, {a1..an} represent a collection of LB servers.
*
* An internal channel (\a glb_lb_policy.lb_channel) is created over {a1..an}.
* This channel behaves just like a regular channel. In particular, the
* constructed URI over the addresses a1..an will use the default pick first
* policy to select from this list of LB server backends.
*
* The first time the policy gets a request for a pick, a ping, or to exit the
* idle state, \a query_for_backends() is called. It creates an instance of \a
* lb_client_data, an internal struct meant to contain the data associated with
* the internal communication with the LB server. This instance is created via
* \a lb_client_data_create(). There, the call over lb_channel to pick-first
* from {a1..an} is created, the \a LoadBalancingRequest message is assembled
* and all necessary callbacks for the progress of the internal call configured.
*
* Back in \a query_for_backends(), the internal *streaming* call to the LB
* server (whichever address from {a1..an} pick-first chose) is kicked off.
* It'll progress over the callbacks configured in \a lb_client_data_create()
* (see the field docstrings of \a lb_client_data for more details).
*
* If the call fails with UNIMPLEMENTED, the original call will also fail.
* There's a misconfiguration somewhere: at least one of {a1..an} isn't a LB
* server, which contradicts the LB bit being set. If the internal call times
* out, the usual behavior of pick-first applies, continuing to pick from the
* list {a1..an}.
*
* Upon sucesss, a \a LoadBalancingResponse is expected in \a res_recv_cb. An
* invalid one results in the termination of the streaming call. A new streaming
* call should be created if possible, failing the original call otherwise.
* For a valid \a LoadBalancingResponse, the server list of actual backends is
* extracted. A Round Robin policy will be created from this list. There are two
* possible scenarios:
*
* 1. This is the first server list received. There was no previous instance of
* the Round Robin policy. \a rr_handover() will instantiate the RR policy
* and perform all the pending operations over it.
* 2. There's already a RR policy instance active. We need to introduce the new
* one build from the new serverlist, but taking care not to disrupt the
* operations in progress over the old RR instance. This is done by
* decreasing the reference count on the old policy. The moment no more
* references are held on the old RR policy, it'll be destroyed and \a
* rr_connectivity_changed notified with a \a GRPC_CHANNEL_SHUTDOWN state.
* At this point we can transition to a new RR instance safely, which is done
* once again via \a rr_handover().
*
*
* Once a RR policy instance is in place (and getting updated as described),
* calls to for a pick, a ping or a cancellation will be serviced right away by
* forwarding them to the RR instance. Any time there's no RR policy available
* (ie, right after the creation of the gRPCLB policy, if an empty serverlist
* is received, etc), pick/ping requests are added to a list of pending
* picks/pings to be flushed and serviced as part of \a rr_handover() the moment
* the RR policy instance becomes available.
*
* \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the
* high level design and details. */
/* TODO(dgq):
* - Implement LB service forwarding (point 2c. in the doc's diagram).
*/
#include <string.h>
#include <grpc/byte_buffer_reader.h>
#include <grpc/grpc.h>
#include <grpc/support/alloc.h>
#include <grpc/support/host_port.h>
#include <grpc/support/string_util.h>
#include "src/core/ext/client_channel/client_channel_factory.h"
#include "src/core/ext/client_channel/lb_policy_registry.h"
#include "src/core/ext/client_channel/parse_address.h"
#include "src/core/ext/lb_policy/grpclb/grpclb.h"
#include "src/core/ext/lb_policy/grpclb/load_balancer_api.h"
#include "src/core/lib/iomgr/sockaddr_utils.h"
#include "src/core/lib/support/string.h"
#include "src/core/lib/surface/call.h"
#include "src/core/lib/surface/channel.h"
int grpc_lb_glb_trace = 0;
typedef struct wrapped_rr_closure_arg {
/* the original closure. Usually a on_complete/notify cb for pick() and ping()
* calls against the internal RR instance, respectively. */
grpc_closure *wrapped_closure;
/* The RR instance related to the closure */
grpc_lb_policy *rr_policy;
/* when not NULL, represents a pending_{pick,ping} node to be freed upon
* closure execution */
void *owning_pending_node; /* to be freed if not NULL */
} wrapped_rr_closure_arg;
/* The \a on_complete closure passed as part of the pick requires keeping a
* reference to its associated round robin instance. We wrap this closure in
* order to unref the round robin instance upon its invocation */
static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
wrapped_rr_closure_arg *wc_arg = arg;
if (wc_arg->rr_policy != NULL) {
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
(intptr_t)wc_arg->rr_policy);
}
GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
}
GPR_ASSERT(wc_arg->wrapped_closure != NULL);
grpc_exec_ctx_sched(exec_ctx, wc_arg->wrapped_closure, error, NULL);
gpr_free(wc_arg->owning_pending_node);
}
/* Linked list of pending pick requests. It stores all information needed to
* eventually call (Round Robin's) pick() on them. They mainly stay pending
* waiting for the RR policy to be created/updated.
*
* One particularity is the wrapping of the user-provided \a on_complete closure
* (in \a wrapped_on_complete and \a wrapped_on_complete_arg). This is needed in
* order to correctly unref the RR policy instance upon completion of the pick.
* See \a wrapped_rr_closure for details. */
typedef struct pending_pick {
struct pending_pick *next;
/* polling entity for the pick()'s async notification */
grpc_polling_entity *pollent;
/* the initial metadata for the pick. See grpc_lb_policy_pick() */
grpc_metadata_batch *initial_metadata;
/* bitmask passed to pick() and used for selective cancelling. See
* grpc_lb_policy_cancel_picks() */
uint32_t initial_metadata_flags;
/* output argument where to store the pick()ed connected subchannel, or NULL
* upon error. */
grpc_connected_subchannel **target;
/* a closure wrapping the original on_complete one to be invoked once the
* pick() has completed (regardless of success) */
grpc_closure wrapped_on_complete;
/* args for wrapped_on_complete */
wrapped_rr_closure_arg wrapped_on_complete_arg;
} pending_pick;
static void add_pending_pick(pending_pick **root, grpc_polling_entity *pollent,
grpc_metadata_batch *initial_metadata,
uint32_t initial_metadata_flags,
grpc_connected_subchannel **target,
grpc_closure *on_complete) {
pending_pick *pp = gpr_malloc(sizeof(*pp));
memset(pp, 0, sizeof(pending_pick));
memset(&pp->wrapped_on_complete_arg, 0, sizeof(wrapped_rr_closure_arg));
pp->next = *root;
pp->pollent = pollent;
pp->target = target;
pp->initial_metadata = initial_metadata;
pp->initial_metadata_flags = initial_metadata_flags;
pp->wrapped_on_complete_arg.wrapped_closure = on_complete;
grpc_closure_init(&pp->wrapped_on_complete, wrapped_rr_closure,
&pp->wrapped_on_complete_arg);
*root = pp;
}
/* Same as the \a pending_pick struct but for ping operations */
typedef struct pending_ping {
struct pending_ping *next;
/* a closure wrapping the original on_complete one to be invoked once the
* ping() has completed (regardless of success) */
grpc_closure wrapped_notify;
/* args for wrapped_notify */
wrapped_rr_closure_arg wrapped_notify_arg;
} pending_ping;
static void add_pending_ping(pending_ping **root, grpc_closure *notify) {
pending_ping *pping = gpr_malloc(sizeof(*pping));
memset(pping, 0, sizeof(pending_ping));
memset(&pping->wrapped_notify_arg, 0, sizeof(wrapped_rr_closure_arg));
pping->next = *root;
grpc_closure_init(&pping->wrapped_notify, wrapped_rr_closure,
&pping->wrapped_notify_arg);
pping->wrapped_notify_arg.wrapped_closure = notify;
*root = pping;
}
/*
* glb_lb_policy
*/
typedef struct rr_connectivity_data rr_connectivity_data;
struct lb_client_data;
static const grpc_lb_policy_vtable glb_lb_policy_vtable;
typedef struct glb_lb_policy {
/** base policy: must be first */
grpc_lb_policy base;
/** mutex protecting remaining members */
gpr_mu mu;
grpc_client_channel_factory *cc_factory;
/** for communicating with the LB server */
grpc_channel *lb_channel;
/** the RR policy to use of the backend servers returned by the LB server */
grpc_lb_policy *rr_policy;
bool started_picking;
/** our connectivity state tracker */
grpc_connectivity_state_tracker state_tracker;
/** stores the deserialized response from the LB. May be NULL until one such
* response has arrived. */
grpc_grpclb_serverlist *serverlist;
/** list of picks that are waiting on RR's policy connectivity */
pending_pick *pending_picks;
/** list of pings that are waiting on RR's policy connectivity */
pending_ping *pending_pings;
/** client data associated with the LB server communication */
struct lb_client_data *lb_client;
/** for tracking of the RR connectivity */
rr_connectivity_data *rr_connectivity;
/* a wrapped (see \a wrapped_rr_closure) on-complete closure for readily
* available RR picks */
grpc_closure wrapped_on_complete;
/* arguments for the wrapped_on_complete closure */
wrapped_rr_closure_arg wc_arg;
} glb_lb_policy;
/* Keeps track and reacts to changes in connectivity of the RR instance */
struct rr_connectivity_data {
grpc_closure on_change;
grpc_connectivity_state state;
glb_lb_policy *glb_policy;
};
static grpc_lb_policy *create_rr(grpc_exec_ctx *exec_ctx,
const grpc_grpclb_serverlist *serverlist,
glb_lb_policy *glb_policy) {
/* TODO(dgq): support mixed ip version */
GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0);
char **host_ports = gpr_malloc(sizeof(char *) * serverlist->num_servers);
for (size_t i = 0; i < serverlist->num_servers; ++i) {
gpr_join_host_port(&host_ports[i], serverlist->servers[i]->ip_address,
serverlist->servers[i]->port);
}
size_t uri_path_len;
char *concat_ipports = gpr_strjoin_sep(
(const char **)host_ports, serverlist->num_servers, ",", &uri_path_len);
grpc_lb_policy_args args;
args.client_channel_factory = glb_policy->cc_factory;
args.addresses = gpr_malloc(sizeof(grpc_resolved_addresses));
args.addresses->naddrs = serverlist->num_servers;
args.addresses->addrs =
gpr_malloc(sizeof(grpc_resolved_address) * args.addresses->naddrs);
size_t out_addrs_idx = 0;
for (size_t i = 0; i < serverlist->num_servers; ++i) {
grpc_uri uri;
struct sockaddr_storage sa;
size_t sa_len;
uri.path = host_ports[i];
if (parse_ipv4(&uri, &sa, &sa_len)) { /* TODO(dgq): add support for ipv6 */
memcpy(args.addresses->addrs[out_addrs_idx].addr, &sa, sa_len);
args.addresses->addrs[out_addrs_idx].len = sa_len;
++out_addrs_idx;
} else {
gpr_log(GPR_ERROR, "Invalid LB service address '%s', ignoring.",
host_ports[i]);
}
}
grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args);
gpr_free(concat_ipports);
for (size_t i = 0; i < serverlist->num_servers; i++) {
gpr_free(host_ports[i]);
}
gpr_free(host_ports);
gpr_free(args.addresses->addrs);
gpr_free(args.addresses);
return rr;
}
static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
grpc_error *error) {
GRPC_ERROR_REF(error);
glb_policy->rr_policy =
create_rr(exec_ctx, glb_policy->serverlist, glb_policy);
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Created RR policy (0x%" PRIxPTR ")",
(intptr_t)glb_policy->rr_policy);
}
GPR_ASSERT(glb_policy->rr_policy != NULL);
glb_policy->rr_connectivity->state = grpc_lb_policy_check_connectivity(
exec_ctx, glb_policy->rr_policy, &error);
grpc_lb_policy_notify_on_state_change(
exec_ctx, glb_policy->rr_policy, &glb_policy->rr_connectivity->state,
&glb_policy->rr_connectivity->on_change);
grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
glb_policy->rr_connectivity->state, error,
"rr_handover");
grpc_lb_policy_exit_idle(exec_ctx, glb_policy->rr_policy);
/* flush pending ops */
pending_pick *pp;
while ((pp = glb_policy->pending_picks)) {
glb_policy->pending_picks = pp->next;
GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_pick");
pp->wrapped_on_complete_arg.rr_policy = glb_policy->rr_policy;
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Pending pick about to PICK from 0x%" PRIxPTR "",
(intptr_t)glb_policy->rr_policy);
}
grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pp->pollent,
pp->initial_metadata, pp->initial_metadata_flags,
pp->target, &pp->wrapped_on_complete);
pp->wrapped_on_complete_arg.owning_pending_node = pp;
}
pending_ping *pping;
while ((pping = glb_policy->pending_pings)) {
glb_policy->pending_pings = pping->next;
GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_ping");
pping->wrapped_notify_arg.rr_policy = glb_policy->rr_policy;
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Pending ping about to PING from 0x%" PRIxPTR "",
(intptr_t)glb_policy->rr_policy);
}
grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy,
&pping->wrapped_notify);
pping->wrapped_notify_arg.owning_pending_node = pping;
}
GRPC_ERROR_UNREF(error);
}
static void rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
rr_connectivity_data *rr_conn_data = arg;
glb_lb_policy *glb_policy = rr_conn_data->glb_policy;
if (rr_conn_data->state == GRPC_CHANNEL_SHUTDOWN) {
if (glb_policy->serverlist != NULL) {
/* a RR policy is shutting down but there's a serverlist available ->
* perform a handover */
rr_handover(exec_ctx, glb_policy, error);
} else {
/* shutting down and no new serverlist available. Bail out. */
gpr_free(rr_conn_data);
}
} else {
if (error == GRPC_ERROR_NONE) {
/* RR not shutting down. Mimic the RR's policy state */
grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
rr_conn_data->state, error,
"rr_connectivity_changed");
/* resubscribe */
grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy,
&rr_conn_data->state,
&rr_conn_data->on_change);
} else { /* error */
gpr_free(rr_conn_data);
}
}
GRPC_ERROR_UNREF(error);
}
static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
grpc_lb_policy_factory *factory,
grpc_lb_policy_args *args) {
glb_lb_policy *glb_policy = gpr_malloc(sizeof(*glb_policy));
memset(glb_policy, 0, sizeof(*glb_policy));
/* All input addresses in args->addresses come from a resolver that claims
* they are LB services. It's the resolver's responsibility to make sure this
* policy is only instantiated and used in that case.
*
* Create a client channel over them to communicate with a LB service */
glb_policy->cc_factory = args->client_channel_factory;
GPR_ASSERT(glb_policy->cc_factory != NULL);
if (args->addresses->naddrs == 0) {
return NULL;
}
/* construct a target from the args->addresses, in the form
* ipvX://ip1:port1,ip2:port2,...
* TODO(dgq): support mixed ip version */
char **addr_strs = gpr_malloc(sizeof(char *) * args->addresses->naddrs);
addr_strs[0] =
grpc_sockaddr_to_uri((const struct sockaddr *)&args->addresses->addrs[0]);
for (size_t i = 1; i < args->addresses->naddrs; i++) {
GPR_ASSERT(grpc_sockaddr_to_string(
&addr_strs[i],
(const struct sockaddr *)&args->addresses->addrs[i],
true) == 0);
}
size_t uri_path_len;
char *target_uri_str = gpr_strjoin_sep(
(const char **)addr_strs, args->addresses->naddrs, ",", &uri_path_len);
/* will pick using pick_first */
glb_policy->lb_channel = grpc_client_channel_factory_create_channel(
exec_ctx, glb_policy->cc_factory, target_uri_str,
GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, NULL);
gpr_free(target_uri_str);
for (size_t i = 0; i < args->addresses->naddrs; i++) {
gpr_free(addr_strs[i]);
}
gpr_free(addr_strs);
if (glb_policy->lb_channel == NULL) {
gpr_free(glb_policy);
return NULL;
}
rr_connectivity_data *rr_connectivity =
gpr_malloc(sizeof(rr_connectivity_data));
memset(rr_connectivity, 0, sizeof(rr_connectivity_data));
grpc_closure_init(&rr_connectivity->on_change, rr_connectivity_changed,
rr_connectivity);
rr_connectivity->glb_policy = glb_policy;
glb_policy->rr_connectivity = rr_connectivity;
grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable);
gpr_mu_init(&glb_policy->mu);
grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE,
"grpclb");
return &glb_policy->base;
}
static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
GPR_ASSERT(glb_policy->pending_picks == NULL);
GPR_ASSERT(glb_policy->pending_pings == NULL);
grpc_channel_destroy(glb_policy->lb_channel);
glb_policy->lb_channel = NULL;
grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker);
if (glb_policy->serverlist != NULL) {
grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
}
gpr_mu_destroy(&glb_policy->mu);
gpr_free(glb_policy);
}
static void lb_client_data_destroy(struct lb_client_data *lb_client);
static void glb_shutdown(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
pending_pick *pp = glb_policy->pending_picks;
glb_policy->pending_picks = NULL;
pending_ping *pping = glb_policy->pending_pings;
glb_policy->pending_pings = NULL;
gpr_mu_unlock(&glb_policy->mu);
while (pp != NULL) {
pending_pick *next = pp->next;
*pp->target = NULL;
grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, GRPC_ERROR_NONE,
NULL);
gpr_free(pp);
pp = next;
}
while (pping != NULL) {
pending_ping *next = pping->next;
grpc_exec_ctx_sched(exec_ctx, &pping->wrapped_notify, GRPC_ERROR_NONE,
NULL);
pping = next;
}
if (glb_policy->rr_policy) {
/* unsubscribe */
grpc_lb_policy_notify_on_state_change(
exec_ctx, glb_policy->rr_policy, NULL,
&glb_policy->rr_connectivity->on_change);
GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
}
lb_client_data_destroy(glb_policy->lb_client);
glb_policy->lb_client = NULL;
grpc_connectivity_state_set(
exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
GRPC_ERROR_CREATE("Channel Shutdown"), "glb_shutdown");
}
static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_connected_subchannel **target) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
pending_pick *pp = glb_policy->pending_picks;
glb_policy->pending_picks = NULL;
while (pp != NULL) {
pending_pick *next = pp->next;
if (pp->target == target) {
grpc_polling_entity_del_from_pollset_set(
exec_ctx, pp->pollent, glb_policy->base.interested_parties);
*target = NULL;
grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete,
GRPC_ERROR_CANCELLED, NULL);
gpr_free(pp);
} else {
pp->next = glb_policy->pending_picks;
glb_policy->pending_picks = pp;
}
pp = next;
}
gpr_mu_unlock(&glb_policy->mu);
}
static grpc_call *lb_client_data_get_call(struct lb_client_data *lb_client);
static void glb_cancel_picks(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
uint32_t initial_metadata_flags_mask,
uint32_t initial_metadata_flags_eq) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
if (glb_policy->lb_client != NULL) {
/* cancel the call to the load balancer service, if any */
grpc_call_cancel(lb_client_data_get_call(glb_policy->lb_client), NULL);
}
pending_pick *pp = glb_policy->pending_picks;
glb_policy->pending_picks = NULL;
while (pp != NULL) {
pending_pick *next = pp->next;
if ((pp->initial_metadata_flags & initial_metadata_flags_mask) ==
initial_metadata_flags_eq) {
grpc_polling_entity_del_from_pollset_set(
exec_ctx, pp->pollent, glb_policy->base.interested_parties);
grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete,
GRPC_ERROR_CANCELLED, NULL);
gpr_free(pp);
} else {
pp->next = glb_policy->pending_picks;
glb_policy->pending_picks = pp;
}
pp = next;
}
gpr_mu_unlock(&glb_policy->mu);
}
static void query_for_backends(grpc_exec_ctx *exec_ctx,
glb_lb_policy *glb_policy);
static void start_picking(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy) {
glb_policy->started_picking = true;
query_for_backends(exec_ctx, glb_policy);
}
static void glb_exit_idle(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
if (!glb_policy->started_picking) {
start_picking(exec_ctx, glb_policy);
}
gpr_mu_unlock(&glb_policy->mu);
}
static int glb_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_polling_entity *pollent,
grpc_metadata_batch *initial_metadata,
uint32_t initial_metadata_flags,
grpc_connected_subchannel **target,
grpc_closure *on_complete) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
int r;
if (glb_policy->rr_policy != NULL) {
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "about to PICK from 0x%" PRIxPTR "",
(intptr_t)glb_policy->rr_policy);
}
GRPC_LB_POLICY_REF(glb_policy->rr_policy, "glb_pick");
memset(&glb_policy->wc_arg, 0, sizeof(wrapped_rr_closure_arg));
glb_policy->wc_arg.rr_policy = glb_policy->rr_policy;
glb_policy->wc_arg.wrapped_closure = on_complete;
grpc_closure_init(&glb_policy->wrapped_on_complete, wrapped_rr_closure,
&glb_policy->wc_arg);
r = grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pollent,
initial_metadata, initial_metadata_flags, target,
&glb_policy->wrapped_on_complete);
if (r != 0) {
/* the call to grpc_lb_policy_pick has been sychronous. Unreffing the RR
* policy and notify the original callback */
glb_policy->wc_arg.wrapped_closure = NULL;
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
(intptr_t)glb_policy->wc_arg.rr_policy);
}
GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->wc_arg.rr_policy, "glb_pick");
grpc_exec_ctx_sched(exec_ctx, glb_policy->wc_arg.wrapped_closure,
GRPC_ERROR_NONE, NULL);
}
} else {
grpc_polling_entity_add_to_pollset_set(exec_ctx, pollent,
glb_policy->base.interested_parties);
add_pending_pick(&glb_policy->pending_picks, pollent, initial_metadata,
initial_metadata_flags, target, on_complete);
if (!glb_policy->started_picking) {
start_picking(exec_ctx, glb_policy);
}
r = 0;
}
gpr_mu_unlock(&glb_policy->mu);
return r;
}
static grpc_connectivity_state glb_check_connectivity(
grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_error **connectivity_error) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
grpc_connectivity_state st;
gpr_mu_lock(&glb_policy->mu);
st = grpc_connectivity_state_check(&glb_policy->state_tracker,
connectivity_error);
gpr_mu_unlock(&glb_policy->mu);
return st;
}
static void glb_ping_one(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_closure *closure) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
if (glb_policy->rr_policy) {
grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy, closure);
} else {
add_pending_ping(&glb_policy->pending_pings, closure);
if (!glb_policy->started_picking) {
start_picking(exec_ctx, glb_policy);
}
}
gpr_mu_unlock(&glb_policy->mu);
}
static void glb_notify_on_state_change(grpc_exec_ctx *exec_ctx,
grpc_lb_policy *pol,
grpc_connectivity_state *current,
grpc_closure *notify) {
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
gpr_mu_lock(&glb_policy->mu);
grpc_connectivity_state_notify_on_state_change(
exec_ctx, &glb_policy->state_tracker, current, notify);
gpr_mu_unlock(&glb_policy->mu);
}
/*
* lb_client_data
*
* Used internally for the client call to the LB */
typedef struct lb_client_data {
gpr_mu mu;
/* called once initial metadata's been sent */
grpc_closure md_sent;
/* called once initial metadata's been received */
grpc_closure md_rcvd;
/* called once the LoadBalanceRequest has been sent to the LB server. See
* src/proto/grpc/.../load_balancer.proto */
grpc_closure req_sent;
/* A response from the LB server has been received (or error). Process it */
grpc_closure res_rcvd;
/* After the client has sent a close to the LB server */
grpc_closure close_sent;
/* ... and the status from the LB server has been received */
grpc_closure srv_status_rcvd;
grpc_call *lb_call; /* streaming call to the LB server, */
gpr_timespec deadline; /* for the streaming call to the LB server */
grpc_metadata_array initial_metadata_recv; /* initial MD from LB server */
grpc_metadata_array trailing_metadata_recv; /* trailing MD from LB server */
/* what's being sent to the LB server. Note that its value may vary if the LB
* server indicates a redirect. */
grpc_byte_buffer *request_payload;
/* response from the LB server, if any. Processed in res_recv_cb() */
grpc_byte_buffer *response_payload;
/* the call's status and status detailset in srv_status_rcvd_cb() */
grpc_status_code status;
char *status_details;
size_t status_details_capacity;
/* pointer back to the enclosing policy */
glb_lb_policy *glb_policy;
} lb_client_data;
static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error);
static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error);
static lb_client_data *lb_client_data_create(glb_lb_policy *glb_policy) {
lb_client_data *lb_client = gpr_malloc(sizeof(lb_client_data));
memset(lb_client, 0, sizeof(lb_client_data));
gpr_mu_init(&lb_client->mu);
grpc_closure_init(&lb_client->md_sent, md_sent_cb, lb_client);
grpc_closure_init(&lb_client->md_rcvd, md_recv_cb, lb_client);
grpc_closure_init(&lb_client->req_sent, req_sent_cb, lb_client);
grpc_closure_init(&lb_client->res_rcvd, res_recv_cb, lb_client);
grpc_closure_init(&lb_client->close_sent, close_sent_cb, lb_client);
grpc_closure_init(&lb_client->srv_status_rcvd, srv_status_rcvd_cb, lb_client);
/* TODO(dgq): get the deadline from the client config instead of fabricating
* one here. */
lb_client->deadline = gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(3, GPR_TIMESPAN));
/* Note the following LB call progresses every time there's activity in \a
* glb_policy->base.interested_parties, which is comprised of the polling
* entities passed to glb_pick(). */
lb_client->lb_call = grpc_channel_create_pollset_set_call(
glb_policy->lb_channel, NULL, GRPC_PROPAGATE_DEFAULTS,
glb_policy->base.interested_parties, "/BalanceLoad",
NULL, /* FIXME(dgq): which "host" value to use? */
lb_client->deadline, NULL);
grpc_metadata_array_init(&lb_client->initial_metadata_recv);
grpc_metadata_array_init(&lb_client->trailing_metadata_recv);
grpc_grpclb_request *request = grpc_grpclb_request_create(
"load.balanced.service.name"); /* FIXME(dgq): get the name of the load
balanced service from the resolver */
gpr_slice request_payload_slice = grpc_grpclb_request_encode(request);
lb_client->request_payload =
grpc_raw_byte_buffer_create(&request_payload_slice, 1);
gpr_slice_unref(request_payload_slice);
grpc_grpclb_request_destroy(request);
lb_client->status_details = NULL;
lb_client->status_details_capacity = 0;
lb_client->glb_policy = glb_policy;
return lb_client;
}
static void lb_client_data_destroy(lb_client_data *lb_client) {
grpc_call_destroy(lb_client->lb_call);
grpc_metadata_array_destroy(&lb_client->initial_metadata_recv);
grpc_metadata_array_destroy(&lb_client->trailing_metadata_recv);
grpc_byte_buffer_destroy(lb_client->request_payload);
gpr_free(lb_client->status_details);
gpr_mu_destroy(&lb_client->mu);
gpr_free(lb_client);
}
static grpc_call *lb_client_data_get_call(lb_client_data *lb_client) {
return lb_client->lb_call;
}
/*
* Auxiliary functions and LB client callbacks.
*/
static void query_for_backends(grpc_exec_ctx *exec_ctx,
glb_lb_policy *glb_policy) {
GPR_ASSERT(glb_policy->lb_channel != NULL);
glb_policy->lb_client = lb_client_data_create(glb_policy);
grpc_call_error call_error;
grpc_op ops[1];
memset(ops, 0, sizeof(ops));
grpc_op *op = ops;
op->op = GRPC_OP_SEND_INITIAL_METADATA;
op->data.send_initial_metadata.count = 0;
op->flags = 0;
op->reserved = NULL;
op++;
call_error = grpc_call_start_batch_and_execute(
exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
&glb_policy->lb_client->md_sent);
GPR_ASSERT(GRPC_CALL_OK == call_error);
op = ops;
op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
op->data.recv_status_on_client.trailing_metadata =
&glb_policy->lb_client->trailing_metadata_recv;
op->data.recv_status_on_client.status = &glb_policy->lb_client->status;
op->data.recv_status_on_client.status_details =
&glb_policy->lb_client->status_details;
op->data.recv_status_on_client.status_details_capacity =
&glb_policy->lb_client->status_details_capacity;
op->flags = 0;
op->reserved = NULL;
op++;
call_error = grpc_call_start_batch_and_execute(
exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
&glb_policy->lb_client->srv_status_rcvd);
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
lb_client_data *lb_client = arg;
GPR_ASSERT(lb_client->lb_call);
grpc_op ops[1];
memset(ops, 0, sizeof(ops));
grpc_op *op = ops;
op->op = GRPC_OP_RECV_INITIAL_METADATA;
op->data.recv_initial_metadata = &lb_client->initial_metadata_recv;
op->flags = 0;
op->reserved = NULL;
op++;
grpc_call_error call_error = grpc_call_start_batch_and_execute(
exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
&lb_client->md_rcvd);
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
lb_client_data *lb_client = arg;
GPR_ASSERT(lb_client->lb_call);
grpc_op ops[1];
memset(ops, 0, sizeof(ops));
grpc_op *op = ops;
op->op = GRPC_OP_SEND_MESSAGE;
op->data.send_message = lb_client->request_payload;
op->flags = 0;
op->reserved = NULL;
op++;
grpc_call_error call_error = grpc_call_start_batch_and_execute(
exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
&lb_client->req_sent);
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
lb_client_data *lb_client = arg;
grpc_op ops[1];
memset(ops, 0, sizeof(ops));
grpc_op *op = ops;
op->op = GRPC_OP_RECV_MESSAGE;
op->data.recv_message = &lb_client->response_payload;
op->flags = 0;
op->reserved = NULL;
op++;
grpc_call_error call_error = grpc_call_start_batch_and_execute(
exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
&lb_client->res_rcvd);
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
lb_client_data *lb_client = arg;
grpc_op ops[2];
memset(ops, 0, sizeof(ops));
grpc_op *op = ops;
if (lb_client->response_payload != NULL) {
/* Received data from the LB server. Look inside
* lb_client->response_payload, for
* a serverlist. */
grpc_byte_buffer_reader bbr;
grpc_byte_buffer_reader_init(&bbr, lb_client->response_payload);
gpr_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
grpc_byte_buffer_destroy(lb_client->response_payload);
grpc_grpclb_serverlist *serverlist =
grpc_grpclb_response_parse_serverlist(response_slice);
if (serverlist != NULL) {
gpr_slice_unref(response_slice);
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO, "Serverlist with %zu servers received",
serverlist->num_servers);
}
/* update serverlist */
if (serverlist->num_servers > 0) {
if (grpc_grpclb_serverlist_equals(lb_client->glb_policy->serverlist,
serverlist)) {
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO,
"Incoming server list identical to current, ignoring.");
}
} else { /* new serverlist */
if (lb_client->glb_policy->serverlist != NULL) {
/* dispose of the old serverlist */
grpc_grpclb_destroy_serverlist(lb_client->glb_policy->serverlist);
}
/* and update the copy in the glb_lb_policy instance */
lb_client->glb_policy->serverlist = serverlist;
}
if (lb_client->glb_policy->rr_policy == NULL) {
/* initial "handover", in this case from a null RR policy, meaning
* it'll just create the first RR policy instance */
rr_handover(exec_ctx, lb_client->glb_policy, error);
} else {
/* unref the RR policy, eventually leading to its substitution with a
* new one constructed from the received serverlist (see
* rr_connectivity_changed) */
GRPC_LB_POLICY_UNREF(exec_ctx, lb_client->glb_policy->rr_policy,
"serverlist_received");
}
} else {
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO,
"Received empty server list. Picks will stay pending until a "
"response with > 0 servers is received");
}
}
/* keep listening for serverlist updates */
op->op = GRPC_OP_RECV_MESSAGE;
op->data.recv_message = &lb_client->response_payload;
op->flags = 0;
op->reserved = NULL;
op++;
const grpc_call_error call_error = grpc_call_start_batch_and_execute(
exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
&lb_client->res_rcvd); /* loop */
GPR_ASSERT(GRPC_CALL_OK == call_error);
return;
}
GPR_ASSERT(serverlist == NULL);
gpr_log(GPR_ERROR, "Invalid LB response received: '%s'",
gpr_dump_slice(response_slice, GPR_DUMP_ASCII));
gpr_slice_unref(response_slice);
/* Disconnect from server returning invalid response. */
op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
op->flags = 0;
op->reserved = NULL;
op++;
grpc_call_error call_error = grpc_call_start_batch_and_execute(
exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
&lb_client->close_sent);
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
/* empty payload: call cancelled by server. Cleanups happening in
* srv_status_rcvd_cb */
}
static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO,
"Close from LB client sent. Waiting from server status now");
}
}
static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
lb_client_data *lb_client = arg;
if (grpc_lb_glb_trace) {
gpr_log(GPR_INFO,
"status from lb server received. Status = %d, Details = '%s', "
"Capaticy "
"= %zu",
lb_client->status, lb_client->status_details,
lb_client->status_details_capacity);
}
/* TODO(dgq): deal with stream termination properly (fire up another one? fail
* the original call?) */
}
/* Code wiring the policy with the rest of the core */
static const grpc_lb_policy_vtable glb_lb_policy_vtable = {
glb_destroy, glb_shutdown, glb_pick,
glb_cancel_pick, glb_cancel_picks, glb_ping_one,
glb_exit_idle, glb_check_connectivity, glb_notify_on_state_change};
static void glb_factory_ref(grpc_lb_policy_factory *factory) {}
static void glb_factory_unref(grpc_lb_policy_factory *factory) {}
static const grpc_lb_policy_factory_vtable glb_factory_vtable = {
glb_factory_ref, glb_factory_unref, glb_create, "grpclb"};
static grpc_lb_policy_factory glb_lb_policy_factory = {&glb_factory_vtable};
grpc_lb_policy_factory *grpc_glb_lb_factory_create() {
return &glb_lb_policy_factory;
}
/* Plugin registration */
void grpc_lb_policy_grpclb_init() {
grpc_register_lb_policy(grpc_glb_lb_factory_create());
grpc_register_tracer("glb", &grpc_lb_glb_trace);
}
void grpc_lb_policy_grpclb_shutdown() {}