nvme-fabrics: Allow ctrl loss timeout configuration
When a host sense that its controller session is damaged,
it tries to re-establish it periodically (reconnect every
reconnect_delay). It may very well be that the controller
is gone and never coming back, in this case the host will
try to reconnect forever.
Add a ctrl_loss_tmo to bound the number of reconnect attempts
to a specific controller (default to a reasonable 10 minutes).
The timeout configuration is actually translated into number of
reconnect attempts and not a schedule on its own but rather
divided with reconnect_delay. This is useful to prevent
racing flows of remove and reconnect, and it doesn't really
matter if we remove slightly sooner than what the user requested.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 1560181..f5a9c1f 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -21,6 +21,8 @@
#define NVMF_MAX_QUEUE_SIZE 1024
#define NVMF_DEF_QUEUE_SIZE 128
#define NVMF_DEF_RECONNECT_DELAY 10
+/* default to 600 seconds of reconnect attempts before giving up */
+#define NVMF_DEF_CTRL_LOSS_TMO 600
/*
* Define a host as seen by the target. We allocate one at boot, but also
@@ -53,6 +55,7 @@ enum {
NVMF_OPT_HOSTNQN = 1 << 8,
NVMF_OPT_RECONNECT_DELAY = 1 << 9,
NVMF_OPT_HOST_TRADDR = 1 << 10,
+ NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
};
/**
@@ -77,6 +80,10 @@ enum {
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
* @kato: Keep-alive timeout.
* @host: Virtual NVMe host, contains the NQN and Host ID.
+ * @nr_reconnects: number of reconnect attempted since the last ctrl failure
+ * @max_reconnects: maximum number of allowed reconnect attempts before removing
+ * the controller, (-1) means reconnect forever, zero means remove
+ * immediately;
*/
struct nvmf_ctrl_options {
unsigned mask;
@@ -91,6 +98,8 @@ struct nvmf_ctrl_options {
bool discovery_nqn;
unsigned int kato;
struct nvmf_host *host;
+ int nr_reconnects;
+ int max_reconnects;
};
/*
@@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
#endif /* _NVME_FABRICS_H */