RDMA/nes: Fix hang issues for large cluster dynamic connections
Running large cluster setup, we are hanging after many hours of
testing. Fixing this required going over the code and making sure the
rexmit entry was properly removed based on the cm_node's state and
packet received. Also when receiving a FIN packet, check seq# and
make sure there were no errors before calling handle_fin().
Following are the changes done in nes_cm.c:
* handle_ack_pkt() needs to return error value, so in case of error,
handle_fin() is not called. Some cleanup done while going over the code.
* handle_rst_pkt(), handling of cm_node's NES_CM_STATE_LAST_ACK is missing.
* process_packet(), in case of FIN only packet is received, call
check_seq() before processing.
* in handle_fin_pkt(), we are calling cleanup_retrans_entry() for all
conditions, even if the packets need to be dropped.
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 4969c38..2c90b38 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1326,18 +1326,20 @@
nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
"refcnt=%d\n", cm_node, cm_node->state,
atomic_read(&cm_node->ref_count));
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
switch (cm_node->state) {
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_SYN_SENT:
case NES_CM_STATE_ESTABLISHED:
case NES_CM_STATE_MPAREQ_SENT:
case NES_CM_STATE_MPAREJ_RCVD:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_LAST_ACK;
send_fin(cm_node, NULL);
break;
case NES_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSING;
send_ack(cm_node, NULL);
/* Wait for ACK as this is simultanous close..
@@ -1345,11 +1347,15 @@
* Just rm the node.. Done.. */
break;
case NES_CM_STATE_FIN_WAIT2:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_TIME_WAIT;
send_ack(cm_node, NULL);
schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0);
break;
case NES_CM_STATE_TIME_WAIT:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_node->cm_core, cm_node);
break;
@@ -1385,7 +1391,6 @@
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
- cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
dev_kfree_skb_any(skb);
break;
@@ -1399,17 +1404,16 @@
active_open_err(cm_node, skb, reset);
break;
case NES_CM_STATE_CLOSED:
- cleanup_retrans_entry(cm_node);
drop_packet(skb);
break;
+ case NES_CM_STATE_LAST_ACK:
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
case NES_CM_STATE_TIME_WAIT:
- cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_node->cm_core, cm_node);
drop_packet(skb);
break;
case NES_CM_STATE_FIN_WAIT1:
- cleanup_retrans_entry(cm_node);
nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
default:
drop_packet(skb);
@@ -1456,6 +1460,7 @@
NES_PASSIVE_STATE_INDICATED);
break;
case NES_CM_STATE_MPAREQ_SENT:
+ cleanup_retrans_entry(cm_node);
if (res_type == NES_MPA_REQUEST_REJECT) {
type = NES_CM_EVENT_MPA_REJECT;
cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
@@ -1653,49 +1658,39 @@
}
}
-static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
struct tcphdr *tcph)
{
int datasize = 0;
u32 inc_sequence;
u32 rem_seq_ack;
u32 rem_seq;
- int ret;
+ int ret = 0;
int optionsize;
optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
if (check_seq(cm_node, tcph, skb))
- return;
+ return -EINVAL;
skb_pull(skb, tcph->doff << 2);
inc_sequence = ntohl(tcph->seq);
rem_seq = ntohl(tcph->seq);
rem_seq_ack = ntohl(tcph->ack_seq);
datasize = skb->len;
- cleanup_retrans_entry(cm_node);
switch (cm_node->state) {
case NES_CM_STATE_SYN_RCVD:
/* Passive OPEN */
+ cleanup_retrans_entry(cm_node);
ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
if (ret)
break;
cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- if (cm_node->tcp_cntxt.rem_ack_num !=
- cm_node->tcp_cntxt.loc_seq_num) {
- nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
- cleanup_retrans_entry(cm_node);
- send_reset(cm_node, skb);
- return;
- }
cm_node->state = NES_CM_STATE_ESTABLISHED;
- cleanup_retrans_entry(cm_node);
if (datasize) {
cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
handle_rcv_mpa(cm_node, skb);
- } else { /* rcvd ACK only */
+ } else /* rcvd ACK only */
dev_kfree_skb_any(skb);
- cleanup_retrans_entry(cm_node);
- }
break;
case NES_CM_STATE_ESTABLISHED:
/* Passive OPEN */
@@ -1707,15 +1702,12 @@
drop_packet(skb);
break;
case NES_CM_STATE_MPAREQ_SENT:
- cleanup_retrans_entry(cm_node);
cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
if (datasize) {
cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
handle_rcv_mpa(cm_node, skb);
- } else { /* Could be just an ack pkt.. */
- cleanup_retrans_entry(cm_node);
+ } else /* Could be just an ack pkt.. */
dev_kfree_skb_any(skb);
- }
break;
case NES_CM_STATE_LISTENING:
case NES_CM_STATE_CLOSED:
@@ -1723,11 +1715,10 @@
send_reset(cm_node, skb);
break;
case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
cm_node->cm_id->rem_ref(cm_node->cm_id);
- case NES_CM_STATE_CLOSING:
- cleanup_retrans_entry(cm_node);
rem_ref_cm_node(cm_node->cm_core, cm_node);
drop_packet(skb);
break;
@@ -1742,9 +1733,11 @@
case NES_CM_STATE_MPAREQ_RCVD:
case NES_CM_STATE_UNKNOWN:
default:
+ cleanup_retrans_entry(cm_node);
drop_packet(skb);
break;
}
+ return ret;
}
@@ -1850,6 +1843,7 @@
enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
struct tcphdr *tcph = tcp_hdr(skb);
u32 fin_set = 0;
+ int ret = 0;
skb_pull(skb, ip_hdr(skb)->ihl << 2);
nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
@@ -1875,17 +1869,17 @@
handle_synack_pkt(cm_node, skb, tcph);
break;
case NES_PKT_TYPE_ACK:
- handle_ack_pkt(cm_node, skb, tcph);
- if (fin_set)
+ ret = handle_ack_pkt(cm_node, skb, tcph);
+ if (fin_set && !ret)
handle_fin_pkt(cm_node);
break;
case NES_PKT_TYPE_RST:
handle_rst_pkt(cm_node, skb, tcph);
break;
default:
- drop_packet(skb);
- if (fin_set)
+ if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
handle_fin_pkt(cm_node);
+ drop_packet(skb);
break;
}
}