[SCTP]: Allow spillover of receive buffer to avoid deadlock.

This patch fixes a deadlock situation in the receive path by allowing
temporary spillover of the receive buffer.

- If the chunk we receive has a tsn that immediately follows the ctsn,
  accept it even if we run out of receive buffer space and renege data with
  higher TSNs.
- Once we accept one chunk in a packet, accept all the remaining chunks
  even if we run out of receive buffer space.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Mark Butler <butlerm@middle.net>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index eba99f3..7f4fea1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -712,6 +712,7 @@
 	__u8 tsn_gap_acked;	/* Is this chunk acked by a GAP ACK? */
 	__s8 fast_retransmit;	 /* Is this chunk fast retransmitted? */
 	__u8 tsn_missing_report; /* Data chunk missing counter. */
+	__u8 data_accepted; 	/* At least 1 chunk in this packet accepted */
 };
 
 void sctp_chunk_hold(struct sctp_chunk *);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 297b895..cf0c767 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -149,6 +149,7 @@
 		/* This is the first chunk in the packet.  */
 		chunk->singleton = 1;
 		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+		chunk->data_accepted = 0;
 	}
 
         chunk->chunk_hdr = ch;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 2b9a832..f5d131f 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5151,7 +5151,9 @@
 	int tmp;
 	__u32 tsn;
 	int account_value;
+	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
+	int rcvbuf_over = 0;
 
 	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
 	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
@@ -5162,10 +5164,16 @@
 	/* ASSERT:  Now skb->data is really the user data.  */
 
 	/*
-	 * if we are established, and we have used up our receive
-	 * buffer memory, drop the frame
+	 * If we are established, and we have used up our receive buffer
+	 * memory, think about droping the frame.
+	 * Note that we have an opportunity to improve performance here.
+	 * If we accept one chunk from an skbuff, we have to keep all the
+	 * memory of that skbuff around until the chunk is read into user
+	 * space. Therefore, once we accept 1 chunk we may as well accept all
+	 * remaining chunks in the skbuff. The data_accepted flag helps us do
+	 * that.
 	 */
-	if (asoc->state == SCTP_STATE_ESTABLISHED) {
+	if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
 		/*
 		 * If the receive buffer policy is 1, then each
 		 * association can allocate up to sk_rcvbuf bytes
@@ -5176,9 +5184,25 @@
 			account_value = atomic_read(&asoc->rmem_alloc);
 		else
 			account_value = atomic_read(&sk->sk_rmem_alloc);
+		if (account_value > sk->sk_rcvbuf) {
+			/*
+			 * We need to make forward progress, even when we are
+			 * under memory pressure, so we always allow the
+			 * next tsn after the ctsn ack point to be accepted.
+			 * This lets us avoid deadlocks in which we have to
+			 * drop frames that would otherwise let us drain the
+			 * receive queue.
+			 */
+			if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
+				return SCTP_IERROR_IGNORE_TSN;
 
-		if (account_value > sk->sk_rcvbuf)
-			return SCTP_IERROR_IGNORE_TSN;
+			/*
+			 * We're going to accept the frame but we should renege
+			 * to make space for it. This will send us down that
+			 * path later in this function.
+			 */
+			rcvbuf_over = 1;
+		}
 	}
 
 	/* Process ECN based congestion.
@@ -5226,6 +5250,7 @@
 	datalen -= sizeof(sctp_data_chunk_t);
 
 	deliver = SCTP_CMD_CHUNK_ULP;
+	chunk->data_accepted = 1;
 
 	/* Think about partial delivery. */
 	if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) {
@@ -5242,7 +5267,8 @@
 	 * large spill over.
 	 */
 	if (!asoc->rwnd || asoc->rwnd_over ||
-	    (datalen > asoc->rwnd + asoc->frag_point)) {
+	    (datalen > asoc->rwnd + asoc->frag_point) ||
+	    rcvbuf_over) {
 
 		/* If this is the next TSN, consider reneging to make
 		 * room.   Note: Playing nice with a confused sender.  A
@@ -5250,8 +5276,8 @@
 		 * space and in the future we may want to detect and
 		 * do more drastic reneging.
 		 */
-		if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) &&
-		    (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) {
+		if (sctp_tsnmap_has_gap(map) &&
+		    (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
 			SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn);
 			deliver = SCTP_CMD_RENEGE;
 		} else {