[AF_IUCV]: Implementation of a skb backlog queue

The initial implementation lacked an skb backlog queue, so socket
receive processing dropped packets when the socket receive queue was
full. Since AF_IUCV connections work synchronously, this led to
connection hangs; read, close and select misbehaved as well.

Using an skb backlog queue fixes all of these problems: when
sock_queue_rcv_skb() cannot accept an skb in the receive callback, the
skb is parked on backlog_skb_q and moved to the receive queue from
recvmsg once there is room again. In addition, close() now waits in the
new IUCV_CLOSING state, honoring SO_LINGER, until all outstanding sends
have been acknowledged, and oversized messages are fragmented to fit
the receive buffer.

Signed-off-by: Jennifer Hunt <jenhunt@us.ibm.com>
Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 04d1abb..f9bd11b 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -28,6 +28,7 @@
 	IUCV_LISTEN,
 	IUCV_SEVERED,
 	IUCV_DISCONN,
+	IUCV_CLOSING,
 	IUCV_CLOSED
 };
 
@@ -62,6 +63,7 @@
 	struct sock		*parent;
 	struct iucv_path	*path;
 	struct sk_buff_head	send_skb_q;
+	struct sk_buff_head	backlog_skb_q;
 	unsigned int		send_tag;
 };
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index e84c924..026704a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -147,6 +147,7 @@
 	unsigned char user_data[16];
 	struct iucv_sock *iucv = iucv_sk(sk);
 	int err;
+	unsigned long timeo;
 
 	iucv_sock_clear_timer(sk);
 	lock_sock(sk);
@@ -159,6 +160,21 @@
 	case IUCV_CONNECTED:
 	case IUCV_DISCONN:
 		err = 0;
+
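+		/* Move to CLOSING so iucv_callback_txdone() can wake us
+		 * up once all outstanding sends have been acknowledged.
+		 */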
+		sk->sk_state = IUCV_CLOSING;
+		sk->sk_state_change(sk);
+
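+		/* wait for the send queue to drain, honoring SO_LINGER */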
+		if (!skb_queue_empty(&iucv->send_skb_q)) {
+			if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+				timeo = sk->sk_lingertime;
+			else
+				timeo = IUCV_DISCONN_TIMEOUT;
+			err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+		}
+
+		sk->sk_state = IUCV_CLOSED;
+		sk->sk_state_change(sk);
+
 		if (iucv->path) {
 			low_nmcpy(user_data, iucv->src_name);
 			high_nmcpy(user_data, iucv->dst_name);
@@ -168,12 +184,11 @@
 			iucv->path = NULL;
 		}
 
-		sk->sk_state = IUCV_CLOSED;
-		sk->sk_state_change(sk);
 		sk->sk_err = ECONNRESET;
 		sk->sk_state_change(sk);
 
 		skb_queue_purge(&iucv->send_skb_q);
+		skb_queue_purge(&iucv->backlog_skb_q);
 
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
@@ -204,6 +219,7 @@
 	sock_init_data(sock, sk);
 	INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
 	skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
+	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
 	iucv_sk(sk)->send_tag = 0;
 
 	sk->sk_destruct = iucv_sock_destruct;
@@ -510,7 +526,7 @@
 	long timeo;
 	int err = 0;
 
-	lock_sock(sk);
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 
 	if (sk->sk_state != IUCV_LISTEN) {
 		err = -EBADFD;
@@ -530,7 +546,7 @@
 
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 
 		if (sk->sk_state != IUCV_LISTEN) {
 			err = -EBADFD;
@@ -606,7 +622,7 @@
 		if(!(skb = sock_alloc_send_skb(sk, len,
 				       msg->msg_flags & MSG_DONTWAIT,
 				       &err)))
-			return err;
+			goto out;
 
 		if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){
 			err = -EFAULT;
@@ -647,10 +663,16 @@
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
 	int target, copied = 0;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *rskb, *cskb;
 	int err = 0;
 
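+	/* on a broken connection, report EOF once all data is consumed */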
+	if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
+	    skb_queue_empty(&iucv->backlog_skb_q) &&
+	    skb_queue_empty(&sk->sk_receive_queue))
+		return 0;
+
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
 
@@ -665,10 +687,12 @@
 
 	copied = min_t(unsigned int, skb->len, len);
 
-	if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) {
+	cskb = skb;
+	if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
 		skb_queue_head(&sk->sk_receive_queue, skb);
 		if (copied == 0)
 			return -EFAULT;
+		goto done;
 	}
 
 	len -= copied;
@@ -683,6 +707,18 @@
 		}
 
 		kfree_skb(skb);
+
+		/* Queue backlog skbs */
+		rskb = skb_dequeue(&iucv->backlog_skb_q);
+		while (rskb) {
+			if (sock_queue_rcv_skb(sk, rskb)) {
+				skb_queue_head(&iucv->backlog_skb_q, rskb);
+				break;
+			}
+			rskb = skb_dequeue(&iucv->backlog_skb_q);
+		}
 	} else
 		skb_queue_head(&sk->sk_receive_queue, skb);
 
@@ -732,6 +768,9 @@
 	if (sk->sk_state == IUCV_CLOSED)
 		mask |= POLLHUP;
 
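+	/* mark a broken connection readable so recv can report EOF */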
+	if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED)
+		mask |= POLLIN;
+
 	if (sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
@@ -817,13 +856,6 @@
 		iucv_sk(sk)->path = NULL;
 	}
 
-	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){
-		lock_sock(sk);
-		err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0,
-					   sk->sk_lingertime);
-		release_sock(sk);
-	}
-
 	sock_orphan(sk);
 	iucv_sock_kill(sk);
 	return err;
@@ -927,18 +959,52 @@
 	sk->sk_state_change(sk);
 }
 
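+/*
+ * Split a received message that is too large for the socket receive
+ * buffer into skbs of at most sk_rcvbuf / 4 bytes each.
+ */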
+static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len,
+			     struct sk_buff_head *fragmented_skb_q)
+{
+	int dataleft, size, copied = 0;
+	struct sk_buff *nskb;
+
+	dataleft = len;
+	while (dataleft) {
+		if (dataleft >= sk->sk_rcvbuf / 4)
+			size = sk->sk_rcvbuf / 4;
+		else
+			size = dataleft;
+
+		nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
+		if (!nskb)
+			return -ENOMEM;
+
+		memcpy(nskb->data, skb->data + copied, size);
+		copied += size;
+		dataleft -= size;
+
+		skb_reset_transport_header(nskb);
+		skb_reset_network_header(nskb);
+		nskb->len = size;
+
+		skb_queue_tail(fragmented_skb_q, nskb);
+	}
+
+	return 0;
+}
+
 static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 {
 	struct sock *sk = path->private;
-	struct sk_buff *skb;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	struct sk_buff *skb, *fskb;
+	struct sk_buff_head fragmented_skb_q;
 	int rc;
 
+	skb_queue_head_init(&fragmented_skb_q);
+
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		return;
 
 	skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA);
 	if (!skb) {
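+		/* no memory to queue the message: sever the path */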
-		iucv_message_reject(path, msg);
+		iucv_path_sever(path, NULL);
 		return;
 	}
 
@@ -952,14 +1018,39 @@
 			kfree_skb(skb);
 			return;
 		}
-
-		skb_reset_transport_header(skb);
-		skb_reset_network_header(skb);
-		skb->len = msg->length;
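+		/* fragment large messages so they fit the receive buffer */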
+		if (skb->truesize >= sk->sk_rcvbuf / 4) {
+			rc = iucv_fragment_skb(sk, skb, msg->length,
+					       &fragmented_skb_q);
+			kfree_skb(skb);
+			skb = NULL;
+			if (rc) {
+				iucv_path_sever(path, NULL);
+				return;
+			}
+		} else {
+			skb_reset_transport_header(skb);
+			skb_reset_network_header(skb);
+			skb->len = msg->length;
+		}
+	}
+	/* Queue the fragmented skb */
+	fskb = skb_dequeue(&fragmented_skb_q);
+	while (fskb) {
+		if (!skb_queue_empty(&iucv->backlog_skb_q))
+			skb_queue_tail(&iucv->backlog_skb_q, fskb);
+		else if (sock_queue_rcv_skb(sk, fskb))
+			skb_queue_tail(&iucv->backlog_skb_q, fskb);
+		fskb = skb_dequeue(&fragmented_skb_q);
 	}
 
-	if (sock_queue_rcv_skb(sk, skb))
-		kfree_skb(skb);
+	/* Queue the original skb if it exists (was not fragmented) */
+	if (skb) {
+		if (!skb_queue_empty(&iucv->backlog_skb_q))
+			skb_queue_tail(&iucv->backlog_skb_q, skb);
+		else if (sock_queue_rcv_skb(sk, skb))
+			skb_queue_tail(&iucv->backlog_skb_q, skb);
+	}
 }
 
 static void iucv_callback_txdone(struct iucv_path *path,
@@ -971,17 +1062,27 @@
 	struct sk_buff *list_skb = list->next;
 	unsigned long flags;
 
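+	/* find and free the sent skb that matches this message tag */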
-	spin_lock_irqsave(&list->lock, flags);
+	if (list_skb) {
+		spin_lock_irqsave(&list->lock, flags);
 
-	do {
-		this = list_skb;
-		list_skb = list_skb->next;
-	} while (memcmp(&msg->tag, this->cb, 4));
+		do {
+			this = list_skb;
+			list_skb = list_skb->next;
+		} while (memcmp(&msg->tag, this->cb, 4) && list_skb);
 
-	spin_unlock_irqrestore(&list->lock, flags);
+		spin_unlock_irqrestore(&list->lock, flags);
 
-	skb_unlink(this, &iucv_sk(sk)->send_skb_q);
-	kfree_skb(this);
+		skb_unlink(this, &iucv_sk(sk)->send_skb_q);
+		kfree_skb(this);
+	}
+
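+	/* wake up a process blocked in close() once the send queue is empty */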
+	if (sk->sk_state == IUCV_CLOSING) {
+		if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+			sk->sk_state = IUCV_CLOSED;
+			sk->sk_state_change(sk);
+		}
+	}
 }
 
 static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])