ocfs2: introduce sc->sc_send_lock to protect outbound outbound messages

When there is a lot of multithreaded I/O usage, two threads can collide
while sending out a message to the other nodes. This is due to the lack of
locking between threads while sending out the messages.

When a connected TCP send(), sendto(), or sendmsg() arrives in the Linux
kernel, it eventually comes through tcp_sendmsg(). tcp_sendmsg() protects
itself by acquiring a lock at invocation by calling lock_sock().
tcp_sendmsg() then loops over the buffers in the iovec, allocating
associated sk_buff's and cache pages for use in the actual send. As it does
so, it pushes the data out to tcp for actual transmission. However, if one
of those allocation fails (because a large number of large sends is being
processed, for example), it must wait for memory to become available. It
does so by jumping to wait_for_sndbuf or wait_for_memory, both of which
eventually cause a call to sk_stream_wait_memory(). sk_stream_wait_memory()
contains a code path that calls sk_wait_event(). Finally, sk_wait_event()
contains the call to release_sock().

The following patch adds a lock to the socket container in order to
properly serialize outbound requests.

From: Zhen Wei <zwei@novell.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2021aec..1718215 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -556,6 +556,8 @@
 	sk->sk_data_ready = o2net_data_ready;
 	sk->sk_state_change = o2net_state_change;
 
+	mutex_init(&sc->sc_send_lock);
+
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -858,10 +860,12 @@
 	ssize_t ret;
 
 
+	mutex_lock(&sc->sc_send_lock);
 	ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
 					 virt_to_page(kmalloced_virt),
 					 (long)kmalloced_virt & ~PAGE_MASK,
 					 size, MSG_DONTWAIT);
+	mutex_unlock(&sc->sc_send_lock);
 	if (ret != size) {
 		mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 
 		     " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
@@ -976,8 +980,10 @@
 
 	/* finally, convert the message header to network byte-order
 	 * and send */
+	mutex_lock(&sc->sc_send_lock);
 	ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen,
 				 sizeof(struct o2net_msg) + caller_bytes);
+	mutex_unlock(&sc->sc_send_lock);
 	msglog(msg, "sending returned %d\n", ret);
 	if (ret < 0) {
 		mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret);
@@ -1109,8 +1115,10 @@
 
 out_respond:
 	/* this destroys the hdr, so don't use it after this */
+	mutex_lock(&sc->sc_send_lock);
 	ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr,
 				      handler_status);
+	mutex_unlock(&sc->sc_send_lock);
 	hdr = NULL;
 	mlog(0, "sending handler status %d, syserr %d returned %d\n",
 	     handler_status, syserr, ret);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 177927a..4dae5df 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -155,6 +155,8 @@
 	struct timeval 		sc_tv_func_stop;
 	u32			sc_msg_key;
 	u16			sc_msg_type;
+
+	struct mutex		sc_send_lock;
 };
 
 struct o2net_msg_handler {