misc: mic: SCIF poll

SCIF poll allows both user and kernel mode clients to wait for
events on a SCIF endpoint. These events include availability of
space or data in the SCIF ring buffer, availability of connection
requests on a listening endpoint, and completion of connections
when using asynchronous connects.
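
User mode clients pick up these events through the new poll file
operation on the SCIF device, so the regular poll(2)/epoll interfaces
apply. For kernel mode clients, usage would look roughly like the
sketch below; the helper name, the error mapping and the timeout
handling are illustrative only, and "epd" is assumed to be an
endpoint obtained earlier via scif_open()/scif_connect():

	#include <linux/errno.h>
	#include <linux/poll.h>
	#include <linux/scif.h>

	/* Wait up to timeout_msecs for data to arrive on epd */
	static int scif_wait_for_data(scif_epd_t epd, long timeout_msecs)
	{
		struct scif_pollepd pollepd = {
			.epd = epd,
			.events = POLLIN,
		};
		int ret;

		ret = scif_poll(&pollepd, 1, timeout_msecs);
		if (ret < 0)
			return ret;	/* -EINTR or poll error */
		if (!ret)
			return -ETIME;	/* timed out */
		if (pollepd.revents & POLLIN)
			return 0;	/* scif_recv() will not block */
		return -ENOTCONN;	/* POLLERR/POLLHUP on the endpoint */
	}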

Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index f39d313..bf2d70f 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -37,9 +37,21 @@
 	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
 };
 
+/*
+ * File operations for the anonymous inode file associated with a SCIF
+ * endpoint, used for kernel mode SCIF poll. Kernel mode SCIF poll calls
+ * parts of the in-kernel poll API, which take a struct file *. Since
+ * kernel mode SCIF clients have no struct file of their own, an
+ * anonymous file is attached to each endpoint for this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+	.owner = THIS_MODULE,
+};
+
 scif_epd_t scif_open(void)
 {
 	struct scif_endpt *ep;
+	int err;
 
 	might_sleep();
 	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
@@ -50,6 +62,10 @@
 	if (!ep->qp_info.qp)
 		goto err_qp_alloc;
 
+	err = scif_anon_inode_getfile(ep);
+	if (err)
+		goto err_anon_inode;
+
 	spin_lock_init(&ep->lock);
 	mutex_init(&ep->sendlock);
 	mutex_init(&ep->recvlock);
@@ -59,6 +75,8 @@
 		"SCIFAPI open: ep %p success\n", ep);
 	return ep;
 
+err_anon_inode:
+	kfree(ep->qp_info.qp);
 err_qp_alloc:
 	kfree(ep);
 err_ep_alloc:
@@ -279,6 +297,7 @@
 	}
 	}
 	scif_put_port(ep->port.port);
+	scif_anon_inode_fput(ep);
 	scif_teardown_ep(ep);
 	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
 	return 0;
@@ -558,8 +577,10 @@
 			list_del(&ep->conn_list);
 		}
 		spin_unlock(&scif_info.nb_connect_lock);
-		if (ep)
+		if (ep) {
 			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
 	} while (ep);
 }
 
@@ -660,6 +681,7 @@
 	ep->remote_dev = &scif_dev[dst->node];
 	ep->qp_info.qp->magic = SCIFEP_MAGIC;
 	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		init_waitqueue_head(&ep->conn_pend_wq);
 		spin_lock(&scif_info.nb_connect_lock);
 		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
 		spin_unlock(&scif_info.nb_connect_lock);
@@ -788,6 +810,10 @@
 		goto scif_accept_error_qpalloc;
 	}
 
+	err = scif_anon_inode_getfile(cep);
+	if (err)
+		goto scif_accept_error_anon_inode;
+
 	cep->qp_info.qp->magic = SCIFEP_MAGIC;
 	spdev = scif_get_peer_dev(cep->remote_dev);
 	if (IS_ERR(spdev)) {
@@ -858,6 +884,8 @@
 	spin_unlock(&cep->lock);
 	return 0;
 scif_accept_error_map:
+	scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
 	scif_teardown_ep(cep);
 scif_accept_error_qpalloc:
 	kfree(cep);
@@ -1247,6 +1275,134 @@
 }
 EXPORT_SYMBOL_GPL(scif_recv);
 
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+				   poll_table *p, struct scif_endpt *ep)
+{
+	/*
+	 * poll_wait() can make a GFP_KERNEL allocation, so drop the endpoint
+	 * lock across the call and re-acquire it afterwards. The endpoint
+	 * state may have changed while the lock was dropped, so it must be
+	 * checked again once the lock is re-acquired; the callers in
+	 * __scif_pollfd(..) do exactly that.
+	 */
+	spin_unlock(&ep->lock);
+	poll_wait(f, wq, p);
+	spin_lock(&ep->lock);
+}
+
+unsigned int
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+	unsigned int mask = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+
+	/* Endpoint is waiting for a non-blocking connect to complete */
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			if (ep->state == SCIFEP_CONNECTED ||
+			    ep->state == SCIFEP_DISCONNECTED ||
+			    ep->conn_err)
+				mask |= POLLOUT;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is listening for incoming connection requests */
+	if (ep->state == SCIFEP_LISTENING) {
+		_scif_poll_wait(f, &ep->conwq, wait, ep);
+		if (ep->state == SCIFEP_LISTENING) {
+			if (ep->conreqcnt)
+				mask |= POLLIN;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is connected or disconnected */
+	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+		if (poll_requested_events(wait) & POLLIN)
+			_scif_poll_wait(f, &ep->recvwq, wait, ep);
+		if (poll_requested_events(wait) & POLLOUT)
+			_scif_poll_wait(f, &ep->sendwq, wait, ep);
+		if (ep->state == SCIFEP_CONNECTED ||
+		    ep->state == SCIFEP_DISCONNECTED) {
+			/* Data can be read without blocking */
+			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+				mask |= POLLIN;
+			/* Data can be written without blocking */
+			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+				mask |= POLLOUT;
+			/* Return POLLHUP if endpoint is disconnected */
+			if (ep->state == SCIFEP_DISCONNECTED)
+				mask |= POLLHUP;
+			goto exit;
+		}
+	}
+
+	/* Return POLLERR if the endpoint is in none of the above states */
+	mask |= POLLERR;
+exit:
+	spin_unlock(&ep->lock);
+	return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the endpoints
+ *	  and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs; a negative value implies an
+ *		   infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events, 0 on
+ * timeout, or -EINTR if the wait was interrupted by a signal.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+	struct poll_wqueues table;
+	poll_table *pt;
+	int i, mask, count = 0, timed_out = timeout_msecs == 0;
+	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+		: msecs_to_jiffies(timeout_msecs);
+
+	poll_initwait(&table);
+	pt = &table.pt;
+	while (1) {
+		for (i = 0; i < nfds; i++) {
+			pt->_key = ufds[i].events | POLLERR | POLLHUP;
+			mask = __scif_pollfd(ufds[i].epd->anon,
+					     pt, ufds[i].epd);
+			mask &= ufds[i].events | POLLERR | POLLHUP;
+			if (mask) {
+				count++;
+				pt->_qproc = NULL;
+			}
+			ufds[i].revents = mask;
+		}
+		pt->_qproc = NULL;
+		if (!count) {
+			count = table.error;
+			if (signal_pending(current))
+				count = -EINTR;
+		}
+		if (count || timed_out)
+			break;
+
+		if (!schedule_timeout_interruptible(timeout))
+			timed_out = 1;
+	}
+	poll_freewait(&table);
+	return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
 int scif_get_node_ids(u16 *nodes, int len, u16 *self)
 {
 	int online = 0;
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
index 331322a..4dc4ccd 100644
--- a/drivers/misc/mic/scif/scif_epd.h
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -96,7 +96,9 @@
  * @conn_port: Connection port
  * @conn_err: Errors during connection
  * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while a non-blocking connect completes
  * @conn_list: List of async connection requests
+ * @anon: Anonymous file used for kernel mode SCIF poll
  */
 struct scif_endpt {
 	enum scif_epd_state state;
@@ -125,7 +127,9 @@
 	struct scif_port_id conn_port;
 	int conn_err;
 	int conn_async_state;
+	wait_queue_head_t conn_pend_wq;
 	struct list_head conn_list;
+	struct file *anon;
 };
 
 static inline int scifdev_alive(struct scif_endpt *ep)
@@ -133,6 +137,22 @@
 	return _scifdev_alive(ep->remote_dev);
 }
 
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+	if (IS_ERR(epd->anon))
+		return PTR_ERR(epd->anon);
+	return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+	if (epd->anon) {
+		fput(epd->anon);
+		epd->anon = NULL;
+	}
+}
+
 void scif_cleanup_zombie_epd(void);
 void scif_teardown_ep(void *endpt);
 void scif_cleanup_ep_qp(struct scif_endpt *ep);
@@ -157,4 +177,6 @@
 void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
 int __scif_flush(scif_epd_t epd);
+unsigned int __scif_pollfd(struct file *f, poll_table *wait,
+			   struct scif_endpt *ep);
 #endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
index eccf7e7..24e47f7 100644
--- a/drivers/misc/mic/scif/scif_fd.c
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -34,6 +34,13 @@
 	return scif_close(priv);
 }
 
+static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return __scif_pollfd(f, wait, priv);
+}
+
 static int scif_fdflush(struct file *f, fl_owner_t id)
 {
 	struct scif_endpt *ep = f->private_data;
@@ -193,6 +200,7 @@
 		spin_unlock(&scif_info.eplock);
 
 		/* Free the resources automatically created from the open. */
+		scif_anon_inode_fput(priv);
 		scif_teardown_ep(priv);
 		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
 		f->private_data = newep;
@@ -298,6 +306,7 @@
 	.open = scif_fdopen,
 	.release = scif_fdclose,
 	.unlocked_ioctl = scif_fdioctl,
+	.poll = scif_fdpoll,
 	.flush = scif_fdflush,
 	.owner = THIS_MODULE,
 };
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
index 580bc63..87c1327 100644
--- a/drivers/misc/mic/scif/scif_main.h
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -22,6 +22,7 @@
 #include <linux/pci.h>
 #include <linux/miscdevice.h>
 #include <linux/dmaengine.h>
+#include <linux/anon_inodes.h>
 #include <linux/file.h>
 #include <linux/scif.h>
 
@@ -184,6 +185,7 @@
 extern struct idr scif_ports;
 extern struct scif_dev *scif_dev;
 extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
 
 /* Size of the RB for the Node QP */
 #define SCIF_NODE_QP_SIZE 0x10000