vhost: replace vhost_workqueue with per-vhost kthread
Replace vhost_workqueue with per-vhost kthread. Other than callback
argument change from struct work_struct * to struct vhost_work *,
there's no visible change to vhost_poll_*() interface.
This conversion is to make each vhost use a dedicated kthread so that
resource control via cgroup can be applied.
Partially based on Sridhar Samudrala's patch.
* Updated to use sub structure vhost_work instead of directly using
vhost_poll at Michael's suggestion.
* Added flusher wake_up() optimization at Michael's suggestion.
Changes by MST:
* Converted atomics/barrier use to a spinlock.
* Create thread on SET_OWNER
* Fix flushing
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Sridhar Samudrala <samudrala.sridhar@gmail.com>
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 11ee13d..3693327 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -5,13 +5,13 @@
#include <linux/vhost.h>
#include <linux/mm.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/skbuff.h>
#include <linux/uio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
+#include <asm/atomic.h>
struct vhost_device;
@@ -20,19 +20,31 @@
VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
};
+struct vhost_work;
+typedef void (*vhost_work_fn_t)(struct vhost_work *work);
+
+struct vhost_work {
+ struct list_head node;
+ vhost_work_fn_t fn;
+ wait_queue_head_t done;
+ int flushing;
+ unsigned queue_seq;
+ unsigned done_seq;
+};
+
/* Poll a file (eventfd or socket) */
/* Note: there's nothing vhost specific about this structure. */
struct vhost_poll {
poll_table table;
wait_queue_head_t *wqh;
wait_queue_t wait;
- /* struct which will handle all actual work. */
- struct work_struct work;
+ struct vhost_work work;
unsigned long mask;
+ struct vhost_dev *dev;
};
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
- unsigned long mask);
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+ unsigned long mask, struct vhost_dev *dev);
void vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll);
@@ -63,7 +75,7 @@
struct vhost_poll poll;
/* The routine to call when the Guest pings us, or timeout. */
- work_func_t handle_kick;
+ vhost_work_fn_t handle_kick;
/* Last available index we saw. */
u16 last_avail_idx;
@@ -86,11 +98,11 @@
struct iovec hdr[VHOST_NET_MAX_SG];
size_t hdr_size;
/* We use a kind of RCU to access private pointer.
- * All readers access it from workqueue, which makes it possible to
- * flush the workqueue instead of synchronize_rcu. Therefore readers do
+ * All readers access it from worker, which makes it possible to
+ * flush the vhost_work instead of synchronize_rcu. Therefore readers do
* not need to call rcu_read_lock/rcu_read_unlock: the beginning of
- * work item execution acts instead of rcu_read_lock() and the end of
- * work item execution acts instead of rcu_read_lock().
+ * vhost_work execution acts instead of rcu_read_lock() and the end of
+ * vhost_work execution acts instead of rcu_read_lock().
* Writers use virtqueue mutex. */
void *private_data;
/* Log write descriptors */
@@ -110,6 +122,9 @@
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
+ spinlock_t work_lock;
+ struct list_head work_list;
+ struct task_struct *worker;
};
long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
@@ -136,9 +151,6 @@
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
-int vhost_init(void);
-void vhost_cleanup(void);
-
#define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
if ((vq)->error_ctx) \