fuse: separate queue for FORGET requests
Terje Malmedal reports that a fuse filesystem with 32 million inodes
on a machine with lots of memory can go unresponsive for up to 30
minutes when all those inodes are evicted from the icache.
The reason is that FORGET messages, sent when the inode is evicted,
are queued up together with regular filesystem requests, and while the
huge queue of FORGET messages are processed no other filesystem
operation can proceed.
Since a full fuse request structure is allocated for each inode, these
take up quite a bit of memory as well.
To solve these issues, create a slim 'fuse_forget_link' structure
containing just the minimum of information required to send the FORGET
request and chain these on a separate queue.
When userspace is asking for a request make sure that FORGET and
non-FORGET requests are selected fairly: for each 8 non-FORGET allow
16 FORGET requests. This will make sure FORGETs do not pile up, yet
other requests are also allowed to proceed while the queued FORGETs
are processed.
Reported-by: Terje Malmedal <terje.malmedal@usit.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6e07696..fed6530 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -251,6 +251,20 @@
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup)
+{
+ forget->nodeid = nodeid;
+ forget->nlookup = nlookup;
+
+ spin_lock(&fc->lock);
+ fc->forget_list_tail->next = forget;
+ fc->forget_list_tail = forget;
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ spin_unlock(&fc->lock);
+}
+
static void flush_bg_queue(struct fuse_conn *fc)
{
while (fc->active_background < fc->max_background &&
@@ -438,12 +452,6 @@
}
}
-void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
-{
- req->isreply = 0;
- fuse_request_send_nowait(fc, req);
-}
-
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
@@ -896,9 +904,15 @@
return err;
}
+static int forget_pending(struct fuse_conn *fc)
+{
+ return fc->forget_list_head.next != NULL;
+}
+
static int request_pending(struct fuse_conn *fc)
{
- return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
+ return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
+ forget_pending(fc);
}
/* Wait until a request is available on the pending list */
@@ -960,6 +974,50 @@
return err ? err : reqsize;
}
+static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc)
+{
+ struct fuse_forget_link *forget = fc->forget_list_head.next;
+
+ fc->forget_list_head.next = forget->next;
+ if (fc->forget_list_head.next == NULL)
+ fc->forget_list_tail = &fc->forget_list_head;
+
+ return forget;
+}
+
+static int fuse_read_single_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ struct fuse_forget_link *forget = dequeue_forget(fc);
+ struct fuse_forget_in arg = {
+ .nlookup = forget->nlookup,
+ };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_FORGET,
+ .nodeid = forget->nodeid,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ spin_unlock(&fc->lock);
+ kfree(forget);
+ if (nbytes < ih.len)
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
/*
* Read a single request into the userspace filesystem's buffer. This
* function waits until a request is available, then removes it from
@@ -998,6 +1056,14 @@
return fuse_read_interrupt(fc, cs, nbytes, req);
}
+ if (forget_pending(fc)) {
+ if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
+ return fuse_read_single_forget(fc, cs, nbytes);
+
+ if (fc->forget_batch <= -8)
+ fc->forget_batch = 16;
+ }
+
req = list_entry(fc->pending.next, struct fuse_req, list);
req->state = FUSE_REQ_READING;
list_move(&req->list, &fc->io);
@@ -1090,7 +1156,7 @@
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1626,7 +1692,7 @@
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1770,6 +1836,8 @@
flush_bg_queue(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
+ while (forget_pending(fc))
+ kfree(dequeue_forget(fc));
}
/*