fuse: separate queue for FORGET requests Terje Malmedal reports that a fuse filesystem with 32 million inodes on a machine with lots of memory can go unresponsive for up to 30 minutes when all those inodes are evicted from the icache. The reason is that FORGET messages, sent when the inode is evicted, are queued up together with regular filesystem requests, and while the huge queue of FORGET messages are processed no other filesystem operation can proceed. Since a full fuse request structure is allocated for each inode, these take up quite a bit of memory as well. To solve these issues, create a slim 'fuse_forget_link' structure containing just the minimum of information required to send the FORGET request and chain these on a separate queue. When userspace is asking for a request make sure that FORGET and non-FORGET requests are selected fairly: for each 8 non-FORGET allow 16 FORGET requests. This will make sure FORGETs do not pile up, yet other requests are also allowed to proceed while the queued FORGETs are processed. Reported-by: Terje Malmedal <terje.malmedal@usit.uio.no> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

commit: 07e77dca8a1f17a724a9b7449f0ca02e70e9d057 [log] [tgz]
author: Miklos Szeredi <mszeredi@suse.cz> Tue Dec 07 20:16:56 2010 +0100
committer: Miklos Szeredi <mszeredi@suse.cz> Tue Dec 07 20:16:56 2010 +0100
tree: 69186be641145fd997ce15e17a22598d9a264119
parent: 8ac835056ca39b242d98332f46e4d65428a8b7db [diff] [blame]
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad..7ba4d35 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c

@@ -71,6 +71,11 @@
 	unsigned blksize;
 };
 
+struct fuse_forget_link *fuse_alloc_forget()
+{
+	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+}
+
 static struct inode *fuse_alloc_inode(struct super_block *sb)
 {
 	struct inode *inode;
@@ -90,8 +95,8 @@
 	INIT_LIST_HEAD(&fi->queued_writes);
 	INIT_LIST_HEAD(&fi->writepages);
 	init_waitqueue_head(&fi->page_waitq);
-	fi->forget_req = fuse_request_alloc();
-	if (!fi->forget_req) {
+	fi->forget = fuse_alloc_forget();
+	if (!fi->forget) {
 		kmem_cache_free(fuse_inode_cachep, inode);
 		return NULL;
 	}
@@ -104,24 +109,10 @@
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	BUG_ON(!list_empty(&fi->write_files));
 	BUG_ON(!list_empty(&fi->queued_writes));
-	if (fi->forget_req)
-		fuse_request_free(fi->forget_req);
+	kfree(fi->forget);
 	kmem_cache_free(fuse_inode_cachep, inode);
 }
 
-void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
-		      u64 nodeid, u64 nlookup)
-{
-	struct fuse_forget_in *inarg = &req->misc.forget_in;
-	inarg->nlookup = nlookup;
-	req->in.h.opcode = FUSE_FORGET;
-	req->in.h.nodeid = nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(struct fuse_forget_in);
-	req->in.args[0].value = inarg;
-	fuse_request_send_noreply(fc, req);
-}
-
 static void fuse_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
@@ -129,8 +120,8 @@
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
-		fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
-		fi->forget_req = NULL;
+		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+		fi->forget = NULL;
 	}
 }
 
@@ -534,6 +525,7 @@
 	INIT_LIST_HEAD(&fc->interrupts);
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
+	fc->forget_list_tail = &fc->forget_list_head;
 	atomic_set(&fc->num_waiting, 0);
 	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
 	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
commit	07e77dca8a1f17a724a9b7449f0ca02e70e9d057	[log] [tgz]
author	Miklos Szeredi <mszeredi@suse.cz>	Tue Dec 07 20:16:56 2010 +0100
committer	Miklos Szeredi <mszeredi@suse.cz>	Tue Dec 07 20:16:56 2010 +0100
tree	69186be641145fd997ce15e17a22598d9a264119
parent	8ac835056ca39b242d98332f46e4d65428a8b7db [diff] [blame]