fsnotify: allow marks to not pin inodes in core

inotify marks must pin inodes in core.  dnotify doesn't technically need to
since they are closed when the directory is closed.  fanotify also need to
pin inodes in core as it works today.  But the next step is to introduce
the concept of 'ignored masks' which is actually a mask of events for an
inode of no interest.  I claim that these should be liberally sent to the
kernel and should not pin the inode in core.  If the inode is brought back
in the listener will get an event it may have thought excluded, but this is
not a serious situation and one any listener should deal with.

This patch lays the ground work for non-pinning inode marks by using lazy
inode pinning.  We do not pin a mark until it has a non-zero mask entry.  If a
listener new sets a mask we never pin the inode.

Signed-off-by: Eric Paris <eparis@redhat.com>
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 69f42df..6624c2e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -65,7 +65,7 @@
 	new_mask = 0;
 	for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
 		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
-	fsn_mark->mask = new_mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
 
 	if (old_mask == new_mask)
 		return;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8415584..3320f0c 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -302,7 +302,7 @@
 
 	spin_lock(&fsn_mark->lock);
 	oldmask = fsn_mark->mask;
-	fsn_mark->mask = oldmask & ~mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
 	spin_unlock(&fsn_mark->lock);
 
 	if (!(oldmask & ~mask))
@@ -359,7 +359,7 @@
 
 	spin_lock(&fsn_mark->lock);
 	oldmask = fsn_mark->mask;
-	fsn_mark->mask = oldmask | mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
 	spin_unlock(&fsn_mark->lock);
 
 	return mask & ~oldmask;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 204353c..1be54f6 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -20,6 +20,8 @@
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
+						__u32 mask);
 /* add a mark to an inode */
 extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 				   struct fsnotify_group *group, struct inode *inode,
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index c925579..4292f9e 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -141,7 +141,32 @@
 }
 
 /*
- * Attach an initialized mark mark to a given group and inode.
+ * If we are setting a mark mask on an inode mark we should pin the inode
+ * in memory.
+ */
+void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
+					 __u32 mask)
+{
+	struct inode *inode;
+
+	assert_spin_locked(&mark->lock);
+
+	if (mask &&
+	    mark->i.inode &&
+	    !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
+		mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
+		inode = igrab(mark->i.inode);
+		/*
+		 * we shouldn't be able to get here if the inode wasn't
+		 * already safely held in memory.  But bug in case it
+		 * ever is wrong.
+		 */
+		BUG_ON(!inode);
+	}
+}
+
+/*
+ * Attach an initialized mark to a given group and inode.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.
  */
@@ -152,10 +177,6 @@
 	struct fsnotify_mark *lmark = NULL;
 	int ret = 0;
 
-	inode = igrab(inode);
-	if (unlikely(!inode))
-		return -EINVAL;
-
 	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
 
 	assert_spin_locked(&mark->lock);
@@ -175,10 +196,8 @@
 
 	spin_unlock(&inode->i_lock);
 
-	if (lmark) {
+	if (lmark)
 		ret = -EEXIST;
-		iput(inode);
-	}
 
 	return ret;
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a12315a..19d2740 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -575,13 +575,11 @@
 	spin_lock(&fsn_mark->lock);
 
 	old_mask = fsn_mark->mask;
-	if (add) {
-		fsn_mark->mask |= mask;
-		new_mask = fsn_mark->mask;
-	} else {
-		fsn_mark->mask = mask;
-		new_mask = fsn_mark->mask;
-	}
+	if (add)
+		fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
+	else
+		fsnotify_set_mark_mask_locked(fsn_mark, mask);
+	new_mask = fsn_mark->mask;
 
 	spin_unlock(&fsn_mark->lock);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d296ec9..0ebc3fd 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -168,7 +168,7 @@
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-	if (inode)
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
 		iput(inode);
 
 	/*
@@ -180,6 +180,17 @@
 		fsnotify_final_destroy_group(group);
 }
 
+void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
+{
+	assert_spin_locked(&mark->lock);
+
+	mark->mask = mask;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
+		fsnotify_set_inode_mark_mask_locked(mark, mask);
+}
+
+
 /*
  * Attach an initialized mark to a given group and fs object.
  * These marks may be used for the fsnotify backend to determine which
@@ -230,6 +241,10 @@
 	}
 
 	spin_unlock(&group->mark_lock);
+
+	/* this will pin the object if appropriate */
+	fsnotify_set_mark_mask_locked(mark, mark->mask);
+
 	spin_unlock(&mark->lock);
 
 	if (inode)