f2fs: use percpu_rw_semaphore

This patch replaces rw_semaphore with percpu_rw_semaphore for:
sbi->cp_rwsem
nm_i->nat_tree_lock

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d5892f5..88fa139 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -527,7 +527,7 @@
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
 	struct radix_tree_root nat_set_root;/* root of the nat set cache */
-	struct rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
+	struct percpu_rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
@@ -775,7 +775,7 @@
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
 	struct inode *meta_inode;		/* cache meta blocks */
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
-	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
+	struct percpu_rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
 	wait_queue_head_t cp_wait;
 	unsigned long last_time[MAX_TIME];	/* to store time in jiffies */
@@ -1062,22 +1062,22 @@
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-	down_read(&sbi->cp_rwsem);
+	percpu_down_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-	up_read(&sbi->cp_rwsem);
+	percpu_up_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-	down_write(&sbi->cp_rwsem);
+	percpu_down_write(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-	up_write(&sbi->cp_rwsem);
+	percpu_up_write(&sbi->cp_rwsem);
 }
 
 static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9d994b9..b841c43 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -206,14 +206,14 @@
 	struct nat_entry *e;
 	bool need = false;
 
-	down_read(&nm_i->nat_tree_lock);
+	percpu_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
 				!get_nat_flag(e, HAS_FSYNCED_INODE))
 			need = true;
 	}
-	up_read(&nm_i->nat_tree_lock);
+	percpu_up_read(&nm_i->nat_tree_lock);
 	return need;
 }
 
@@ -223,11 +223,11 @@
 	struct nat_entry *e;
 	bool is_cp = true;
 
-	down_read(&nm_i->nat_tree_lock);
+	percpu_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
 		is_cp = false;
-	up_read(&nm_i->nat_tree_lock);
+	percpu_up_read(&nm_i->nat_tree_lock);
 	return is_cp;
 }
 
@@ -237,13 +237,13 @@
 	struct nat_entry *e;
 	bool need_update = true;
 
-	down_read(&nm_i->nat_tree_lock);
+	percpu_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ino);
 	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
 			(get_nat_flag(e, IS_CHECKPOINTED) ||
 			 get_nat_flag(e, HAS_FSYNCED_INODE)))
 		need_update = false;
-	up_read(&nm_i->nat_tree_lock);
+	percpu_up_read(&nm_i->nat_tree_lock);
 	return need_update;
 }
 
@@ -284,7 +284,7 @@
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
 
-	down_write(&nm_i->nat_tree_lock);
+	percpu_down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ni->nid);
 	if (!e) {
 		e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@
 			set_nat_flag(e, HAS_FSYNCED_INODE, true);
 		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
 	}
-	up_write(&nm_i->nat_tree_lock);
+	percpu_up_write(&nm_i->nat_tree_lock);
 }
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,8 +342,7 @@
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	int nr = nr_shrink;
 
-	if (!down_write_trylock(&nm_i->nat_tree_lock))
-		return 0;
+	percpu_down_write(&nm_i->nat_tree_lock);
 
 	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
 		struct nat_entry *ne;
@@ -352,7 +351,7 @@
 		__del_from_nat_cache(nm_i, ne);
 		nr_shrink--;
 	}
-	up_write(&nm_i->nat_tree_lock);
+	percpu_up_write(&nm_i->nat_tree_lock);
 	return nr - nr_shrink;
 }
 
@@ -374,13 +373,13 @@
 	ni->nid = nid;
 
 	/* Check nat cache */
-	down_read(&nm_i->nat_tree_lock);
+	percpu_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		ni->ino = nat_get_ino(e);
 		ni->blk_addr = nat_get_blkaddr(e);
 		ni->version = nat_get_version(e);
-		up_read(&nm_i->nat_tree_lock);
+		percpu_up_read(&nm_i->nat_tree_lock);
 		return;
 	}
 
@@ -404,11 +403,11 @@
 	node_info_from_raw_nat(ni, &ne);
 	f2fs_put_page(page, 1);
 cache:
-	up_read(&nm_i->nat_tree_lock);
+	percpu_up_read(&nm_i->nat_tree_lock);
 	/* cache nat entry */
-	down_write(&nm_i->nat_tree_lock);
+	percpu_down_write(&nm_i->nat_tree_lock);
 	cache_nat_entry(sbi, nid, &ne);
-	up_write(&nm_i->nat_tree_lock);
+	percpu_up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1783,7 +1782,7 @@
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
 
-	down_read(&nm_i->nat_tree_lock);
+	percpu_down_read(&nm_i->nat_tree_lock);
 
 	while (1) {
 		struct page *page = get_current_nat_page(sbi, nid);
@@ -1815,7 +1814,7 @@
 			remove_free_nid(nm_i, nid);
 	}
 	up_read(&curseg->journal_rwsem);
-	up_read(&nm_i->nat_tree_lock);
+	percpu_up_read(&nm_i->nat_tree_lock);
 
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
 					nm_i->ra_nid_pages, META_NAT, false);
@@ -2204,7 +2203,7 @@
 	if (!nm_i->dirty_nat_cnt)
 		return;
 
-	down_write(&nm_i->nat_tree_lock);
+	percpu_down_write(&nm_i->nat_tree_lock);
 
 	/*
 	 * if there are no enough space in journal to store dirty nat
@@ -2227,7 +2226,7 @@
 	list_for_each_entry_safe(set, tmp, &sets, set_list)
 		__flush_nat_entry_set(sbi, set);
 
-	up_write(&nm_i->nat_tree_lock);
+	percpu_up_write(&nm_i->nat_tree_lock);
 
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
@@ -2263,7 +2262,8 @@
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->free_nid_list_lock);
-	init_rwsem(&nm_i->nat_tree_lock);
+	if (percpu_init_rwsem(&nm_i->nat_tree_lock))
+		return -ENOMEM;
 
 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2320,7 +2320,7 @@
 	spin_unlock(&nm_i->free_nid_list_lock);
 
 	/* destroy nat cache */
-	down_write(&nm_i->nat_tree_lock);
+	percpu_down_write(&nm_i->nat_tree_lock);
 	while ((found = __gang_lookup_nat_cache(nm_i,
 					nid, NATVEC_SIZE, natvec))) {
 		unsigned idx;
@@ -2345,8 +2345,9 @@
 			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
 		}
 	}
-	up_write(&nm_i->nat_tree_lock);
+	percpu_up_write(&nm_i->nat_tree_lock);
 
+	percpu_free_rwsem(&nm_i->nat_tree_lock);
 	kfree(nm_i->nat_bitmap);
 	sbi->nm_info = NULL;
 	kfree(nm_i);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e0a975d..2bac917 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -695,6 +695,8 @@
 		percpu_counter_destroy(&sbi->nr_pages[i]);
 	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
+
+	percpu_free_rwsem(&sbi->cp_rwsem);
 }
 
 static void f2fs_put_super(struct super_block *sb)
@@ -1471,6 +1473,9 @@
 {
 	int i, err;
 
+	if (percpu_init_rwsem(&sbi->cp_rwsem))
+		return -ENOMEM;
+
 	for (i = 0; i < NR_COUNT_TYPE; i++) {
 		err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
 		if (err)
@@ -1671,7 +1676,6 @@
 		sbi->write_io[i].bio = NULL;
 	}
 
-	init_rwsem(&sbi->cp_rwsem);
 	init_waitqueue_head(&sbi->cp_wait);
 	init_sb_info(sbi);