zram: introduce zram->tb_lock

Currently, the zram table is protected by zram->lock, but it is a rather
coarse-grained lock and it hurts scalability.

Let's use our own rwlock instead of depending on zram->lock.  This patch
adds new locking, so it will obviously be slower, but it is just
preparation for removing the coarse-grained rw_semaphore (ie, zram->lock)
which is the hurdle for zram scalability.

The final patch in this series will remove the lock from the read path
and replace the rw_semaphore with a mutex in the write path.  As a bonus,
we can drop the pending slot-free mess in the next patch.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9ab8849..24e6426 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -140,6 +140,7 @@
 	return sprintf(buf, "%llu\n", val);
 }
 
+/* flag operations need meta->tb_lock */
 static int zram_test_flag(struct zram_meta *meta, u32 index,
 			enum zram_pageflags flag)
 {
@@ -228,6 +229,7 @@
 		goto free_table;
 	}
 
+	rwlock_init(&meta->tb_lock);
 	return meta;
 
 free_table:
@@ -280,6 +282,7 @@
 	flush_dcache_page(page);
 }
 
+/* NOTE: caller should hold meta->tb_lock with write-side */
 static void zram_free_page(struct zram *zram, size_t index)
 {
 	struct zram_meta *meta = zram->meta;
@@ -319,20 +322,26 @@
 	size_t clen = PAGE_SIZE;
 	unsigned char *cmem;
 	struct zram_meta *meta = zram->meta;
-	unsigned long handle = meta->table[index].handle;
+	unsigned long handle;
+	u16 size;
+
+	read_lock(&meta->tb_lock);
+	handle = meta->table[index].handle;
+	size = meta->table[index].size;
 
 	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+		read_unlock(&meta->tb_lock);
 		clear_page(mem);
 		return 0;
 	}
 
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
-	if (meta->table[index].size == PAGE_SIZE)
+	if (size == PAGE_SIZE)
 		copy_page(mem, cmem);
 	else
-		ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
-						mem, &clen);
+		ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
 	zs_unmap_object(meta->mem_pool, handle);
+	read_unlock(&meta->tb_lock);
 
 	/* Should NEVER happen. Return bio error if it does. */
 	if (unlikely(ret != LZO_E_OK)) {
@@ -353,11 +362,14 @@
 	struct zram_meta *meta = zram->meta;
 	page = bvec->bv_page;
 
+	read_lock(&meta->tb_lock);
 	if (unlikely(!meta->table[index].handle) ||
 			zram_test_flag(meta, index, ZRAM_ZERO)) {
+		read_unlock(&meta->tb_lock);
 		handle_zero_page(bvec);
 		return 0;
 	}
+	read_unlock(&meta->tb_lock);
 
 	if (is_partial_io(bvec))
 		/* Use  a temporary buffer to decompress the page */
@@ -433,10 +445,12 @@
 	if (page_zero_filled(uncmem)) {
 		kunmap_atomic(user_mem);
 		/* Free memory associated with this sector now. */
+		write_lock(&zram->meta->tb_lock);
 		zram_free_page(zram, index);
+		zram_set_flag(meta, index, ZRAM_ZERO);
+		write_unlock(&zram->meta->tb_lock);
 
 		atomic_inc(&zram->stats.pages_zero);
-		zram_set_flag(meta, index, ZRAM_ZERO);
 		ret = 0;
 		goto out;
 	}
@@ -486,10 +500,12 @@
 	 * Free memory associated with this sector
 	 * before overwriting unused sectors.
 	 */
+	write_lock(&zram->meta->tb_lock);
 	zram_free_page(zram, index);
 
 	meta->table[index].handle = handle;
 	meta->table[index].size = clen;
+	write_unlock(&zram->meta->tb_lock);
 
 	/* Update stats */
 	atomic64_add(clen, &zram->stats.compr_size);