ps3vram: Replace mutex by spinlock + bio_list

Remove the mutex serializing access to the cache.
Instead, queue up new requests on a bio_list if the driver is busy.

This improves sequential write performance by ca. 2%.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: Jim Paris <jim@jtan.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 2c2ea9c..7d1c742 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -81,8 +81,8 @@
 
 	struct ps3vram_cache cache;
 
-	/* Used to serialize cache/DMA operations */
-	struct mutex lock;
+	spinlock_t lock;	/* protecting list of bios */
+	struct bio_list list;
 };
 
 
@@ -443,8 +443,6 @@
 		offset = (unsigned int) (from & (priv->cache.page_size - 1));
 		avail  = priv->cache.page_size - offset;
 
-		mutex_lock(&priv->lock);
-
 		entry = ps3vram_cache_match(dev, from);
 		cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
 
@@ -456,8 +454,6 @@
 			avail = count;
 		memcpy(buf, priv->xdr_buf + cached, avail);
 
-		mutex_unlock(&priv->lock);
-
 		buf += avail;
 		count -= avail;
 		from += avail;
@@ -488,8 +484,6 @@
 		offset = (unsigned int) (to & (priv->cache.page_size - 1));
 		avail  = priv->cache.page_size - offset;
 
-		mutex_lock(&priv->lock);
-
 		entry = ps3vram_cache_match(dev, to);
 		cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
 
@@ -503,8 +497,6 @@
 
 		priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
 
-		mutex_unlock(&priv->lock);
-
 		buf += avail;
 		count -= avail;
 		to += avail;
@@ -546,17 +538,17 @@
 		dev_warn(&dev->core, "failed to create /proc entry\n");
 }
 
-static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
+				  struct bio *bio)
 {
-	struct ps3_system_bus_device *dev = q->queuedata;
+	struct ps3vram_priv *priv = dev->core.driver_data;
 	int write = bio_data_dir(bio) == WRITE;
 	const char *op = write ? "write" : "read";
 	loff_t offset = bio->bi_sector << 9;
 	int error = 0;
 	struct bio_vec *bvec;
 	unsigned int i;
-
-	dev_dbg(&dev->core, "%s\n", __func__);
+	struct bio *next;
 
 	bio_for_each_segment(bvec, bio, i) {
 		/* PS3 is ppc64, so we don't handle highmem */
@@ -587,7 +579,35 @@
 	dev_dbg(&dev->core, "%s completed\n", op);
 
 out:
+	spin_lock_irq(&priv->lock);
+	bio_list_pop(&priv->list);
+	next = bio_list_peek(&priv->list);
+	spin_unlock_irq(&priv->lock);
+
 	bio_endio(bio, error);
+	return next;
+}
+
+static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct ps3_system_bus_device *dev = q->queuedata;
+	struct ps3vram_priv *priv = dev->core.driver_data;
+	int busy;
+
+	dev_dbg(&dev->core, "%s\n", __func__);
+
+	spin_lock_irq(&priv->lock);
+	busy = !bio_list_empty(&priv->list);
+	bio_list_add(&priv->list, bio);
+	spin_unlock_irq(&priv->lock);
+
+	if (busy)
+		return 0;
+
+	do {
+		bio = ps3vram_do_bio(dev, bio);
+	} while (bio);
+
 	return 0;
 }
 
@@ -607,7 +627,8 @@
 		goto fail;
 	}
 
-	mutex_init(&priv->lock);
+	spin_lock_init(&priv->lock);
+	bio_list_init(&priv->list);
 	dev->core.driver_data = priv;
 
 	priv = dev->core.driver_data;