cma: Make locking finer grained

CMA locking is currently very coarse. The cma_mutex both protects
the bitmap and serializes callers of alloc_contig_range. Several
situations can currently deadlock on the cma_mutex, mostly AB/BA
lock orderings between the allocation and free paths. Fix this by
protecting the bitmap with a per-CMA-region mutex and keeping the
existing cma_mutex solely to serialize concurrent calls to
alloc_contig_range.

Change-Id: I6863ba7ab7fae07c68fee23b0aa4c869244fe2a1
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
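---
As an illustration for reviewers (not part of the commit): a minimal
userspace sketch of the AB/BA ordering described above. dev_lock is a
hypothetical stand-in for any lock that a driver's free path holds
while the migration step inside alloc_contig_range can also take it.
Before this patch the free path needed cma_mutex as well, giving the
two paths opposite lock orders:

#include <pthread.h>

static pthread_mutex_t cma_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dev_lock  = PTHREAD_MUTEX_INITIALIZER; /* hypothetical */

/* Old allocation path: cma_mutex is held across the whole migration,
 * which may in turn block on a lock a driver also takes. */
static void old_alloc_path(void)
{
	pthread_mutex_lock(&cma_mutex);		/* A ... */
	pthread_mutex_lock(&dev_lock);		/* ... then B, deep in migration */
	pthread_mutex_unlock(&dev_lock);
	pthread_mutex_unlock(&cma_mutex);
}

/* Old free path from driver context: the driver already holds its own
 * lock, then needs cma_mutex just to clear bits in the bitmap. */
static void old_free_path(void)
{
	pthread_mutex_lock(&dev_lock);		/* B ... */
	pthread_mutex_lock(&cma_mutex);		/* ... then A: AB/BA with alloc */
	pthread_mutex_unlock(&cma_mutex);
	pthread_mutex_unlock(&dev_lock);
}

int main(void)
{
	/* Each path is harmless alone; racing them on two CPUs can
	 * deadlock because the locks are taken in opposite orders. */
	old_alloc_path();
	old_free_path();
	return 0;
}

With this patch the free path takes only cma->lock, which the
migration path never acquires, so the cycle through cma_mutex is
broken.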
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index adac211..0395681 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -47,6 +47,7 @@
 	unsigned long	base_pfn;
 	unsigned long	count;
 	unsigned long	*bitmap;
+	struct mutex	lock;
 };
 
 static DEFINE_MUTEX(cma_mutex);
@@ -191,6 +192,7 @@
 	ret = cma_activate_area(base_pfn, count);
 	if (ret)
 		goto error;
+	mutex_init(&cma->lock);
 
 	pr_debug("%s: returned %p\n", __func__, (void *)cma);
 	return cma;
@@ -459,6 +461,13 @@
 	return cma->base_pfn << PAGE_SHIFT;
 }
 
+static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+	mutex_lock(&cma->lock);
+	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
+	mutex_unlock(&cma->lock);
+}
+
 /**
  * dma_alloc_from_contiguous() - allocate pages from contiguous area
  * @dev:   Pointer to device for which the allocation is performed.
@@ -493,23 +502,34 @@
 
 	mask = (1 << align) - 1;
 
-	mutex_lock(&cma_mutex);
-
 	for (;;) {
+		mutex_lock(&cma->lock);
 		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
 						    start, count, mask);
-		if (pageno >= cma->count)
+		if (pageno >= cma->count) {
+			mutex_unlock(&cma->lock);
 			break;
+		}
+		bitmap_set(cma->bitmap, pageno, count);
+		/*
+		 * It's safe to drop the lock here. We've marked this region for
+		 * our exclusive use. If the migration fails we will take the
+		 * lock again and unmark it.
+		 */
+		mutex_unlock(&cma->lock);
 
 		pfn = cma->base_pfn + pageno;
+		mutex_lock(&cma_mutex);
 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+		mutex_unlock(&cma_mutex);
 		if (ret == 0) {
-			bitmap_set(cma->bitmap, pageno, count);
 			page = pfn_to_page(pfn);
 			break;
 		} else if (ret != -EBUSY) {
+			clear_cma_bitmap(cma, pfn, count);
 			break;
 		}
+		clear_cma_bitmap(cma, pfn, count);
 		tries++;
 		trace_dma_alloc_contiguous_retry(tries);
 
@@ -519,7 +540,6 @@
 		start = pageno + mask + 1;
 	}
 
-	mutex_unlock(&cma_mutex);
 	pr_debug("%s(): returned %p\n", __func__, page);
 	return page;
 }
@@ -553,9 +573,7 @@
 	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
 
 	free_contig_range(pfn, count);
-	mutex_lock(&cma_mutex);
-	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
-	mutex_unlock(&cma_mutex);
+	clear_cma_bitmap(cma, pfn, count);
 
 	return true;
 }
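
For completeness, a hedged, runnable model of the resulting scheme
(userspace pthreads, simplified names; the kernel diff above is the
authoritative version). The per-region lock is held only for short
bitmap updates, cma_mutex wraps only the slow migration step, and the
free path never touches cma_mutex at all:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define REGION_BITS 64

/* Model of one CMA region after this patch: the bitmap gets its own lock. */
struct cma_region {
	pthread_mutex_t lock;		/* protects bitmap only (cma->lock) */
	bool bitmap[REGION_BITS];	/* one flag per page, for simplicity */
};

/* Global lock that only serializes the (slow) migration step. */
static pthread_mutex_t cma_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for alloc_contig_range(): slow, may fail, never touches the
 * bitmap. */
static int migrate_range(int start, int count)
{
	(void)start;
	(void)count;
	return 0;			/* pretend migration succeeded */
}

static int region_alloc(struct cma_region *r, int count)
{
	int start = -1, run = 0;

	/* Reserve pages in the bitmap under the short-held region lock. */
	pthread_mutex_lock(&r->lock);
	for (int i = 0; i < REGION_BITS; i++) {
		run = r->bitmap[i] ? 0 : run + 1;
		if (run == count) {
			start = i - count + 1;
			break;
		}
	}
	if (start >= 0)
		for (int i = start; i < start + count; i++)
			r->bitmap[i] = true;
	pthread_mutex_unlock(&r->lock);
	if (start < 0)
		return -1;

	/* Only the migration step takes the global mutex. */
	pthread_mutex_lock(&cma_mutex);
	int ret = migrate_range(start, count);
	pthread_mutex_unlock(&cma_mutex);

	if (ret != 0) {
		/* Roll back the reservation on failure. */
		pthread_mutex_lock(&r->lock);
		for (int i = start; i < start + count; i++)
			r->bitmap[i] = false;
		pthread_mutex_unlock(&r->lock);
		return -1;
	}
	return start;
}

/* Free touches only the region lock, so it never waits on a migration. */
static void region_free(struct cma_region *r, int start, int count)
{
	pthread_mutex_lock(&r->lock);
	for (int i = start; i < start + count; i++)
		r->bitmap[i] = false;
	pthread_mutex_unlock(&r->lock);
}

int main(void)
{
	struct cma_region r;

	pthread_mutex_init(&r.lock, NULL);
	memset(r.bitmap, 0, sizeof(r.bitmap));

	int start = region_alloc(&r, 4);
	printf("allocated 4 pages at offset %d\n", start);
	region_free(&r, start, 4);
	return 0;
}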