[PATCH] mm: init_mm without ptlock

First step in pushing down the page_table_lock.  init_mm.page_table_lock has
been used throughout the architectures (usually for ioremap): not to serialize
kernel address space allocation (that's usually vmlist_lock), but because
pud_alloc, pmd_alloc and pte_alloc_kernel expect the caller to hold it.

Reverse that: don't lock or unlock init_mm.page_table_lock in any of the
architectures; instead rely on pud_alloc, pmd_alloc and pte_alloc_kernel to
take and drop it themselves when installing a newly allocated table, so that
they can check whether a racing task has already populated the entry.
Similarly, vmalloc's map_vm_area no longer takes page_table_lock.

Some temporary ugliness in __pud_alloc and __pmd_alloc: since they also handle
user mms, which are converted only by a later patch, for now they have to lock
differently according to whether or not it's init_mm.

If sources get muddled, there's a danger that an arch source taking
init_mm.page_table_lock will be mixed with common source also taking it (or
that neither takes it).  So break the rules and make another change, which
should break the build on such a mismatch: remove the redundant mm arg from
pte_alloc_kernel (ppc64 scrapped its distinct ioremap_mm in 2.6.13).

Exceptions: arm26 used pte_alloc_kernel on user mm, now pte_alloc_map; ia64
used pte_alloc_map on init_mm, now pte_alloc_kernel; parisc had bad args to
pmd_alloc and pte_alloc_kernel in unused USE_HPPA_IOREMAP code; ppc64
map_io_page forgot to unlock on failure; ppc mmu_mapin_ram and ppc64 im_free
took page_table_lock for no good reason.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/memory.c b/mm/memory.c
index 692ad81..95a4553 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -307,28 +307,22 @@
 	return pte_offset_map(pmd, address);
 }
 
-pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+pte_t fastcall * pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 {
 	if (!pmd_present(*pmd)) {
 		pte_t *new;
 
-		spin_unlock(&mm->page_table_lock);
-		new = pte_alloc_one_kernel(mm, address);
-		spin_lock(&mm->page_table_lock);
+		new = pte_alloc_one_kernel(&init_mm, address);
 		if (!new)
 			return NULL;
 
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (pmd_present(*pmd)) {
+		spin_lock(&init_mm.page_table_lock);
+		if (pmd_present(*pmd))
 			pte_free_kernel(new);
-			goto out;
-		}
-		pmd_populate_kernel(mm, pmd, new);
+		else
+			pmd_populate_kernel(&init_mm, pmd, new);
+		spin_unlock(&init_mm.page_table_lock);
 	}
-out:
 	return pte_offset_kernel(pmd, address);
 }
 
@@ -2097,30 +2091,30 @@
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
  */
 pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	pud_t *new;
 
-	spin_unlock(&mm->page_table_lock);
+	if (mm != &init_mm)		/* Temporary bridging hack */
+		spin_unlock(&mm->page_table_lock);
 	new = pud_alloc_one(mm, address);
-	spin_lock(&mm->page_table_lock);
-	if (!new)
+	if (!new) {
+		if (mm != &init_mm)	/* Temporary bridging hack */
+			spin_lock(&mm->page_table_lock);
 		return NULL;
+	}
 
-	/*
-	 * Because we dropped the lock, we should re-check the
-	 * entry, as somebody else could have populated it..
-	 */
+	spin_lock(&mm->page_table_lock);
 	if (pgd_present(*pgd)) {
 		pud_free(new);
 		goto out;
 	}
 	pgd_populate(mm, pgd, new);
  out:
+	if (mm == &init_mm)		/* Temporary bridging hack */
+		spin_unlock(&mm->page_table_lock);
 	return pud_offset(pgd, address);
 }
 #endif /* __PAGETABLE_PUD_FOLDED */
@@ -2128,24 +2122,22 @@
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
  */
 pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
 	pmd_t *new;
 
-	spin_unlock(&mm->page_table_lock);
+	if (mm != &init_mm)		/* Temporary bridging hack */
+		spin_unlock(&mm->page_table_lock);
 	new = pmd_alloc_one(mm, address);
-	spin_lock(&mm->page_table_lock);
-	if (!new)
+	if (!new) {
+		if (mm != &init_mm)	/* Temporary bridging hack */
+			spin_lock(&mm->page_table_lock);
 		return NULL;
+	}
 
-	/*
-	 * Because we dropped the lock, we should re-check the
-	 * entry, as somebody else could have populated it..
-	 */
+	spin_lock(&mm->page_table_lock);
 #ifndef __ARCH_HAS_4LEVEL_HACK
 	if (pud_present(*pud)) {
 		pmd_free(new);
@@ -2161,6 +2153,8 @@
 #endif /* __ARCH_HAS_4LEVEL_HACK */
 
  out:
+	if (mm == &init_mm)		/* Temporary bridging hack */
+		spin_unlock(&mm->page_table_lock);
 	return pmd_offset(pud, address);
 }
 #endif /* __PAGETABLE_PMD_FOLDED */