[PATCH] CRIS update: mm

Memory management patches.

* SMP support.
* Non-executable stack (on v32).
* 4-level page tables.
* Added simple Thread Local Storage support.

Signed-off-by: Mikael Starvik <starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 03254b9..fe1cc36 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -6,6 +6,38 @@
  *  Authors:  Bjorn Wesen 
  * 
  *  $Log: fault.c,v $
+ *  Revision 1.20  2005/03/04 08:16:18  starvik
+ *  Merge of Linux 2.6.11.
+ *
+ *  Revision 1.19  2005/01/14 10:07:59  starvik
+ *  Fixed warning.
+ *
+ *  Revision 1.18  2005/01/12 08:10:14  starvik
+ *  Re-added the change of frametype when handling kernel page fault fixup
+ *  for v10. This is necessary to prevent the CPU from repeating the
+ *  faulting access.
+ *
+ *  Revision 1.17  2005/01/11 13:53:05  starvik
+ *  Use raw_printk.
+ *
+ *  Revision 1.16  2004/12/17 11:39:41  starvik
+ *  SMP support.
+ *
+ *  Revision 1.15  2004/11/23 18:36:18  starvik
+ *  Stack is now non-executable.
+ *  Signal handler trampolines are placed in a reserved page mapped into all
+ *  processes.
+ *
+ *  Revision 1.14  2004/11/23 07:10:21  starvik
+ *  Moved find_fixup_code to generic code.
+ *
+ *  Revision 1.13  2004/11/23 07:00:54  starvik
+ *  Actually use the execute permission bit in the MMU. This makes it possible
+ *  to prevent e.g. attacks where executable code is put on the stack.
+ *
+ *  Revision 1.12  2004/09/29 06:16:04  starvik
+ *  Use instruction_pointer
+ *
  *  Revision 1.11  2004/05/14 07:58:05  starvik
  *  Merge of changes from 2.4
  *
@@ -103,6 +135,7 @@
 
 extern int find_fixup_code(struct pt_regs *);
 extern void die_if_kernel(const char *, struct pt_regs *, long);
+extern int raw_printk(const char *fmt, ...);
 
 /* debug of low-level TLB reload */
 #undef DEBUG
@@ -118,7 +151,8 @@
 
 /* current active page directory */
 
-volatile pgd_t *current_pgd;
+volatile DEFINE_PER_CPU(pgd_t *,current_pgd);
+unsigned long cris_signal_return_page;
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -146,8 +180,9 @@
 	struct vm_area_struct * vma;
 	siginfo_t info;
 
-        D(printk("Page fault for %X at %X, prot %d write %d\n",
-                 address, regs->erp, protection, writeaccess));
+        D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n",
+                 address, smp_processor_id(), instruction_pointer(regs),
+                 protection, writeaccess));
 
 	tsk = current;
 
@@ -175,8 +210,19 @@
 	    !user_mode(regs))
 		goto vmalloc_fault;
 
+	/* When stack execution is not allowed we store the signal
+	 * trampolines in the reserved cris_signal_return_page.
+	 * Handle this in the exact same way as vmalloc (we know
+	 * that the mapping is there and is valid so no need to
+	 * call handle_mm_fault).
+	 */
+	if (cris_signal_return_page &&
+	    address == cris_signal_return_page &&
+	    !protection && user_mode(regs))
+		goto vmalloc_fault;
+
 	/* we can and should enable interrupts at this point */
-	sti();
+	local_irq_enable();
 
 	mm = tsk->mm;
 	info.si_code = SEGV_MAPERR;
@@ -220,7 +266,10 @@
 
 	/* first do some preliminary protection checks */
 
-	if (writeaccess) {
+	if (writeaccess == 2){
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+	} else if (writeaccess == 1) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
 	} else {
@@ -234,7 +283,7 @@
 	 * the fault.
 	 */
 
-	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
+	switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) {
 	case 1:
 		tsk->min_flt++;
 		break;
@@ -292,10 +341,10 @@
 	 */
 
 	if ((unsigned long) (address) < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+		raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 	else
-		printk(KERN_ALERT "Unable to handle kernel access");
-	printk(" at virtual address %08lx\n",address);
+		raw_printk(KERN_ALERT "Unable to handle kernel access");
+	raw_printk(" at virtual address %08lx\n",address);
 
 	die_if_kernel("Oops", regs, (writeaccess << 1) | protection);
 
@@ -346,10 +395,11 @@
 
 		int offset = pgd_index(address);
 		pgd_t *pgd, *pgd_k;
+		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 		pte_t *pte_k;
 
-		pgd = (pgd_t *)current_pgd + offset;
+		pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
 		pgd_k = init_mm.pgd + offset;
 
 		/* Since we're two-level, we don't need to do both
@@ -364,8 +414,13 @@
 		 * it exists.
 		 */
 
-		pmd = pmd_offset(pgd, address);
-		pmd_k = pmd_offset(pgd_k, address);
+		pud = pud_offset(pgd, address);
+		pud_k = pud_offset(pgd_k, address);
+		if (!pud_present(*pud_k))
+			goto no_context;
+
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
 
 		if (!pmd_present(*pmd_k))
 			goto bad_area_nosemaphore;
@@ -385,3 +440,19 @@
 		return;
 	}
 }
+
+/* Find fixup code. */
+int
+find_fixup_code(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) {
+		/* Adjust the instruction pointer in the stackframe. */
+		instruction_pointer(regs) = fixup->fixup;
+		arch_fixup(regs);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index 6b9130b..ebba11e 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -14,9 +14,10 @@
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/arch/memmap.h>
 
 extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
-	unsigned long phys_addr, unsigned long flags)
+	unsigned long phys_addr, pgprot_t prot)
 {
 	unsigned long end;
 
@@ -31,9 +32,7 @@
 			printk("remap_area_pte: page already exists\n");
 			BUG();
 		}
-		set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | __READABLE | 
-							     __WRITEABLE | _PAGE_GLOBAL |
-							     _PAGE_KERNEL | flags)));
+		set_pte(pte, mk_pte_phys(phys_addr, prot));
 		address += PAGE_SIZE;
 		phys_addr += PAGE_SIZE;
 		pte++;
@@ -41,7 +40,7 @@
 }
 
 static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
-	unsigned long phys_addr, unsigned long flags)
+	unsigned long phys_addr, pgprot_t prot)
 {
 	unsigned long end;
 
@@ -56,7 +55,7 @@
 		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+		remap_area_pte(pte, address, end - address, address + phys_addr, prot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -64,7 +63,7 @@
 }
 
 static int remap_area_pages(unsigned long address, unsigned long phys_addr,
-				 unsigned long size, unsigned long flags)
+				 unsigned long size, pgprot_t prot)
 {
 	int error;
 	pgd_t * dir;
@@ -77,13 +76,19 @@
 		BUG();
 	spin_lock(&init_mm.page_table_lock);
 	do {
+		pud_t *pud;
 		pmd_t *pmd;
-		pmd = pmd_alloc(&init_mm, dir, address);
+
 		error = -ENOMEM;
+		pud = pud_alloc(&init_mm, dir, address);
+		if (!pud)
+			break;
+		pmd = pmd_alloc(&init_mm, pud, address);
+
 		if (!pmd)
 			break;
 		if (remap_area_pmd(pmd, address, end - address,
-				   phys_addr + address, flags))
+				   phys_addr + address, prot))
 			break;
 		error = 0;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
@@ -107,9 +112,9 @@
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+void __iomem * __ioremap_prot(unsigned long phys_addr, unsigned long size, pgprot_t prot)
 {
-	void * addr;
+	void __iomem * addr;
 	struct vm_struct * area;
 	unsigned long offset, last_addr;
 
@@ -131,15 +136,36 @@
 	area = get_vm_area(size, VM_IOREMAP);
 	if (!area)
 		return NULL;
-	addr = area->addr;
-	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
-		vfree(addr);
+	addr = (void __iomem *)area->addr;
+	if (remap_area_pages((unsigned long) addr, phys_addr, size, prot)) {
+		vfree((void __force *)addr);
 		return NULL;
 	}
-	return (void *) (offset + (char *)addr);
+	return (void __iomem *) (offset + (char __iomem *)addr);
 }
 
-void iounmap(void *addr)
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+	return __ioremap_prot(phys_addr, size,
+		              __pgprot(_PAGE_PRESENT | __READABLE |
+				       __WRITEABLE | _PAGE_GLOBAL |
+				       _PAGE_KERNEL | flags));
+}
+
+/**
+ * ioremap_nocache     -   map bus memory into CPU space
+ * @phys_addr: bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * Must be freed with iounmap.
+ */
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+        return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0);
+}
+
+void iounmap(volatile void __iomem *addr)
 {
 	if (addr > high_memory)
 		return vfree((void *) (PAGE_MASK & (unsigned long) addr));
diff --git a/arch/cris/mm/tlb.c b/arch/cris/mm/tlb.c
index 23eca5a..0df390a 100644
--- a/arch/cris/mm/tlb.c
+++ b/arch/cris/mm/tlb.c
@@ -29,18 +29,6 @@
 struct mm_struct *page_id_map[NUM_PAGEID];
 static int map_replace_ptr = 1;  /* which page_id_map entry to replace next */
 
-/*
- * Initialize the context related info for a new mm_struct
- * instance.
- */
-
-int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	mm->context = NO_CONTEXT;
-	return 0;
-}
-
 /* the following functions are similar to those used in the PPC port */
 
 static inline void
@@ -60,12 +48,12 @@
 		 */
 		flush_tlb_mm(old_mm);
 
-		old_mm->context = NO_CONTEXT;
+		old_mm->context.page_id = NO_CONTEXT;
 	}
 
 	/* insert it into the page_id_map */
 
-	mm->context = map_replace_ptr;
+	mm->context.page_id = map_replace_ptr;
 	page_id_map[map_replace_ptr] = mm;
 
 	map_replace_ptr++;
@@ -81,7 +69,7 @@
 void
 get_mmu_context(struct mm_struct *mm)
 {
-	if(mm->context == NO_CONTEXT)
+	if(mm->context.page_id == NO_CONTEXT)
 		alloc_context(mm);
 }
 
@@ -96,11 +84,10 @@
 void
 destroy_context(struct mm_struct *mm)
 {
-	if(mm->context != NO_CONTEXT) {
-		D(printk("destroy_context %d (%p)\n", mm->context, mm));
+	if(mm->context.page_id != NO_CONTEXT) {
+		D(printk("destroy_context %d (%p)\n", mm->context.page_id, mm));
 		flush_tlb_mm(mm);  /* TODO this might be redundant ? */
-		page_id_map[mm->context] = NULL;
-		/* mm->context = NO_CONTEXT; redundant.. mm will be freed */
+		page_id_map[mm->context.page_id] = NULL;
 	}
 }
 
diff --git a/include/asm-cris/arch-v10/mmu.h b/include/asm-cris/arch-v10/mmu.h
index d18aa00..df84f17 100644
--- a/include/asm-cris/arch-v10/mmu.h
+++ b/include/asm-cris/arch-v10/mmu.h
@@ -7,7 +7,10 @@
 
 /* type used in struct mm to couple an MMU context to an active mm */
 
-typedef unsigned int mm_context_t;
+typedef struct
+{
+  unsigned int page_id;
+} mm_context_t;
 
 /* kernel memory segments */