Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc update from David Miller:
1) Implement support for up to 47-bit physical addresses on sparc64.
2) Support HAVE_CONTEXT_TRACKING on sparc64, from Kirill Tkhai.
3) Fix Simba bridge window calculations, from Kjetil Oftedal.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next:
sparc64: Implement HAVE_CONTEXT_TRACKING
sparc64: Add self-IPI support for smp_send_reschedule()
sparc: PCI: Fix incorrect address calculation of PCI Bridge windows on Simba-bridges
sparc64: Encode huge PMDs using PTE encoding.
sparc64: Move to 64-bit PGDs and PMDs.
sparc64: Move from 4MB to 8MB huge pages.
sparc64: Make PAGE_OFFSET variable.
sparc64: Fix inconsistent max-physical-address defines.
sparc64: Document the shift counts used to validate linear kernel addresses.
sparc64: Define PAGE_OFFSET in terms of physical address bits.
sparc64: Use PAGE_OFFSET instead of a magic constant.
sparc64: Clean up 64-bit mmap exclusion defines.
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 8591b20..05fcfc6 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -63,6 +63,7 @@
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_KMEMLEAK
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 76092c4..f668797 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -93,7 +93,6 @@
spinlock_t lock;
unsigned long sparc64_ctx_val;
unsigned long huge_pte_count;
- struct page *pgtable_page;
struct tsb_config tsb_block[MM_NUM_TSBS];
struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
} mm_context_t;
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index e155388..aac53fc 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -15,7 +15,10 @@
#define DCACHE_ALIASING_POSSIBLE
#endif
-#define HPAGE_SHIFT 22
+#define HPAGE_SHIFT 23
+#define REAL_HPAGE_SHIFT 22
+
+#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT)
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
@@ -53,8 +56,8 @@
/* These are used to make use of C type-checking.. */
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long iopte; } iopte_t;
-typedef struct { unsigned int pmd; } pmd_t;
-typedef struct { unsigned int pgd; } pgd_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
#define pte_val(x) ((x).pte)
@@ -73,8 +76,8 @@
/* .. while these make it easier on the compiler */
typedef unsigned long pte_t;
typedef unsigned long iopte_t;
-typedef unsigned int pmd_t;
-typedef unsigned int pgd_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
@@ -93,18 +96,44 @@
typedef pte_t *pgtable_t;
+/* These two values define the virtual address space range in which we
+ * must forbid 64-bit user processes from making mappings. It used to
+ * represent precisely the virtual address space hole present in most
+ * early sparc64 chips including UltraSPARC-I. But now it also is
+ * further constrained by the limits of our page tables, which is
+ * 43-bits of virtual address.
+ */
+#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL)
+#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL)
+
+/* The next two defines specify the actual exclusion region we
+ * enforce, wherein we use a 4GB red zone on each side of the VA hole.
+ */
+#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL))
+#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL))
+
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
- (_AC(0x0000000070000000,UL)) : \
- (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
+ _AC(0x0000000070000000,UL) : \
+ VA_EXCLUDE_END)
#include <asm-generic/memory_model.h>
+#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X)))
+extern unsigned long PAGE_OFFSET;
+
#endif /* !(__ASSEMBLY__) */
-/* We used to stick this into a hard-coded global register (%g4)
- * but that does not make sense anymore.
+/* The maximum number of physical memory address bits we support, this
+ * is used to size various tables used to manage kernel TLB misses and
+ * also the sparsemem code.
*/
-#define PAGE_OFFSET _AC(0xFFFFF80000000000,UL)
+#define MAX_PHYS_ADDRESS_BITS 47
+
+/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
+ * and kpte_linear_bitmap.
+ */
+#define ILOG2_4MB 22
+#define ILOG2_256MB 28
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 3676031..8358dc1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -48,18 +48,18 @@
/* PMD_SHIFT determines the size of the area a second-level page
* table can map
*/
-#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4))
+#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PMD_BITS (PAGE_SHIFT - 2)
+#define PMD_BITS (PAGE_SHIFT - 3)
/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
+#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PGDIR_BITS (PAGE_SHIFT - 2)
+#define PGDIR_BITS (PAGE_SHIFT - 3)
-#if (PGDIR_SHIFT + PGDIR_BITS) != 44
+#if (PGDIR_SHIFT + PGDIR_BITS) != 43
#error Page table parameters do not cover virtual address space properly.
#endif
@@ -67,35 +67,12 @@
#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
#endif
-/* PMDs point to PTE tables which are 4K aligned. */
-#define PMD_PADDR _AC(0xfffffffe,UL)
-#define PMD_PADDR_SHIFT _AC(11,UL)
-
-#define PMD_ISHUGE _AC(0x00000001,UL)
-
-/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
- * pages, this frees up a bunch of bits in the layout that we can
- * use for the protection settings and software metadata.
- */
-#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
-#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
-#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
-#define PMD_HUGE_WRITE _AC(0x00000200,UL)
-#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
-#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
-#define PMD_HUGE_EXEC _AC(0x00000040,UL)
-#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
-
-/* PGDs point to PMD tables which are 8K aligned. */
-#define PGD_PADDR _AC(0xfffffffc,UL)
-#define PGD_PADDR_SHIFT _AC(11,UL)
-
#ifndef __ASSEMBLY__
#include <linux/sched.h>
/* Entries per page directory level. */
-#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4))
+#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
#define PTRS_PER_PMD (1UL << PMD_BITS)
#define PTRS_PER_PGD (1UL << PGDIR_BITS)
@@ -112,6 +89,7 @@
#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */
#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
#define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */
+#define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */
/* Advertise support for _PAGE_SPECIAL */
#define __HAVE_ARCH_PTE_SPECIAL
@@ -125,6 +103,7 @@
#define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */
#define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
#define _PAGE_SPECIAL_4U _AC(0x0200000000000000,UL) /* Special page */
+#define _PAGE_PMD_HUGE_4U _AC(0x0100000000000000,UL) /* Huge page */
#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */
#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
@@ -155,6 +134,7 @@
#define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */
#define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */
#define _PAGE_SPECIAL_4V _AC(0x0200000000000000,UL) /* Special page */
+#define _PAGE_PMD_HUGE_4V _AC(0x0100000000000000,UL) /* Huge page */
#define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */
#define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */
#define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */
@@ -180,6 +160,10 @@
#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
+#if REAL_HPAGE_SHIFT != 22
+#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up
+#endif
+
#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
@@ -239,16 +223,13 @@
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
-#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
-
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
- /* Do nothing, mk_pmd() does this part. */
- return pmd;
+ pte_t pte = pfn_pte(page_nr, pgprot);
+
+ return __pmd(pte_val(pte));
}
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
#endif
/* This one can be done with two shifts. */
@@ -309,14 +290,25 @@
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
- _PAGE_SPECIAL),
+ _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
- _PAGE_SPECIAL));
+ _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_modify(pte, newprot);
+
+ return __pmd(pte_val(pte));
+}
+#endif
+
static inline pte_t pgoff_to_pte(unsigned long off)
{
off <<= PAGE_SHIFT;
@@ -357,7 +349,7 @@
*/
#define pgprot_noncached pgprot_noncached
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline pte_t pte_mkhuge(pte_t pte)
{
unsigned long mask;
@@ -375,6 +367,17 @@
return __pte(pte_val(pte) | mask);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkhuge(pte);
+ pte_val(pte) |= _PAGE_PMD_HUGE;
+
+ return __pmd(pte_val(pte));
+}
+#endif
#endif
static inline pte_t pte_mkdirty(pte_t pte)
@@ -626,91 +629,130 @@
return pte_val(pte) & _PAGE_SPECIAL;
}
-static inline int pmd_large(pmd_t pmd)
+static inline unsigned long pmd_large(pmd_t pmd)
{
- return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
- (PMD_ISHUGE | PMD_HUGE_PRESENT);
+ pte_t pte = __pte(pmd_val(pmd));
+
+ return (pte_val(pte) & _PAGE_PMD_HUGE) && pte_present(pte);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_young(pmd_t pmd)
+static inline unsigned long pmd_young(pmd_t pmd)
{
- return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ return pte_young(pte);
}
-static inline int pmd_write(pmd_t pmd)
+static inline unsigned long pmd_write(pmd_t pmd)
{
- return pmd_val(pmd) & PMD_HUGE_WRITE;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ return pte_write(pte);
}
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
+ pte_t pte = __pte(pmd_val(pmd));
- return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
+ return pte_pfn(pte);
}
-static inline int pmd_trans_splitting(pmd_t pmd)
+static inline unsigned long pmd_trans_huge(pmd_t pmd)
{
- return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
- (PMD_ISHUGE|PMD_HUGE_SPLITTING);
+ pte_t pte = __pte(pmd_val(pmd));
+
+ return pte_val(pte) & _PAGE_PMD_HUGE;
}
-static inline int pmd_trans_huge(pmd_t pmd)
+static inline unsigned long pmd_trans_splitting(pmd_t pmd)
{
- return pmd_val(pmd) & PMD_ISHUGE;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ return pmd_trans_huge(pmd) && pte_special(pte);
}
#define has_transparent_hugepage() 1
static inline pmd_t pmd_mkold(pmd_t pmd)
{
- pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkold(pte);
+
+ return __pmd(pte_val(pte));
}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- pmd_val(pmd) &= ~PMD_HUGE_WRITE;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_wrprotect(pte);
+
+ return __pmd(pte_val(pte));
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- pmd_val(pmd) |= PMD_HUGE_DIRTY;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkdirty(pte);
+
+ return __pmd(pte_val(pte));
}
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
- pmd_val(pmd) |= PMD_HUGE_ACCESSED;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkyoung(pte);
+
+ return __pmd(pte_val(pte));
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
- pmd_val(pmd) |= PMD_HUGE_WRITE;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkwrite(pte);
+
+ return __pmd(pte_val(pte));
}
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
- pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+ unsigned long mask;
+
+ if (tlb_type == hypervisor)
+ mask = _PAGE_PRESENT_4V;
+ else
+ mask = _PAGE_PRESENT_4U;
+
+ pmd_val(pmd) &= ~mask;
+
return pmd;
}
static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
- pmd_val(pmd) |= PMD_HUGE_SPLITTING;
- return pmd;
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkspecial(pte);
+
+ return __pmd(pte_val(pte));
}
-extern pgprot_t pmd_pgprot(pmd_t entry);
+static inline pgprot_t pmd_pgprot(pmd_t entry)
+{
+ unsigned long val = pmd_val(entry);
+
+ return __pgprot(val);
+}
#endif
static inline int pmd_present(pmd_t pmd)
{
- return pmd_val(pmd) != 0U;
+ return pmd_val(pmd) != 0UL;
}
#define pmd_none(pmd) (!pmd_val(pmd))
@@ -728,33 +770,32 @@
static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
- unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
+ unsigned long val = __pa((unsigned long) (ptep));
pmd_val(*pmdp) = val;
}
#define pud_set(pudp, pmdp) \
- (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
+ (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp))))
static inline unsigned long __pmd_page(pmd_t pmd)
{
- unsigned long paddr = (unsigned long) pmd_val(pmd);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_val(pmd) & PMD_ISHUGE)
- paddr &= PMD_HUGE_PADDR;
-#endif
- paddr <<= PMD_PADDR_SHIFT;
- return ((unsigned long) __va(paddr));
+ pte_t pte = __pte(pmd_val(pmd));
+ unsigned long pfn;
+
+ pfn = pte_pfn(pte);
+
+ return ((unsigned long) __va(pfn << PAGE_SHIFT));
}
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page_vaddr(pud) \
- ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
+ ((unsigned long) __va(pud_val(pud)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pmd_bad(pmd) (0)
-#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
+#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
#define pud_present(pud) (pud_val(pud) != 0U)
-#define pud_clear(pudp) (pud_val(*(pudp)) = 0U)
+#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
/* Same in both SUN4V and SUN4U. */
#define pte_none(pte) (!pte_val(pte))
@@ -789,7 +830,7 @@
pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
- set_pmd_at(mm, addr, pmdp, __pmd(0U));
+ set_pmd_at(mm, addr, pmdp, __pmd(0UL));
return pmd;
}
@@ -837,8 +878,8 @@
})
#endif
-extern pgd_t swapper_pg_dir[2048];
-extern pmd_t swapper_low_pmd_dir[2048];
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD];
extern void paging_init(void);
extern unsigned long find_ecache_flush_span(unsigned long size);
diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h
index b99d4e4..e5e1752 100644
--- a/arch/sparc/include/asm/sparsemem.h
+++ b/arch/sparc/include/asm/sparsemem.h
@@ -3,9 +3,11 @@
#ifdef __KERNEL__
+#include <asm/page.h>
+
#define SECTION_SIZE_BITS 30
-#define MAX_PHYSADDR_BITS 42
-#define MAX_PHYSMEM_BITS 42
+#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS
+#define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS
#endif /* !(__KERNEL__) */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index d5e5042..5d9292a 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -192,7 +192,7 @@
#define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
/* flag bit 6 is available */
#define TIF_32BIT 7 /* 32-bit binary */
-/* flag bit 8 is available */
+#define TIF_NOHZ 8 /* in adaptive nohz mode */
#define TIF_SECCOMP 9 /* secure computing */
#define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
@@ -210,6 +210,7 @@
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
#define _TIF_32BIT (1<<TIF_32BIT)
+#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index e696432..2230f80 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -142,98 +142,39 @@
or REG1, %lo(swapper_pg_dir), REG1; \
sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- andn REG2, 0x3, REG2; \
- lduw [REG1 + REG2], REG1; \
+ andn REG2, 0x7, REG2; \
+ ldx [REG1 + REG2], REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, PGD_PADDR_SHIFT, REG1; \
- andn REG2, 0x3, REG2; \
- lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
- sllx REG1, PMD_PADDR_SHIFT, REG1; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
- /* These macros exists only to make the PMD translator below
- * easier to read. It hides the ELF section switch for the
- * sun4v code patching.
- */
-#define OR_PTE_BIT_1INSN(REG, NAME) \
-661: or REG, _PAGE_##NAME##_4U, REG; \
- .section .sun4v_1insn_patch, "ax"; \
- .word 661b; \
- or REG, _PAGE_##NAME##_4V, REG; \
- .previous;
-
-#define OR_PTE_BIT_2INSN(REG, TMP, NAME) \
-661: sethi %hi(_PAGE_##NAME##_4U), TMP; \
- or REG, TMP, REG; \
- .section .sun4v_2insn_patch, "ax"; \
- .word 661b; \
- mov -1, TMP; \
- or REG, _PAGE_##NAME##_4V, REG; \
- .previous;
-
- /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
-#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
-661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
- .section .sun4v_1insn_patch, "ax"; \
- .word 661b; \
- sethi %uhi(_PAGE_VALID), REG; \
- .previous; \
- sllx REG, 32, REG; \
-661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
- .section .sun4v_1insn_patch, "ax"; \
- .word 661b; \
- or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
- .previous;
-
/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
* then jump to FAIL_LABEL. If it is a HUGE PMD, and it
* translates to a valid PTE, branch to PTE_LABEL.
*
- * We translate the PMD by hand, one bit at a time,
- * constructing the huge PTE.
- *
- * So we construct the PTE in REG2 as follows:
- *
- * 1) Extract the PMD PFN from REG1 and place it into REG2.
- *
- * 2) Translate PMD protection bits in REG1 into REG2, one bit
- * at a time using andcc tests on REG1 and OR's into REG2.
- *
- * Only two bits to be concerned with here, EXEC and WRITE.
- * Now REG1 is freed up and we can use it as a temporary.
- *
- * 3) Construct the VALID, CACHE, and page size PTE bits in
- * REG1, OR with REG2 to form final PTE.
+ * We have to propagate the 4MB bit of the virtual address
+ * because we are fabricating 8MB pages using 4MB hw pages.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
- brz,pn REG1, FAIL_LABEL; \
- andcc REG1, PMD_ISHUGE, %g0; \
- be,pt %xcc, 700f; \
- and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
- cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
- bne,pn %xcc, FAIL_LABEL; \
- andn REG1, PMD_HUGE_PROTBITS, REG2; \
- sllx REG2, PMD_PADDR_SHIFT, REG2; \
- /* REG2 now holds PFN << PAGE_SHIFT */ \
- andcc REG1, PMD_HUGE_WRITE, %g0; \
- bne,a,pt %xcc, 1f; \
- OR_PTE_BIT_1INSN(REG2, W); \
-1: andcc REG1, PMD_HUGE_EXEC, %g0; \
- be,pt %xcc, 1f; \
- nop; \
- OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \
- /* REG1 can now be clobbered, build final PTE */ \
-1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
- ba,pt %xcc, PTE_LABEL; \
- or REG1, REG2, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sethi %uhi(_PAGE_PMD_HUGE), REG2; \
+ sllx REG2, 32, REG2; \
+ andcc REG1, REG2, %g0; \
+ be,pt %xcc, 700f; \
+ sethi %hi(4 * 1024 * 1024), REG2; \
+ andn REG1, REG2, REG1; \
+ and VADDR, REG2, REG2; \
+ brlz,pt REG1, PTE_LABEL; \
+ or REG1, REG2, REG1; \
700:
#else
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
@@ -253,18 +194,16 @@
#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \
sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- andn REG2, 0x3, REG2; \
- lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ andn REG2, 0x7, REG2; \
+ ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, PGD_PADDR_SHIFT, REG1; \
- andn REG2, 0x3, REG2; \
- lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
- sllx REG1, PMD_PADDR_SHIFT, REG1; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1; \
ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 9c179fb..140966f 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -88,7 +88,6 @@
extern void bad_trap_tl1(struct pt_regs *regs, long lvl);
-extern void do_fpe_common(struct pt_regs *regs);
extern void do_fpieee(struct pt_regs *regs);
extern void do_fpother(struct pt_regs *regs);
extern void do_tof(struct pt_regs *regs);
diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
index 53c0a82..60b19f5 100644
--- a/arch/sparc/kernel/kgdb_64.c
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -159,11 +159,12 @@
asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
unsigned long flags;
if (user_mode(regs)) {
bad_trap(regs, trap_level);
- return;
+ goto out;
}
flushw_all();
@@ -171,6 +172,8 @@
local_irq_save(flags);
kgdb_handle_exception(0x172, SIGTRAP, 0, regs);
local_irq_restore(flags);
+out:
+ exception_exit(prev_state);
}
int kgdb_arch_init(void)
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index e722121..5a09fd3 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
+#include <linux/context_tracking.h>
#include <asm/signal.h>
#include <asm/cacheflush.h>
#include <asm/uaccess.h>
@@ -418,12 +419,14 @@
asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
BUG_ON(trap_level != 0x170 && trap_level != 0x171);
if (user_mode(regs)) {
local_irq_enable();
bad_trap(regs, trap_level);
- return;
+ goto out;
}
/* trap_level == 0x170 --> ta 0x70
@@ -433,6 +436,8 @@
(trap_level == 0x170) ? "debug" : "debug_2",
regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
bad_trap(regs, trap_level);
+out:
+ exception_exit(prev_state);
}
/* Jprobes support. */
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index fde5a41..542e96a 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -153,12 +153,19 @@
/* Clear the PAGE_OFFSET top virtual bits, shift
* down to get PFN, and make sure PFN is in range.
*/
- sllx %g4, 21, %g5
+661: sllx %g4, 0, %g5
+ .section .page_offset_shift_patch, "ax"
+ .word 661b
+ .previous
/* Check to see if we know about valid memory at the 4MB
* chunk this physical address will reside within.
*/
- srlx %g5, 21 + 41, %g2
+661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2
+ .section .page_offset_shift_patch, "ax"
+ .word 661b
+ .previous
+
brnz,pn %g2, kvmap_dtlb_longpath
nop
@@ -176,7 +183,11 @@
or %g7, %lo(sparc64_valid_addr_bitmap), %g7
.previous
- srlx %g5, 21 + 22, %g2
+661: srlx %g5, ILOG2_4MB, %g2
+ .section .page_offset_shift_patch, "ax"
+ .word 661b
+ .previous
+
srlx %g2, 6, %g5
and %g2, 63, %g2
sllx %g5, 3, %g5
@@ -189,9 +200,18 @@
2: sethi %hi(kpte_linear_bitmap), %g2
/* Get the 256MB physical address index. */
- sllx %g4, 21, %g5
+661: sllx %g4, 0, %g5
+ .section .page_offset_shift_patch, "ax"
+ .word 661b
+ .previous
+
or %g2, %lo(kpte_linear_bitmap), %g2
- srlx %g5, 21 + 28, %g5
+
+661: srlx %g5, ILOG2_256MB, %g5
+ .section .page_offset_shift_patch, "ax"
+ .word 661b
+ .previous
+
and %g5, (32 - 1), %g7
/* Divide by 32 to get the offset into the bitmask. */
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index bc4d3f5..cb02145 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -398,8 +398,8 @@
apb_calc_first_last(map, &first, &last);
res = bus->resource[1];
res->flags = IORESOURCE_MEM;
- region.start = (first << 21);
- region.end = (last << 21) + ((1 << 21) - 1);
+ region.start = (first << 29);
+ region.end = (last << 29) + ((1 << 29) - 1);
pcibios_bus_to_resource(dev, res, ®ion);
}
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index baebab2..32a280e 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -31,6 +31,7 @@
#include <linux/elfcore.h>
#include <linux/sysrq.h>
#include <linux/nmi.h>
+#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -557,6 +558,7 @@
barf:
set_thread_wsaved(window + 1);
+ user_exit();
do_exit(SIGILL);
}
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 773c1f2..c13c9f2 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -27,6 +27,7 @@
#include <trace/syscall.h>
#include <linux/compat.h>
#include <linux/elf.h>
+#include <linux/context_tracking.h>
#include <asm/asi.h>
#include <asm/pgtable.h>
@@ -1066,6 +1067,9 @@
/* do the secure computing check first */
secure_computing_strict(regs->u_regs[UREG_G1]);
+ if (test_thread_flag(TIF_NOHZ))
+ user_exit();
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
ret = tracehook_report_syscall_entry(regs);
@@ -1086,6 +1090,9 @@
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
{
+ if (test_thread_flag(TIF_NOHZ))
+ user_exit();
+
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
@@ -1093,4 +1100,7 @@
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
+
+ if (test_thread_flag(TIF_NOHZ))
+ user_enter();
}
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index afa2a9e..a954eb8 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -18,10 +18,16 @@
#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+#ifdef CONFIG_CONTEXT_TRACKING
+# define SCHEDULE_USER schedule_user
+#else
+# define SCHEDULE_USER schedule
+#endif
+
.text
.align 32
__handle_preemption:
- call schedule
+ call SCHEDULE_USER
wrpr %g0, RTRAP_PSTATE, %pstate
ba,pt %xcc, __handle_preemption_continue
wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 35923e8..cd91d01 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -23,6 +23,7 @@
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/bitops.h>
+#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
@@ -43,6 +44,7 @@
{
struct ucontext __user *ucp = (struct ucontext __user *)
regs->u_regs[UREG_I0];
+ enum ctx_state prev_state = exception_enter();
mc_gregset_t __user *grp;
unsigned long pc, npc, tstate;
unsigned long fp, i7;
@@ -129,16 +131,19 @@
}
if (err)
goto do_sigsegv;
-
+out:
+ exception_exit(prev_state);
return;
do_sigsegv:
force_sig(SIGSEGV, current);
+ goto out;
}
asmlinkage void sparc64_get_context(struct pt_regs *regs)
{
struct ucontext __user *ucp = (struct ucontext __user *)
regs->u_regs[UREG_I0];
+ enum ctx_state prev_state = exception_enter();
mc_gregset_t __user *grp;
mcontext_t __user *mcp;
unsigned long fp, i7;
@@ -220,10 +225,12 @@
}
if (err)
goto do_sigsegv;
-
+out:
+ exception_exit(prev_state);
return;
do_sigsegv:
force_sig(SIGSEGV, current);
+ goto out;
}
struct rt_signal_frame {
@@ -528,11 +535,13 @@
void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
{
+ user_exit();
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs, orig_i0);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+ user_enter();
}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index e142545..b66a533 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1399,8 +1399,13 @@
void smp_send_reschedule(int cpu)
{
- xcall_deliver((u64) &xcall_receive_signal, 0, 0,
- cpumask_of(cpu));
+ if (cpu == smp_processor_id()) {
+ WARN_ON_ONCE(preemptible());
+ set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+ } else {
+ xcall_deliver((u64) &xcall_receive_signal,
+ 0, 0, cpumask_of(cpu));
+ }
}
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index bde867f..e0c09bf8 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -182,7 +182,7 @@
cmp %g5, -1
be,pt %xcc, 80f
nop
- COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7)
+ COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7)
/* That clobbered %g2, reload it. */
ldxa [%g0] ASI_SCRATCHPAD, %g2
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 51561b8..beb0b5a 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -24,6 +24,7 @@
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/export.h>
+#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/utrap.h>
@@ -39,9 +40,6 @@
return PAGE_SIZE;
}
-#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
-#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
-
/* Does addr --> addr+len fall within 4GB of the VA-space hole or
* overflow past the end of the 64-bit address space?
*/
@@ -499,6 +497,7 @@
asmlinkage void sparc_breakpoint(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (test_thread_flag(TIF_32BIT)) {
@@ -517,6 +516,7 @@
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
+ exception_exit(prev_state);
}
extern void check_pending(int signum);
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index d950197..87729ff 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -52,7 +52,7 @@
#endif
.align 32
1: ldx [%g6 + TI_FLAGS], %l5
- andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0
+ andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
be,pt %icc, rtrap
nop
call syscall_trace_leave
@@ -184,7 +184,7 @@
srl %i3, 0, %o3 ! IEU0
srl %i2, 0, %o2 ! IEU0 Group
- andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
bne,pn %icc, linux_syscall_trace32 ! CTI
mov %i0, %l5 ! IEU1
5: call %l7 ! CTI Group brk forced
@@ -207,7 +207,7 @@
mov %i3, %o3 ! IEU1
mov %i4, %o4 ! IEU0 Group
- andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
bne,pn %icc, linux_syscall_trace ! CTI Group
mov %i0, %l5 ! IEU0
2: call %l7 ! CTI Group brk forced
@@ -223,7 +223,7 @@
cmp %o0, -ERESTART_RESTARTBLOCK
bgeu,pn %xcc, 1f
- andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
2:
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index b3f833a..4ced92f 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -20,6 +20,7 @@
#include <linux/ftrace.h>
#include <linux/reboot.h>
#include <linux/gfp.h>
+#include <linux/context_tracking.h>
#include <asm/smp.h>
#include <asm/delay.h>
@@ -186,11 +187,12 @@
void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "instruction access exception", regs,
0, 0x8, SIGTRAP) == NOTIFY_STOP)
- return;
+ goto out;
if (regs->tstate & TSTATE_PRIV) {
printk("spitfire_insn_access_exception: SFSR[%016lx] "
@@ -207,6 +209,8 @@
info.si_addr = (void __user *)regs->tpc;
info.si_trapno = 0;
force_sig_info(SIGSEGV, &info, current);
+out:
+ exception_exit(prev_state);
}
void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
@@ -260,11 +264,12 @@
void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "data access exception", regs,
0, 0x30, SIGTRAP) == NOTIFY_STOP)
- return;
+ goto out;
if (regs->tstate & TSTATE_PRIV) {
/* Test if this comes from uaccess places. */
@@ -280,7 +285,7 @@
#endif
regs->tpc = entry->fixup;
regs->tnpc = regs->tpc + 4;
- return;
+ goto out;
}
/* Shit... */
printk("spitfire_data_access_exception: SFSR[%016lx] "
@@ -294,6 +299,8 @@
info.si_addr = (void __user *)sfar;
info.si_trapno = 0;
force_sig_info(SIGSEGV, &info, current);
+out:
+ exception_exit(prev_state);
}
void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
@@ -1994,6 +2001,7 @@
*/
void sun4v_resum_error(struct pt_regs *regs, unsigned long offset)
{
+ enum ctx_state prev_state = exception_enter();
struct sun4v_error_entry *ent, local_copy;
struct trap_per_cpu *tb;
unsigned long paddr;
@@ -2022,12 +2030,14 @@
pr_info("Shutdown request, %u seconds...\n",
local_copy.err_secs);
orderly_poweroff(true);
- return;
+ goto out;
}
sun4v_log_error(regs, &local_copy, cpu,
KERN_ERR "RESUMABLE ERROR",
&sun4v_resum_oflow_cnt);
+out:
+ exception_exit(prev_state);
}
/* If we try to printk() we'll probably make matters worse, by trying
@@ -2152,7 +2162,7 @@
err, op);
}
-void do_fpe_common(struct pt_regs *regs)
+static void do_fpe_common(struct pt_regs *regs)
{
if (regs->tstate & TSTATE_PRIV) {
regs->tpc = regs->tnpc;
@@ -2188,23 +2198,28 @@
void do_fpieee(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
if (notify_die(DIE_TRAP, "fpu exception ieee", regs,
0, 0x24, SIGFPE) == NOTIFY_STOP)
- return;
+ goto out;
do_fpe_common(regs);
+out:
+ exception_exit(prev_state);
}
extern int do_mathemu(struct pt_regs *, struct fpustate *, bool);
void do_fpother(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
struct fpustate *f = FPUSTATE;
int ret = 0;
if (notify_die(DIE_TRAP, "fpu exception other", regs,
0, 0x25, SIGFPE) == NOTIFY_STOP)
- return;
+ goto out;
switch ((current_thread_info()->xfsr[0] & 0x1c000)) {
case (2 << 14): /* unfinished_FPop */
@@ -2213,17 +2228,20 @@
break;
}
if (ret)
- return;
+ goto out;
do_fpe_common(regs);
+out:
+ exception_exit(prev_state);
}
void do_tof(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs,
0, 0x26, SIGEMT) == NOTIFY_STOP)
- return;
+ goto out;
if (regs->tstate & TSTATE_PRIV)
die_if_kernel("Penguin overflow trap from kernel mode", regs);
@@ -2237,15 +2255,18 @@
info.si_addr = (void __user *)regs->tpc;
info.si_trapno = 0;
force_sig_info(SIGEMT, &info, current);
+out:
+ exception_exit(prev_state);
}
void do_div0(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "integer division by zero", regs,
0, 0x28, SIGFPE) == NOTIFY_STOP)
- return;
+ goto out;
if (regs->tstate & TSTATE_PRIV)
die_if_kernel("TL0: Kernel divide by zero.", regs);
@@ -2259,6 +2280,8 @@
info.si_addr = (void __user *)regs->tpc;
info.si_trapno = 0;
force_sig_info(SIGFPE, &info, current);
+out:
+ exception_exit(prev_state);
}
static void instruction_dump(unsigned int *pc)
@@ -2415,6 +2438,7 @@
void do_illegal_instruction(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
unsigned long pc = regs->tpc;
unsigned long tstate = regs->tstate;
u32 insn;
@@ -2422,7 +2446,7 @@
if (notify_die(DIE_TRAP, "illegal instruction", regs,
0, 0x10, SIGILL) == NOTIFY_STOP)
- return;
+ goto out;
if (tstate & TSTATE_PRIV)
die_if_kernel("Kernel illegal instruction", regs);
@@ -2431,14 +2455,14 @@
if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
if ((insn & 0xc1ffc000) == 0x81700000) /* POPC */ {
if (handle_popc(insn, regs))
- return;
+ goto out;
} else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ {
if (handle_ldf_stq(insn, regs))
- return;
+ goto out;
} else if (tlb_type == hypervisor) {
if ((insn & VIS_OPCODE_MASK) == VIS_OPCODE_VAL) {
if (!vis_emul(regs, insn))
- return;
+ goto out;
} else {
struct fpustate *f = FPUSTATE;
@@ -2448,7 +2472,7 @@
* Trap in the %fsr to unimplemented_FPop.
*/
if (do_mathemu(regs, f, true))
- return;
+ goto out;
}
}
}
@@ -2458,21 +2482,24 @@
info.si_addr = (void __user *)pc;
info.si_trapno = 0;
force_sig_info(SIGILL, &info, current);
+out:
+ exception_exit(prev_state);
}
extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn);
void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "memory address unaligned", regs,
0, 0x34, SIGSEGV) == NOTIFY_STOP)
- return;
+ goto out;
if (regs->tstate & TSTATE_PRIV) {
kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
- return;
+ goto out;
}
info.si_signo = SIGBUS;
info.si_errno = 0;
@@ -2480,6 +2507,8 @@
info.si_addr = (void __user *)sfar;
info.si_trapno = 0;
force_sig_info(SIGBUS, &info, current);
+out:
+ exception_exit(prev_state);
}
void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
@@ -2504,11 +2533,12 @@
void do_privop(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
siginfo_t info;
if (notify_die(DIE_TRAP, "privileged operation", regs,
0, 0x11, SIGILL) == NOTIFY_STOP)
- return;
+ goto out;
if (test_thread_flag(TIF_32BIT)) {
regs->tpc &= 0xffffffff;
@@ -2520,6 +2550,8 @@
info.si_addr = (void __user *)regs->tpc;
info.si_trapno = 0;
force_sig_info(SIGILL, &info, current);
+out:
+ exception_exit(prev_state);
}
void do_privact(struct pt_regs *regs)
@@ -2530,99 +2562,116 @@
/* Trap level 1 stuff or other traps we should never see... */
void do_cee(struct pt_regs *regs)
{
+ exception_enter();
die_if_kernel("TL0: Cache Error Exception", regs);
}
void do_cee_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Cache Error Exception", regs);
}
void do_dae_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Data Access Exception", regs);
}
void do_iae_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Instruction Access Exception", regs);
}
void do_div0_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: DIV0 Exception", regs);
}
void do_fpdis_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: FPU Disabled", regs);
}
void do_fpieee_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: FPU IEEE Exception", regs);
}
void do_fpother_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: FPU Other Exception", regs);
}
void do_ill_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Illegal Instruction Exception", regs);
}
void do_irq_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: IRQ Exception", regs);
}
void do_lddfmna_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: LDDF Exception", regs);
}
void do_stdfmna_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: STDF Exception", regs);
}
void do_paw(struct pt_regs *regs)
{
+ exception_enter();
die_if_kernel("TL0: Phys Watchpoint Exception", regs);
}
void do_paw_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Phys Watchpoint Exception", regs);
}
void do_vaw(struct pt_regs *regs)
{
+ exception_enter();
die_if_kernel("TL0: Virt Watchpoint Exception", regs);
}
void do_vaw_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Virt Watchpoint Exception", regs);
}
void do_tof_tl1(struct pt_regs *regs)
{
+ exception_enter();
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("TL1: Tag Overflow Exception", regs);
}
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index a313e4a..14158d4 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -75,7 +75,7 @@
mov 512, %g7
andn %g5, 0x7, %g5
sllx %g7, %g6, %g7
- srlx %g4, HPAGE_SHIFT, %g6
+ srlx %g4, REAL_HPAGE_SHIFT, %g6
sub %g7, 1, %g7
and %g6, %g7, %g6
sllx %g6, 4, %g6
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 8201c25e..3c1a7cb 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -21,9 +21,12 @@
#include <linux/bitops.h>
#include <linux/perf_event.h>
#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
#include <asm/fpumacro.h>
#include <asm/cacheflush.h>
+#include "entry.h"
+
enum direction {
load, /* ld, ldd, ldh, ldsh */
store, /* st, std, sth, stsh */
@@ -418,9 +421,6 @@
extern void do_fpother(struct pt_regs *regs);
extern void do_privact(struct pt_regs *regs);
-extern void spitfire_data_access_exception(struct pt_regs *regs,
- unsigned long sfsr,
- unsigned long sfar);
extern void sun4v_data_access_exception(struct pt_regs *regs,
unsigned long addr,
unsigned long type_ctx);
@@ -578,6 +578,7 @@
void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
{
+ enum ctx_state prev_state = exception_enter();
unsigned long pc = regs->tpc;
unsigned long tstate = regs->tstate;
u32 insn;
@@ -632,13 +633,16 @@
sun4v_data_access_exception(regs, sfar, sfsr);
else
spitfire_data_access_exception(regs, sfsr, sfar);
- return;
+ goto out;
}
advance(regs);
+out:
+ exception_exit(prev_state);
}
void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
{
+ enum ctx_state prev_state = exception_enter();
unsigned long pc = regs->tpc;
unsigned long tstate = regs->tstate;
u32 insn;
@@ -680,7 +684,9 @@
sun4v_data_access_exception(regs, sfar, sfsr);
else
spitfire_data_access_exception(regs, sfsr, sfar);
- return;
+ goto out;
}
advance(regs);
+out:
+ exception_exit(prev_state);
}
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 0bacceb..932ff90 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -122,6 +122,11 @@
*(.swapper_4m_tsb_phys_patch)
__swapper_4m_tsb_phys_patch_end = .;
}
+ .page_offset_shift_patch : {
+ __page_offset_shift_patch = .;
+ *(.page_offset_shift_patch)
+ __page_offset_shift_patch_end = .;
+ }
.popc_3insn_patch : {
__popc_3insn_patch = .;
*(.popc_3insn_patch)
diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S
index 77e531f..46272df 100644
--- a/arch/sparc/lib/clear_page.S
+++ b/arch/sparc/lib/clear_page.S
@@ -37,10 +37,10 @@
.globl clear_user_page
clear_user_page: /* %o0=dest, %o1=vaddr */
lduw [%g6 + TI_PRE_COUNT], %o2
- sethi %uhi(PAGE_OFFSET), %g2
+ sethi %hi(PAGE_OFFSET), %g2
sethi %hi(PAGE_SIZE), %o4
- sllx %g2, 32, %g2
+ ldx [%g2 + %lo(PAGE_OFFSET)], %g2
sethi %hi(PAGE_KERNEL_LOCKED), %g3
ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S
index 4d2df32..dd16c61 100644
--- a/arch/sparc/lib/copy_page.S
+++ b/arch/sparc/lib/copy_page.S
@@ -46,10 +46,10 @@
.type copy_user_page,#function
copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
lduw [%g6 + TI_PRE_COUNT], %o4
- sethi %uhi(PAGE_OFFSET), %g2
+ sethi %hi(PAGE_OFFSET), %g2
sethi %hi(PAGE_SIZE), %o3
- sllx %g2, 32, %g2
+ ldx [%g2 + %lo(PAGE_OFFSET)], %g2
sethi %hi(PAGE_KERNEL_LOCKED), %g3
ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 2ebec26..69bb818 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -21,6 +21,7 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/percpu.h>
+#include <linux/context_tracking.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -272,6 +273,7 @@
asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned int insn = 0;
@@ -282,7 +284,7 @@
fault_code = get_thread_fault_code();
if (notify_page_fault(regs))
- return;
+ goto exit_exception;
si_code = SEGV_MAPERR;
address = current_thread_info()->fault_address;
@@ -313,7 +315,7 @@
/* Valid, no problems... */
} else {
bad_kernel_pc(regs, address);
- return;
+ goto exit_exception;
}
} else
flags |= FAULT_FLAG_USER;
@@ -430,7 +432,7 @@
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
- return;
+ goto exit_exception;
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
@@ -482,6 +484,8 @@
}
#endif
+exit_exception:
+ exception_exit(prev_state);
return;
/*
@@ -494,7 +498,7 @@
handle_kernel_fault:
do_kernel_fault(regs, si_code, fault_code, insn, address);
- return;
+ goto exit_exception;
/*
* We ran out of memory, or some other thing happened to us that made
@@ -505,7 +509,7 @@
up_read(&mm->mmap_sem);
if (!(regs->tstate & TSTATE_PRIV)) {
pagefault_out_of_memory();
- return;
+ goto exit_exception;
}
goto handle_kernel_fault;
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 01ee23d..c4d3da6 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -71,13 +71,12 @@
int *nr)
{
struct page *head, *page, *tail;
- u32 mask;
int refs;
- mask = PMD_HUGE_PRESENT;
- if (write)
- mask |= PMD_HUGE_WRITE;
- if ((pmd_val(pmd) & mask) != mask)
+ if (!pmd_large(pmd))
+ return 0;
+
+ if (write && !pmd_write(pmd))
return 0;
refs = 0;
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 9639964..3096317 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -21,8 +21,6 @@
/* Slightly simplified from the non-hugepage variant because by
* definition we don't have to worry about any page coloring stuff
*/
-#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
-#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
unsigned long addr,
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d6de935..6b64379 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -354,7 +354,7 @@
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
- __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
address, pte_val(pte));
else
#endif
@@ -1557,6 +1557,96 @@
return ~0UL;
}
+unsigned long PAGE_OFFSET;
+EXPORT_SYMBOL(PAGE_OFFSET);
+
+static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
+{
+ unsigned long final_shift;
+ unsigned int val = *insn;
+ unsigned int cnt;
+
+ /* We are patching in ilog2(max_supported_phys_address), and
+ * we are doing so in a manner similar to a relocation addend.
+ * That is, we are adding the shift value to whatever value
+ * is in the shift instruction count field already.
+ */
+ cnt = (val & 0x3f);
+ val &= ~0x3f;
+
+ /* If we are trying to shift >= 64 bits, clear the destination
+ * register. This can happen when phys_bits ends up being equal
+ * to MAX_PHYS_ADDRESS_BITS.
+ */
+ final_shift = (cnt + (64 - phys_bits));
+ if (final_shift >= 64) {
+ unsigned int rd = (val >> 25) & 0x1f;
+
+ val = 0x80100000 | (rd << 25);
+ } else {
+ val |= final_shift;
+ }
+ *insn = val;
+
+ __asm__ __volatile__("flush %0"
+ : /* no outputs */
+ : "r" (insn));
+}
+
+static void __init page_offset_shift_patch(unsigned long phys_bits)
+{
+ extern unsigned int __page_offset_shift_patch;
+ extern unsigned int __page_offset_shift_patch_end;
+ unsigned int *p;
+
+ p = &__page_offset_shift_patch;
+ while (p < &__page_offset_shift_patch_end) {
+ unsigned int *insn = (unsigned int *)(unsigned long)*p;
+
+ page_offset_shift_patch_one(insn, phys_bits);
+
+ p++;
+ }
+}
+
+static void __init setup_page_offset(void)
+{
+ unsigned long max_phys_bits = 40;
+
+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ max_phys_bits = 42;
+ } else if (tlb_type == hypervisor) {
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ case SUN4V_CHIP_NIAGARA2:
+ max_phys_bits = 39;
+ break;
+ case SUN4V_CHIP_NIAGARA3:
+ max_phys_bits = 43;
+ break;
+ case SUN4V_CHIP_NIAGARA4:
+ case SUN4V_CHIP_NIAGARA5:
+ case SUN4V_CHIP_SPARC64X:
+ default:
+ max_phys_bits = 47;
+ break;
+ }
+ }
+
+ if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) {
+ prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n",
+ max_phys_bits);
+ prom_halt();
+ }
+
+ PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
+
+ pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+ PAGE_OFFSET, max_phys_bits);
+
+ page_offset_shift_patch(max_phys_bits);
+}
+
static void __init tsb_phys_patch(void)
{
struct tsb_ldquad_phys_patch_entry *pquad;
@@ -1722,7 +1812,7 @@
#ifndef CONFIG_DEBUG_PAGEALLOC
if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
- 0xfffff80000000000UL;
+ PAGE_OFFSET;
kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
@@ -1731,7 +1821,7 @@
if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
- 0xfffff80000000000UL;
+ PAGE_OFFSET;
kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
@@ -1740,7 +1830,7 @@
if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
- 0xfffff80000000000UL;
+ PAGE_OFFSET;
kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
@@ -1752,7 +1842,7 @@
/* paging_init() sets up the page tables */
static unsigned long last_valid_pfn;
-pgd_t swapper_pg_dir[2048];
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
static void sun4u_pgprot_init(void);
static void sun4v_pgprot_init(void);
@@ -1763,6 +1853,8 @@
unsigned long real_end, i;
int node;
+ setup_page_offset();
+
/* These build time checkes make sure that the dcache_dirty_cpu()
* page->flags usage will work.
*
@@ -2261,10 +2353,10 @@
__ACCESS_BITS_4U | _PAGE_E_4U);
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
- 0xfffff80000000000UL;
+ PAGE_OFFSET;
#endif
kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
_PAGE_P_4U | _PAGE_W_4U);
@@ -2308,10 +2400,10 @@
_PAGE_CACHE = _PAGE_CACHE_4V;
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
- 0xfffff80000000000UL;
+ PAGE_OFFSET;
#endif
kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
@@ -2455,53 +2547,13 @@
: : "r" (pstate));
}
-static pte_t *get_from_cache(struct mm_struct *mm)
-{
- struct page *page;
- pte_t *ret;
-
- spin_lock(&mm->page_table_lock);
- page = mm->context.pgtable_page;
- ret = NULL;
- if (page) {
- void *p = page_address(page);
-
- mm->context.pgtable_page = NULL;
-
- ret = (pte_t *) (p + (PAGE_SIZE / 2));
- }
- spin_unlock(&mm->page_table_lock);
-
- return ret;
-}
-
-static struct page *__alloc_for_cache(struct mm_struct *mm)
-{
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
- __GFP_REPEAT | __GFP_ZERO);
-
- if (page) {
- spin_lock(&mm->page_table_lock);
- if (!mm->context.pgtable_page) {
- atomic_set(&page->_count, 2);
- mm->context.pgtable_page = page;
- }
- spin_unlock(&mm->page_table_lock);
- }
- return page;
-}
-
pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- struct page *page;
- pte_t *pte;
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+ pte_t *pte = NULL;
- pte = get_from_cache(mm);
- if (pte)
- return pte;
-
- page = __alloc_for_cache(mm);
if (page)
pte = (pte_t *) page_address(page);
@@ -2511,14 +2563,10 @@
pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
- struct page *page;
- pte_t *pte;
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+ pte_t *pte = NULL;
- pte = get_from_cache(mm);
- if (pte)
- return pte;
-
- page = __alloc_for_cache(mm);
if (!page)
return NULL;
if (!pgtable_page_ctor(page)) {
@@ -2530,18 +2578,15 @@
void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- struct page *page = virt_to_page(pte);
- if (put_page_testzero(page))
- free_hot_cold_page(page, 0);
+ free_page((unsigned long)pte);
}
static void __pte_free(pgtable_t pte)
{
struct page *page = virt_to_page(pte);
- if (put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
+
+ pgtable_page_dtor(page);
+ __free_page(page);
}
void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -2558,124 +2603,27 @@
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
-{
- if (pgprot_val(pgprot) & _PAGE_VALID)
- pmd_val(pmd) |= PMD_HUGE_PRESENT;
- if (tlb_type == hypervisor) {
- if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
- pmd_val(pmd) |= PMD_HUGE_WRITE;
- if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
- pmd_val(pmd) |= PMD_HUGE_EXEC;
-
- if (!for_modify) {
- if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
- pmd_val(pmd) |= PMD_HUGE_ACCESSED;
- if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
- pmd_val(pmd) |= PMD_HUGE_DIRTY;
- }
- } else {
- if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
- pmd_val(pmd) |= PMD_HUGE_WRITE;
- if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
- pmd_val(pmd) |= PMD_HUGE_EXEC;
-
- if (!for_modify) {
- if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
- pmd_val(pmd) |= PMD_HUGE_ACCESSED;
- if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
- pmd_val(pmd) |= PMD_HUGE_DIRTY;
- }
- }
-
- return pmd;
-}
-
-pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
-{
- pmd_t pmd;
-
- pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
- pmd_val(pmd) |= PMD_ISHUGE;
- pmd = pmd_set_protbits(pmd, pgprot, false);
- return pmd;
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
- pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
- PMD_HUGE_WRITE |
- PMD_HUGE_EXEC);
- pmd = pmd_set_protbits(pmd, newprot, true);
- return pmd;
-}
-
-pgprot_t pmd_pgprot(pmd_t entry)
-{
- unsigned long pte = 0;
-
- if (pmd_val(entry) & PMD_HUGE_PRESENT)
- pte |= _PAGE_VALID;
-
- if (tlb_type == hypervisor) {
- if (pmd_val(entry) & PMD_HUGE_PRESENT)
- pte |= _PAGE_PRESENT_4V;
- if (pmd_val(entry) & PMD_HUGE_EXEC)
- pte |= _PAGE_EXEC_4V;
- if (pmd_val(entry) & PMD_HUGE_WRITE)
- pte |= _PAGE_W_4V;
- if (pmd_val(entry) & PMD_HUGE_ACCESSED)
- pte |= _PAGE_ACCESSED_4V;
- if (pmd_val(entry) & PMD_HUGE_DIRTY)
- pte |= _PAGE_MODIFIED_4V;
- pte |= _PAGE_CP_4V|_PAGE_CV_4V;
- } else {
- if (pmd_val(entry) & PMD_HUGE_PRESENT)
- pte |= _PAGE_PRESENT_4U;
- if (pmd_val(entry) & PMD_HUGE_EXEC)
- pte |= _PAGE_EXEC_4U;
- if (pmd_val(entry) & PMD_HUGE_WRITE)
- pte |= _PAGE_W_4U;
- if (pmd_val(entry) & PMD_HUGE_ACCESSED)
- pte |= _PAGE_ACCESSED_4U;
- if (pmd_val(entry) & PMD_HUGE_DIRTY)
- pte |= _PAGE_MODIFIED_4U;
- pte |= _PAGE_CP_4U|_PAGE_CV_4U;
- }
-
- return __pgprot(pte);
-}
-
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
unsigned long pte, flags;
struct mm_struct *mm;
pmd_t entry = *pmd;
- pgprot_t prot;
if (!pmd_large(entry) || !pmd_young(entry))
return;
- pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
- pte <<= PMD_PADDR_SHIFT;
- pte |= _PAGE_VALID;
+ pte = pmd_val(entry);
- prot = pmd_pgprot(entry);
-
- if (tlb_type == hypervisor)
- pgprot_val(prot) |= _PAGE_SZHUGE_4V;
- else
- pgprot_val(prot) |= _PAGE_SZHUGE_4U;
-
- pte |= pgprot_val(prot);
+ /* We are fabricating 8MB pages using 4MB real hw pages. */
+ pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
mm = vma->vm_mm;
spin_lock_irqsave(&mm->context.lock, flags);
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
- __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
addr, pte);
spin_unlock_irqrestore(&mm->context.lock, flags);
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 0661aa6..5d3782de 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -1,11 +1,13 @@
#ifndef _SPARC64_MM_INIT_H
#define _SPARC64_MM_INIT_H
+#include <asm/page.h>
+
/* Most of the symbols in this file are defined in init.c and
* marked non-static so that assembler code can get at them.
*/
-#define MAX_PHYS_ADDRESS (1UL << 41UL)
+#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS)
#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
#define KPTE_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 656cc46..ad3bf4b 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -161,8 +161,8 @@
if (mm == &init_mm)
return;
- if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
- if (pmd_val(pmd) & PMD_ISHUGE)
+ if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
+ if (pmd_val(pmd) & _PAGE_PMD_HUGE)
mm->context.huge_pte_count++;
else
mm->context.huge_pte_count--;
@@ -178,13 +178,16 @@
}
if (!pmd_none(orig)) {
- bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
+ pte_t orig_pte = __pte(pmd_val(orig));
+ bool exec = pte_exec(orig_pte);
addr &= HPAGE_MASK;
- if (pmd_val(orig) & PMD_ISHUGE)
+ if (pmd_trans_huge(orig)) {
tlb_batch_add_one(mm, addr, exec);
- else
+ tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
+ } else {
tlb_batch_pmd_scan(mm, addr, orig, exec);
+ }
}
}
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 2cc3bce..3b3a360 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -87,7 +87,7 @@
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
+ __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
}
#endif
spin_unlock_irqrestore(&mm->context.lock, flags);
@@ -111,7 +111,7 @@
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries);
+ __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
}
#endif
spin_unlock_irqrestore(&mm->context.lock, flags);
@@ -472,8 +472,6 @@
mm->context.huge_pte_count = 0;
#endif
- mm->context.pgtable_page = NULL;
-
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
* will be confused and think there is an older TSB to free up.
@@ -512,17 +510,10 @@
void destroy_context(struct mm_struct *mm)
{
unsigned long flags, i;
- struct page *page;
for (i = 0; i < MM_NUM_TSBS; i++)
tsb_destroy_one(&mm->context.tsb_block[i]);
- page = mm->context.pgtable_page;
- if (page && put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-
spin_lock_irqsave(&ctx_alloc_lock, flags);
if (CTX_VALID(mm->context)) {
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index 432aa0c..b4f4733 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -153,10 +153,10 @@
.globl __flush_icache_page
__flush_icache_page: /* %o0 = phys_page */
srlx %o0, PAGE_SHIFT, %o0
- sethi %uhi(PAGE_OFFSET), %g1
+ sethi %hi(PAGE_OFFSET), %g1
sllx %o0, PAGE_SHIFT, %o0
sethi %hi(PAGE_SIZE), %g2
- sllx %g1, 32, %g1
+ ldx [%g1 + %lo(PAGE_OFFSET)], %g1
add %o0, %g1, %o0
1: subcc %g2, 32, %g2
bne,pt %icc, 1b
@@ -178,8 +178,8 @@
.align 64
.globl __flush_dcache_page
__flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
- sethi %uhi(PAGE_OFFSET), %g1
- sllx %g1, 32, %g1
+ sethi %hi(PAGE_OFFSET), %g1
+ ldx [%g1 + %lo(PAGE_OFFSET)], %g1
sub %o0, %g1, %o0 ! physical address
srlx %o0, 11, %o0 ! make D-cache TAG
sethi %hi(1 << 14), %o2 ! D-cache size
@@ -287,8 +287,8 @@
#ifdef DCACHE_ALIASING_POSSIBLE
__cheetah_flush_dcache_page: /* 11 insns */
- sethi %uhi(PAGE_OFFSET), %g1
- sllx %g1, 32, %g1
+ sethi %hi(PAGE_OFFSET), %g1
+ ldx [%g1 + %lo(PAGE_OFFSET)], %g1
sub %o0, %g1, %o0
sethi %hi(PAGE_SIZE), %o4
1: subcc %o4, (1 << 5), %o4