[POWERPC] Use 1TB segments

This makes the kernel use 1TB segments for all kernel mappings and for
user addresses of 1TB and above, on machines which support them
(currently POWER5+, POWER6 and PA6T).
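
The segment size in use is recorded in two new globals, mmu_kernel_ssize
and mmu_highuser_ssize (declared in mmu-hash64.h below).  Roughly, the
hash MMU setup code (in hash_utils_64.c, not part of the excerpt below)
ends up doing something like this sketch:

	/* Sketch only; the real code is driven by the device-tree scan */
	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
		mmu_kernel_ssize = MMU_SEGSIZE_1T;
		mmu_highuser_ssize = MMU_SEGSIZE_1T;
	} else {
		mmu_kernel_ssize = MMU_SEGSIZE_256M;
		mmu_highuser_ssize = MMU_SEGSIZE_256M;
	}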

We detect that the machine supports 1TB segments by looking at the
ibm,processor-segment-sizes property in the device tree.
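
That property is a list of 32-bit cells, each giving a supported segment
size as a power-of-two shift (0x1c for 256MB, 0x28 for 1TB).  The scan
lives in the hash MMU setup code rather than in the headers shown below;
an illustrative sketch (the helper name here is made up):

	static int __init scan_seg_sizes(unsigned long node, const char *uname,
					 int depth, void *data)
	{
		char *type = of_get_flat_dt_prop(node, "device_type", NULL);
		u32 *prop;
		unsigned long size = 0;

		if (type == NULL || strcmp(type, "cpu") != 0)
			return 0;	/* only interested in cpu nodes */
		prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
					   &size);
		for (; prop && size >= 4; size -= 4, ++prop)
			if (*prop == 0x28)	/* 2^40 = 1TB supported */
				cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
		return 0;
	}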

We don't currently use 1TB segments for user addresses < 1T, since
that would effectively prevent 32-bit processes from using huge pages
unless we also had a way to revert to using 256MB segments.  That
would be possible but would involve extra complications (such as
keeping track of which segment size was used when HPTEs were inserted)
and is not addressed here.
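
The complication is that the segment size is baked into the MMU state on
both sides: it goes into the B field of the SLB entry and into the first
doubleword of every HPTE, and it changes how the virtual address is
hashed.  A sketch of where the chosen size ends up, using the constants
and helpers added below ('vsid', 'va', 'psize' and 'flags' are
placeholders):

	unsigned long slb_vsid = (vsid << SLB_VSID_SHIFT_1T) |
				 SLB_VSID_B_1T | flags;		/* SLB entry */
	unsigned long hpte_v = hpte_encode_v(va, psize,
					     MMU_SEGSIZE_1T);	/* HPTE dword 0 */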

Parts of this patch were originally written by Ben Herrenschmidt.

Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index d913f46..ae093ef 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -164,6 +164,7 @@
 #define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000800000000000)
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0002000000000000)
+#define CPU_FTR_1T_SEGMENT		LONG_ASM_CONST(0x0004000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -374,7 +375,7 @@
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_CELL | CPU_FTRS_PA6T)
+	    CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_1T_SEGMENT)
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index cc7c17f..6968f43 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -51,22 +51,22 @@
 #ifdef CONFIG_PPC64
 	void            (*hpte_invalidate)(unsigned long slot,
 					   unsigned long va,
-					   int psize,
+					   int psize, int ssize,
 					   int local);
 	long		(*hpte_updatepp)(unsigned long slot, 
 					 unsigned long newpp, 
 					 unsigned long va,
-					 int pize,
+					 int psize, int ssize,
 					 int local);
 	void            (*hpte_updateboltedpp)(unsigned long newpp, 
 					       unsigned long ea,
-					       int psize);
+					       int psize, int ssize);
 	long		(*hpte_insert)(unsigned long hpte_group,
 				       unsigned long va,
 				       unsigned long prpn,
 				       unsigned long rflags,
 				       unsigned long vflags,
-				       int psize);
+				       int psize, int ssize);
 	long		(*hpte_remove)(unsigned long hpte_group);
 	void		(*flush_hash_range)(unsigned long number, int local);
 
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index b22b0d2..82328de 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -47,6 +47,8 @@
 
 /* Bits in the SLB VSID word */
 #define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_1T	24
+#define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
 #define SLB_VSID_B_256M		ASM_CONST(0x0000000000000000)
 #define SLB_VSID_B_1T		ASM_CONST(0x4000000000000000)
@@ -66,6 +68,7 @@
 #define SLB_VSID_USER		(SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
 
 #define SLBIE_C			(0x08000000)
+#define SLBIE_SSIZE_SHIFT	25
 
 /*
  * Hash table
@@ -77,7 +80,7 @@
 #define HPTE_V_AVPN_SHIFT	7
 #define HPTE_V_AVPN		ASM_CONST(0x3fffffffffffff80)
 #define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
-#define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & HPTE_V_AVPN))
+#define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & 0xffffffffffffff80))
 #define HPTE_V_BOLTED		ASM_CONST(0x0000000000000010)
 #define HPTE_V_LOCK		ASM_CONST(0x0000000000000008)
 #define HPTE_V_LARGE		ASM_CONST(0x0000000000000004)
@@ -164,16 +167,19 @@
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+
 #ifndef __ASSEMBLY__
 
 /*
- * The current system page sizes
+ * The current system page and segment sizes
  */
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_virtual_psize;
 extern int mmu_vmalloc_psize;
 extern int mmu_io_psize;
+extern int mmu_kernel_ssize;
+extern int mmu_highuser_ssize;
 
 /*
  * If the processor supports 64k normal pages but not 64k cache
@@ -195,13 +201,15 @@
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize)
+static inline unsigned long hpte_encode_v(unsigned long va, int psize,
+					  int ssize)
 {
-	unsigned long v =
+	unsigned long v;
 	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -226,20 +234,40 @@
 }
 
 /*
- * This hashes a virtual address for a 256Mb segment only for now
+ * Build a VA given VSID, EA and segment size
+ */
+static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+				   int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return (vsid << 28) | (ea & 0xfffffffUL);
+	return (vsid << 40) | (ea & 0xffffffffffUL);
+}
+
+/*
+ * This hashes a virtual address
  */
 
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
+static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
+				     int ssize)
 {
-	return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
+	unsigned long hash, vsid;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+	} else {
+		vsid = va >> 40;
+		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+	}
+	return hash & 0x7fffffffffUL;
 }
 
 extern int __hash_page_4K(unsigned long ea, unsigned long access,
 			  unsigned long vsid, pte_t *ptep, unsigned long trap,
-			  unsigned int local);
+			  unsigned int local, int ssize);
 extern int __hash_page_64K(unsigned long ea, unsigned long access,
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
-			   unsigned int local);
+			   unsigned int local, int ssize);
 struct mm_struct;
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
@@ -248,7 +276,7 @@
 
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long mode,
-			     int psize);
+			     int psize, int ssize);
 
 extern void htab_initialize(void);
 extern void htab_initialize_secondary(void);
@@ -317,12 +345,17 @@
  * which are used by the iSeries firmware.
  */
 
-#define VSID_MULTIPLIER	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS	36
-#define VSID_MODULUS	((1UL<<VSID_BITS)-1)
+#define VSID_MULTIPLIER_256M	ASM_CONST(200730139)	/* 28-bit prime */
+#define VSID_BITS_256M		36
+#define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
 
-#define CONTEXT_BITS	19
-#define USER_ESID_BITS	16
+#define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_1T		24
+#define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
+
+#define CONTEXT_BITS		19
+#define USER_ESID_BITS		16
+#define USER_ESID_BITS_1T	4
 
 #define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))
 
@@ -336,17 +369,17 @@
  *	rx = scratch register (clobbered)
  *
  * 	- rt and rx must be different registers
- * 	- The answer will end up in the low 36 bits of rt.  The higher
+ * 	- The answer will end up in the low VSID_BITS bits of rt.  The higher
  * 	  bits may contain other garbage, so you may need to mask the
  * 	  result.
  */
-#define ASM_VSID_SCRAMBLE(rt, rx)	\
-	lis	rx,VSID_MULTIPLIER@h;					\
-	ori	rx,rx,VSID_MULTIPLIER@l;				\
+#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
+	lis	rx,VSID_MULTIPLIER_##size@h;				\
+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
 	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
 									\
-	srdi	rx,rt,VSID_BITS;					\
-	clrldi	rt,rt,(64-VSID_BITS);					\
+	srdi	rx,rt,VSID_BITS_##size;					\
+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
 	add	rt,rt,rx;		/* add high and low bits */	\
 	/* Now, r3 == VSID (mod 2^36-1), and lies between 0 and		\
 	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
@@ -355,7 +388,7 @@
 	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
 	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
 	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS;	/* extract 2^36 bit */		\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
 	add	rt,rt,rx
 
 
@@ -377,37 +410,60 @@
 } mm_context_t;
 
 
-static inline unsigned long vsid_scramble(unsigned long protovsid)
-{
 #if 0
-	/* The code below is equivalent to this function for arguments
-	 * < 2^VSID_BITS, which is all this should ever be called
-	 * with.  However gcc is not clever enough to compute the
-	 * modulus (2^n-1) without a second multiply. */
-	return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS);
-#else /* 1 */
-	unsigned long x;
+/*
+ * The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with.  However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply.
+ */
+#define vsid_scramble(protovsid, size) \
+	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 
-	x = protovsid * VSID_MULTIPLIER;
-	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
-	return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS;
+#else /* 1 */
+#define vsid_scramble(protovsid, size) \
+	({								 \
+		unsigned long x;					 \
+		x = (protovsid) * VSID_MULTIPLIER_##size;		 \
+		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
+		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
+	})
 #endif /* 1 */
-}
 
 /* This is only valid for addresses >= KERNELBASE */
-static inline unsigned long get_kernel_vsid(unsigned long ea)
+static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	return vsid_scramble(ea >> SID_SHIFT);
+	if (ssize == MMU_SEGSIZE_256M)
+		return vsid_scramble(ea >> SID_SHIFT, 256M);
+	return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
 }
 
-/* This is only valid for user addresses (which are below 2^41) */
-static inline unsigned long get_vsid(unsigned long context, unsigned long ea)
+/* Returns the segment size indicator for a user address */
+static inline int user_segment_size(unsigned long addr)
 {
-	return vsid_scramble((context << USER_ESID_BITS)
-			     | (ea >> SID_SHIFT));
+	/* Use 1T segments if possible for addresses >= 1T */
+	if (addr >= (1UL << SID_SHIFT_1T))
+		return mmu_highuser_ssize;
+	return MMU_SEGSIZE_256M;
 }
 
-#define VSID_SCRAMBLE(pvsid)	(((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS)
+/* This is only valid for user addresses (which are below 2^44) */
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
+				     int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return vsid_scramble((context << USER_ESID_BITS)
+				     | (ea >> SID_SHIFT), 256M);
+	return vsid_scramble((context << USER_ESID_BITS_1T)
+			     | (ea >> SID_SHIFT_1T), 1T);
+}
+
+/*
+ * This is only used on legacy iSeries in lparmap.c,
+ * hence the 256MB segment assumption.
+ */
+#define VSID_SCRAMBLE(pvsid)	(((pvsid) * VSID_MULTIPLIER_256M) %	\
+				 VSID_MODULUS_256M)
 #define KERNEL_VSID(ea)		VSID_SCRAMBLE(GET_ESID(ea))
 
 /* Physical address used by some IO functions */
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index 56a2df0..4ee82c6 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -26,12 +26,18 @@
  */
 #define PAGE_FACTOR		(PAGE_SHIFT - HW_PAGE_SHIFT)
 
-/* Segment size */
+/* Segment size; normal 256M segments */
 #define SID_SHIFT		28
 #define SID_MASK		ASM_CONST(0xfffffffff)
 #define ESID_MASK		0xfffffffff0000000UL
 #define GET_ESID(x)		(((x) >> SID_SHIFT) & SID_MASK)
 
+/* 1T segments */
+#define SID_SHIFT_1T		40
+#define SID_MASK_1T		0xffffffUL
+#define ESID_MASK_1T		0xffffff0000000000UL
+#define GET_ESID_1T(x)		(((x) >> SID_SHIFT_1T) & SID_MASK_1T)
+
 #ifndef __ASSEMBLY__
 #include <asm/cache.h>
 
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index 99a0439..a022f80 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -97,6 +97,7 @@
 	real_pte_t		pte[PPC64_TLB_BATCH_NR];
 	unsigned long		vaddr[PPC64_TLB_BATCH_NR];
 	unsigned int		psize;
+	int			ssize;
 };
 DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 
@@ -127,7 +128,7 @@
 
 
 extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
-			    int local);
+			    int ssize, int local);
 extern void flush_hash_range(unsigned long number, int local);