ARC: MMUv4 preps/1 - Fold PTE K/U access flags

The current ARC VM code has 13 flags in the Page Table entry: some software
(accessed/dirty/non-linear-maps) and the rest hardware specific. With an 8k
MMU page, we need 19 bits for addressing the page frame, so the remaining 13
bits are just about enough to accommodate the current flags.

In MMUv4 there are 2 additional flags, SZ (normal or super page) and WT
(cache access mode write-thru) - and additionally the PFN is 20 bits (vs. 19
before for 8k pages). Thus these can't be held in the current PTE w/o making
each entry 64-bit wide.
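
A rough bit budget (keeping each PTE 32-bit wide, as today):

    MMUv3, 8k page:  19-bit PFN + 13 flag bits           = 32 bits (fits)
    MMUv4, 8k page:  20-bit PFN + 13 flag bits + SZ + WT = 35 bits (no fit)

Folding Kr/Kw/Kx and Ur/Uw/Ux (6 bits) into R/W/X (3 bits) frees 3 bits:

    MMUv4, 8k page:  20-bit PFN + 10 flag bits + SZ + WT = 32 bits (fits)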

It seems there is some scope for compressing the current PTE flags (and
freeing up a few bits). Currently the PTE contains fully orthogonal, distinct
access permissions for kernel and user mode (Kr, Kw, Kx; Ur, Uw, Ux) which
can be folded into one set (R, W, X). The translation of the 3 PTE bits into
the 6 TLB bits (when programming the MMU) can be done based on the following
pre-requisites/assumptions (a C sketch of the expansion follows the list):

1. For kernel-mode-only translations (vmalloc: 0x7000_0000 to
   0x7FFF_FFFF), the PTE additionally has the PAGE_GLOBAL flag set (and
   user space entries can never be global). Thus such a PTE can translate
   to Kr, Kw, Kx (as appropriate) and zero for the User mode counterparts.

2. For non-global entries, the PTE flags can be used to create mirrored
   K and U TLB bits. This is true after commit a950549c675f2c8c504
   "ARC: copy_(to|from)_user() to honor usermode-access permissions"
   which ensured that user-space translations _MUST_ have the same access
   permissions for both U/K mode accesses, so that copy_{to,from}_user()
   play fair with fault-based CoW break and such...
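
Expressed as C pseudo-code, the expansion done when programming the TLB
looks roughly like this (a sketch only: pte_to_tlb_pd1() is a made-up
helper name, and the assumption that the kernel bits sit 3 positions
above the Linux R/W/X bits simply mirrors the shift used in the asm; the
real implementation is the CONV_PTE_TO_TLB assembly macro below):

    static unsigned int pte_to_tlb_pd1(unsigned int pte)
    {
            unsigned int rwx = pte & PTE_BITS_RWX;    /* Linux R, W, X */
            unsigned int perms = rwx << 3;            /* kernel: Kr/Kw/Kx */

            if (!(pte & _PAGE_GLOBAL))                /* user translation */
                    perms |= rwx;                     /* mirror as Ur/Uw/Ux */

            /* merge with PFN + cache bits; this value goes into TLBPD1 */
            return (pte & PTE_BITS_NON_RWX_IN_PD1) | perms;
    }

For a global (vmalloc) PTE the user half stays zero, matching point 1
above; for a non-global PTE both halves are identical, matching point 2.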

There is no such thing as a free lunch - the cost is slightly inflated
TLB-Miss handlers.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 9cce00e..ec382e5 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -218,8 +218,15 @@
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
-	and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
-	sr  r3, [ARC_REG_TLBPD1]    ; these go in PD1
+	and    r3, r0, PTE_BITS_RWX	;       r w x
+	lsl    r2, r3, 3		; r w x 0 0 0
+	and.f  0,  r0, _PAGE_GLOBAL
+	or.z   r2, r2, r3		; r w x r w x
+
+	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
+	or  r3, r3, r2
+
+	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
 #if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
@@ -272,8 +279,8 @@
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Check if PTE permissions approp for executing code
 	cmp_s   r2, VMALLOC_START
-	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE)
-	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE)
+	mov_s   r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
+	or.hs   r2, r2, _PAGE_GLOBAL
 
 	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
 	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
@@ -312,26 +319,21 @@
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
 
-	mov_s   r2, 0
+	cmp_s	r2, VMALLOC_START
+	mov_s   r2, _PAGE_PRESENT	; common bit for K/U PTE
+	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only
+
+	; Linux PTE [RWX] bits are semantically overloaded:
+	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
+	; -Otherwise they are user-mode permissions, and those are exactly
+	;  same for kernel mode as well (e.g. copy_(to|from)_user)
+
 	lr      r3, [ecr]
 	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
-	or.nz   r2, r2, _PAGE_U_READ      	; chk for Read flag in PTE
+	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
 	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
-	or.nz   r2, r2, _PAGE_U_WRITE     	; chk for Write flag in PTE
-	; Above laddering takes care of XCHG access
-	;   which is both Read and Write
-
-	; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx
-	; For copy_(to|from)_user, despite exception taken in kernel mode,
-	; this code is not hit, because EFA would still be the user mode
-	; address (EFA < 0x6000_0000).
-	; This code is for legit kernel mode faults, vmalloc specifically
-	; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
-
-	lr      r3, [efa]
-	cmp     r3, VMALLOC_START - 1   ; If kernel mode access
-	asl.hi  r2, r2, 3               ; make _PAGE_xx flags as _PAGE_K_xx
-	or      r2, r2, _PAGE_PRESENT   ; Common flag for K/U mode
+	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
+	; Above laddering takes care of XCHG access (both R and W)
 
 	; By now, r2 setup with all the Flags we need to check in PTE
 	and     r3, r0, r2              ; Mask out NON Flag bits from PTE