ARCv2: Support for ARCv2 ISA and HS38x cores

The notable features are:
    - SMP configurations of upto 4 cores with coherency
    - Optional L2 Cache and IO-Coherency
    - Revised Interrupt Architecture (multiple priorites, reg banks,
        auto stack switch, auto regfile save/restore)
    - MMUv4 (PIPT dcache, Huge Pages)
    - Instructions for
	* 64bit load/store: LDD, STD
	* Hardware assisted divide/remainder: DIV, REM
	* Function prologue/epilogue: ENTER_S, LEAVE_S
	* IRQ enable/disable: CLRI, SETI
	* pop count: FFS, FLS
	* SETcc, BMSKN, XBFU...

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 6496465..373bb41 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -16,6 +16,7 @@
 #define ARC_REG_PERIBASE_BCR	0x69
 #define ARC_REG_FP_BCR		0x6B	/* ARCompact: Single-Precision FPU */
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
+#define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_DCCM_BCR	0x74	/* DCCM Present + SZ */
 #define ARC_REG_TIMERS_BCR	0x75
 #define ARC_REG_AP_BCR		0x76
@@ -52,6 +53,7 @@
  * [15: 8] = Exception Cause Code
  * [ 7: 0] = Exception Parameters (for certain types only)
  */
+#ifdef CONFIG_ISA_ARCOMPACT
 #define ECR_V_MEM_ERR			0x01
 #define ECR_V_INSN_ERR			0x02
 #define ECR_V_MACH_CHK			0x20
@@ -59,6 +61,15 @@
 #define ECR_V_DTLB_MISS			0x22
 #define ECR_V_PROTV			0x23
 #define ECR_V_TRAP			0x25
+#else
+#define ECR_V_MEM_ERR			0x01
+#define ECR_V_INSN_ERR			0x02
+#define ECR_V_MACH_CHK			0x03
+#define ECR_V_ITLB_MISS			0x04
+#define ECR_V_DTLB_MISS			0x05
+#define ECR_V_PROTV			0x06
+#define ECR_V_TRAP			0x09
+#endif
 
 /* DTLB Miss and Protection Violation Cause Codes */
 
@@ -202,9 +213,11 @@
 
 struct bcr_isa {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int pad1:23, atomic1:1, ver:8;
+	unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1,
+		     pad1:11, atomic1:1, ver:8;
 #else
-	unsigned int ver:8, atomic1:1, pad1:23;
+	unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1,
+		     ldd:1, pad2:4, div_rem:4;
 #endif
 };
 
@@ -267,11 +280,19 @@
 #endif
 };
 
+struct bcr_fp_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8;
+#else
+	unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15;
+#endif
+};
+
 struct bcr_timer {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
+	unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8;
 #else
-	unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
+	unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15;
 #endif
 };
 
@@ -283,6 +304,14 @@
 #endif
 };
 
+struct bcr_bpu_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8;
+#else
+	unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6;
+#endif
+};
+
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int pad:24, ver:8;
@@ -334,6 +363,22 @@
 
 extern struct cpuinfo_arc cpuinfo_arc700[];
 
+static inline int is_isa_arcv2(void)
+{
+	return IS_ENABLED(CONFIG_ISA_ARCV2);
+}
+
+static inline int is_isa_arcompact(void)
+{
+	return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
+}
+
+#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7)
+#error "Toolchain not configured for ARCompact builds"
+#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS)
+#error "Toolchain not configured for ARCv2 builds"
+#endif
+
 #endif /* __ASEMBLY__ */
 
 #endif /* _ASM_ARC_ARCREGS_H */
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 4051e95..829a8a2 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -402,6 +402,8 @@
 	return ((mask & *addr) != 0);
 }
 
+#ifdef CONFIG_ISA_ARCOMPACT
+
 /*
  * Count the number of zeros, starting from MSB
  * Helper for fls( ) friends
@@ -494,6 +496,75 @@
 	return ffs(word) - 1;
 }
 
+#else	/* CONFIG_ISA_ARCV2 */
+
+/*
+ * fls = Find Last Set in word
+ * @result: [1-32]
+ * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
+ */
+static inline __attribute__ ((const)) int fls(unsigned long x)
+{
+	int n;
+
+	asm volatile(
+	"	fls.f	%0, %1		\n"  /* 0:31; 0(Z) if src 0 */
+	"	add.nz	%0, %0, 1	\n"  /* 0:31 -> 1:32 */
+	: "=r"(n)	/* Early clobber not needed */
+	: "r"(x)
+	: "cc");
+
+	return n;
+}
+
+/*
+ * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
+ */
+static inline __attribute__ ((const)) int __fls(unsigned long x)
+{
+	/* FLS insn has exactly same semantics as the API */
+	return	__builtin_arc_fls(x);
+}
+
+/*
+ * ffs = Find First Set in word (LSB to MSB)
+ * @result: [1-32], 0 if all 0's
+ */
+static inline __attribute__ ((const)) int ffs(unsigned long x)
+{
+	int n;
+
+	asm volatile(
+	"	ffs.f	%0, %1		\n"  /* 0:31; 31(Z) if src 0 */
+	"	add.nz	%0, %0, 1	\n"  /* 0:31 -> 1:32 */
+	"	mov.z	%0, 0		\n"  /* 31(Z)-> 0 */
+	: "=r"(n)	/* Early clobber not needed */
+	: "r"(x)
+	: "cc");
+
+	return n;
+}
+
+/*
+ * __ffs: Similar to ffs, but zero based (0-31)
+ */
+static inline __attribute__ ((const)) int __ffs(unsigned long x)
+{
+	int n;
+
+	asm volatile(
+	"	ffs.f	%0, %1		\n"  /* 0:31; 31(Z) if src 0 */
+	"	mov.z	%0, 0		\n"  /* 31(Z)-> 0 */
+	: "=r"(n)
+	: "r"(x)
+	: "cc");
+
+	return n;
+
+}
+
+#endif	/* CONFIG_ISA_ARCOMPACT */
+
 /*
  * ffz = Find First Zero in word.
  * @return:[0-31], 32 if all 1's
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index a262828..51a99e2 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -15,6 +15,11 @@
 /* These ELF defines belong to uapi but libc elf.h already defines them */
 #define EM_ARCOMPACT		93
 
+#define EM_ARCV2		195	/* ARCv2 Cores */
+
+#define EM_ARC_INUSE		(IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
+					EM_ARCOMPACT : EM_ARCV2)
+
 /* ARC Relocations (kernel Modules only) */
 #define  R_ARC_32		0x4
 #define  R_ARC_32_ME		0x1B
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
new file mode 100644
index 0000000..b5ff87e
--- /dev/null
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -0,0 +1,190 @@
+
+#ifndef __ASM_ARC_ENTRY_ARCV2_H
+#define __ASM_ARC_ENTRY_ARCV2_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-arcv2.h>
+#include <asm/thread_info.h>	/* For THREAD_SIZE */
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_PROLOGUE	called_from
+
+	; Before jumping to Interrupt Vector, hardware micro-ops did following:
+	;   1. SP auto-switched to kernel mode stack
+	;   2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0)
+	;   3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32
+	;
+	; Now manually save: r12, sp, fp, gp, r25
+
+	PUSH	r12
+
+	; Saving pt_regs->sp correctly requires some extra work due to the way
+	; Auto stack switch works
+	;  - U mode: retrieve it from AUX_USER_SP
+	;  - K mode: add the offset from current SP where H/w starts auto push
+	;
+	; Utilize the fact that Z bit is set if Intr taken in U mode
+	mov.nz	r9, sp
+	add.nz	r9, r9, SZ_PT_REGS - PT_sp - 4
+	bnz	1f
+
+	lr	r9, [AUX_USER_SP]
+1:
+	PUSH	r9	; SP
+
+	PUSH	fp
+	PUSH	gp
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	PUSH	r25			; user_r25
+	GET_CURR_TASK_ON_CPU	r25
+#else
+	sub	sp, sp, 4
+#endif
+
+.ifnc \called_from, exception
+	sub	sp, sp, 12	; BTA/ECR/orig_r0 placeholder per pt_regs
+.endif
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE	called_from
+
+.ifnc \called_from, exception
+	add	sp, sp, 12	; skip BTA/ECR/orig_r0 placeholderss
+.endif
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	POP	r25
+#else
+	add	sp, sp, 4
+#endif
+
+	POP	gp
+	POP	fp
+
+	; Don't touch AUX_USER_SP if returning to K mode (Z bit set)
+	; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE)
+	add.z	sp, sp, 4
+	bz	1f
+
+	POPAX	AUX_USER_SP
+1:
+	POP	r12
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+	; Before jumping to Exception Vector, hardware micro-ops did following:
+	;   1. SP auto-switched to kernel mode stack
+	;   2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0)
+	;
+	; Now manually save the complete reg file
+
+	PUSH	r9		; freeup a register: slot of erstatus
+
+	PUSHAX	eret
+	sub	sp, sp, 12	; skip JLI, LDI, EI
+	PUSH	lp_count
+	PUSHAX	lp_start
+	PUSHAX	lp_end
+	PUSH	blink
+
+	PUSH	r11
+	PUSH	r10
+
+	ld.as	r9,  [sp, 10]	; load stashed r9 (status32 stack slot)
+	lr	r10, [erstatus]
+	st.as	r10, [sp, 10]	; save status32 at it's right stack slot
+
+	PUSH	r9
+	PUSH	r8
+	PUSH	r7
+	PUSH	r6
+	PUSH	r5
+	PUSH	r4
+	PUSH	r3
+	PUSH	r2
+	PUSH	r1
+	PUSH	r0
+
+	; -- for interrupts, regs above are auto-saved by h/w in that order --
+	; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25)
+	;
+	; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE)
+	; Although H/w exception micro-ops do set Z flag for U mode (just like
+	; for interrupts), it could get clobbered in case we soft land here from
+	; a TLB Miss exception handler (tlbex.S)
+
+	and	r10, r10, STATUS_U_MASK
+	xor.f	0, r10, STATUS_U_MASK
+
+	INTERRUPT_PROLOGUE  exception
+
+	PUSHAX	erbta
+	PUSHAX	ecr		; r9 contains ECR, expected by EV_Trap
+
+	PUSH	r0		; orig_r0
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+
+	; Assumes r0 has PT_status32
+	btst   r0, STATUS_U_BIT	; Z flag set if K, used in INTERRUPT_EPILOGUE
+
+	add	sp, sp, 8	; orig_r0/ECR don't need restoring
+	POPAX	erbta
+
+	INTERRUPT_EPILOGUE  exception
+
+	POP	r0
+	POP	r1
+	POP	r2
+	POP	r3
+	POP	r4
+	POP	r5
+	POP	r6
+	POP	r7
+	POP	r8
+	POP	r9
+	POP	r10
+	POP	r11
+
+	POP	blink
+	POPAX	lp_end
+	POPAX	lp_start
+
+	POP	r9
+	mov	lp_count, r9
+
+	add	sp, sp, 12	; skip JLI, LDI, EI
+	POPAX	eret
+	POPAX	erstatus
+
+	ld.as	r9, [sp, -12]	; reload r9 which got clobbered
+.endm
+
+.macro FAKE_RET_FROM_EXCPN
+	lr      r9, [status32]
+	bic     r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
+	or      r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+	kflag   r9
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP  reg
+	bmskn \reg, sp, THREAD_SHIFT - 1
+.endm
+
+/* Get CPU-ID of this core */
+.macro  GET_CPU_ID  reg
+	lr  \reg, [identity]
+	xbfu \reg, \reg, 0xE8	/* 00111    01000 */
+				/* M = 8-1  N = 8 */
+.endm
+
+#endif
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index f61032c..29d0ab6 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -16,7 +16,11 @@
 #include <asm/processor.h>	/* For VMALLOC_START */
 #include <asm/mmu.h>
 
+#ifdef CONFIG_ISA_ARCOMPACT
 #include <asm/entry-compact.h>	/* ISA specific bits */
+#else
+#include <asm/entry-arcv2.h>
+#endif
 
 /* Note on the LD/ST addr modes with addr reg wback
  *
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index f38652f..49014f0 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -13,8 +13,13 @@
 #define NR_IRQS		128 /* allow some CPU external IRQ handling */
 
 /* Platform Independent IRQs */
+#ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
+#else
+#define TIMER0_IRQ      16
+#define TIMER1_IRQ      17
+#endif
 
 #include <linux/interrupt.h>
 #include <asm-generic/irq.h>
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index c946c56..1eb41b0 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -27,6 +27,9 @@
 #define AUX_IRQ_SELECT		0x40b
 #define AUX_IRQ_ENABLE		0x40c
 
+/* Was Intr taken in User Mode */
+#define AUX_IRQ_ACT_BIT_U	31
+
 /* 0 is highest level, but taken by FIRQs, if present in design */
 #define ARCV2_IRQ_DEF_PRIO		0
 
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 18f3634..aa80557 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -39,6 +39,8 @@
 #define AUX_ITRIGGER		0x40d
 #define AUX_IPULSE		0x415
 
+#define ISA_INIT_STATUS_BITS	STATUS_IE_MASK
+
 #ifndef __ASSEMBLY__
 
 /******************************************************************
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 3339726..59bc6a6 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -10,6 +10,10 @@
 #ifndef __ASM_ARC_IRQFLAGS_H
 #define __ASM_ARC_IRQFLAGS_H
 
+#ifdef CONFIG_ISA_ARCOMPACT
 #include <asm/irqflags-compact.h>
+#else
+#include <asm/irqflags-arcv2.h>
+#endif
 
 #endif
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 1bfeec2..9175597 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -16,6 +16,7 @@
 
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
+#ifdef CONFIG_ISA_ARCOMPACT
 struct pt_regs {
 
 	/* Real registers */
@@ -56,6 +57,48 @@
 
 	long user_r25;
 };
+#else
+
+struct pt_regs {
+
+	long orig_r0;
+
+	union {
+		struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+			unsigned long state:8, ecr_vec:8,
+				      ecr_cause:8, ecr_param:8;
+#else
+			unsigned long ecr_param:8, ecr_cause:8,
+				      ecr_vec:8, state:8;
+#endif
+		};
+		unsigned long event;
+	};
+
+	long bta;	/* bta_l1, bta_l2, erbta */
+
+	long user_r25;
+
+	long r26;	/* gp */
+	long fp;
+	long sp;	/* user/kernel sp depending on where we came from  */
+
+	long r12;
+
+	/*------- Below list auto saved by h/w -----------*/
+	long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+
+	long blink;
+	long lp_end, lp_start, lp_count;
+
+	long ei, ldi, jli;
+
+	long ret;
+	long status32;
+};
+
+#endif
 
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index aca0d5a..3af6745 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -25,6 +25,7 @@
 #endif
 
 #define THREAD_SIZE     (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT	(PAGE_SHIFT << THREAD_SIZE_ORDER)
 
 #ifndef __ASSEMBLY__