sparc64: Store per-cpu offset in trap_block[]
Surprisingly this actually makes LOAD_PER_CPU_BASE() a little
more efficient.
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 68fd9ee..7e26b2d 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -48,7 +48,7 @@
unsigned int dev_mondo_qmask;
unsigned int resum_qmask;
unsigned int nonresum_qmask;
- unsigned long __unused;
+ unsigned long __per_cpu_base;
} __attribute__((aligned(64)));
extern struct trap_per_cpu trap_block[NR_CPUS];
extern void init_cur_cpu_trap(struct thread_info *);
@@ -101,6 +101,7 @@
#define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec
#define TRAP_PER_CPU_RESUM_QMASK 0xf0
#define TRAP_PER_CPU_NONRESUM_QMASK 0xf4
+#define TRAP_PER_CPU_PER_CPU_BASE 0xf8
#define TRAP_BLOCK_SZ_SHIFT 8
@@ -172,12 +173,11 @@
*/
#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
lduh [THR + TI_CPU], REG1; \
- sethi %hi(__per_cpu_shift), REG3; \
- sethi %hi(__per_cpu_base), REG2; \
- ldx [REG3 + %lo(__per_cpu_shift)], REG3; \
- ldx [REG2 + %lo(__per_cpu_base)], REG2; \
- sllx REG1, REG3, REG3; \
- add REG3, REG2, DEST;
+ sethi %hi(trap_block), REG2; \
+ sllx REG1, TRAP_BLOCK_SZ_SHIFT, REG1; \
+ or REG2, %lo(trap_block), REG2; \
+ add REG2, REG1, REG2; \
+ ldx [REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
#else