powerpc/time: Optimise decrementer_check_overflow

decrementer_check_overflow is called from arch_local_irq_restore so
we want to make it as light weight as possible. As such, turn
decrementer_check_overflow into an inline function.

To avoid a circular mess of includes, separate out the two components
of struct decrementer_clock and keep the struct clock_event_device
part local to time.c.

The fast path improves from:

arch_local_irq_restore
     0:       mflr    r0
     4:       std     r0,16(r1)
     8:       stdu    r1,-112(r1)
     c:       stb     r3,578(r13)
    10:       cmpdi   cr7,r3,0
    14:       beq-    cr7,24 <.arch_local_irq_restore+0x24>
...
    24:       addi    r1,r1,112
    28:       ld      r0,16(r1)
    2c:       mtlr    r0
    30:       blr

to:

arch_local_irq_restore
    0:       std     r30,-16(r1)
    4:       ld      r30,0(r2)
    8:       stb     r3,578(r13)
    c:       cmpdi   cr7,r3,0
   10:       beq-    cr7,6c <.arch_local_irq_restore+0x6c>
...
   6c:       ld      r30,-16(r1)
   70:       blr

Unfortunately we still setup a local TOC (due to -mminimal-toc). Yet
another sign we should be moving to -mcmodel=medium.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index bc3c745..7eb10fb 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -219,7 +219,7 @@
 extern void secondary_cpu_time_init(void);
 extern void iSeries_time_init_early(void);
 
-extern void decrementer_check_overflow(void);
+DECLARE_PER_CPU(u64, decrementers_next_tb);
 
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 745c1e7..2ff4f5e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -115,6 +115,15 @@
 	: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
+static inline notrace void decrementer_check_overflow(void)
+{
+	u64 now = get_tb_or_rtc();
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+	if (now >= *next_tb)
+		set_dec(1);
+}
+
 notrace void arch_local_irq_restore(unsigned long en)
 {
 	/*
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b1990b9..9754743 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -114,12 +114,8 @@
 	.features       = CLOCK_EVT_FEAT_ONESHOT,
 };
 
-struct decrementer_clock {
-	struct clock_event_device event;
-	u64 next_tb;
-};
-
-static DEFINE_PER_CPU(struct decrementer_clock, decrementers);
+DEFINE_PER_CPU(u64, decrementers_next_tb);
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 
 #ifdef CONFIG_PPC_ISERIES
 static unsigned long __initdata iSeries_recal_titan;
@@ -570,8 +566,8 @@
 void timer_interrupt(struct pt_regs * regs)
 {
 	struct pt_regs *old_regs;
-	struct decrementer_clock *decrementer =  &__get_cpu_var(decrementers);
-	struct clock_event_device *evt = &decrementer->event;
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+	struct clock_event_device *evt = &__get_cpu_var(decrementers);
 
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
@@ -606,7 +602,7 @@
 		get_lppaca()->int_dword.fields.decr_int = 0;
 #endif
 
-	decrementer->next_tb = ~(u64)0;
+	*next_tb = ~(u64)0;
 	if (evt->event_handler)
 		evt->event_handler(evt);
 
@@ -872,19 +868,10 @@
 	       clock->name, clock->mult, clock->shift);
 }
 
-void decrementer_check_overflow(void)
-{
-	u64 now = get_tb_or_rtc();
-	struct decrementer_clock *decrementer = &__get_cpu_var(decrementers);
-
-	if (now >= decrementer->next_tb)
-		set_dec(1);
-}
-
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev)
 {
-	__get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt;
+	__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
 	set_dec(evt);
 	return 0;
 }
@@ -898,7 +885,7 @@
 
 static void register_decrementer_clockevent(int cpu)
 {
-	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
+	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
 
 	*dec = decrementer_clockevent;
 	dec->cpumask = cpumask_of(cpu);