sh: intc: IRQ auto-distribution support.

This implements support for hardware-managed IRQ balancing as implemented
by SH-X3 cores (presently only hooked up for SH7786, but can probably be
carried over to other SH-X3 cores, too).

CPUs need to specify their distribution register along with the mask
definitions, as these follow the same format. Peripheral IRQs that don't
opt out of balancing will be automatically distributed at the whim of the
hardware block, while each CPU needs to verify whether it is handling the
IRQ or not, especially before clearing the mask.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index df8e150..99c593b 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -12,6 +12,14 @@
 #define NR_IRQS_LEGACY		8	/* Legacy external IRQ0-7 */
 
 /*
+ * This is a special IRQ number for indicating that no IRQ has been
+ * triggered and to simply ignore the IRQ dispatch. This is a special
+ * case that can happen with IRQ auto-distribution when multiple CPUs
+ * are woken up and signalled in parallel.
+ */
+#define NO_IRQ_IGNORE		((unsigned int)-1)
+
+/*
  * Convert back and forth between INTEVT and IRQ values.
  */
 #ifdef CONFIG_CPU_HAS_INTEVT
@@ -53,6 +61,14 @@
 # define irq_ctx_exit(cpu) do { } while (0)
 #endif
 
+#ifdef CONFIG_INTC_BALANCING
+extern unsigned int irq_lookup(unsigned int irq);
+extern void irq_finish(unsigned int irq);
+#else
+#define irq_lookup(irq)		(irq)
+#define irq_finish(irq)		do { } while (0)
+#endif
+
 #include <asm-generic/irq.h>
 #ifdef CONFIG_CPU_SH5
 #include <cpu/irq.h>
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 235edf8..d733603 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -573,7 +573,6 @@
 	&usb_ohci_device,
 };
 
-
 /*
  * Please call this function if your platform board
  * use external clock for USB
@@ -581,6 +580,7 @@
 #define USBCTL0		0xffe70858
 #define CLOCK_MODE_MASK 0xffffff7f
 #define EXT_CLOCK_MODE  0x00000080
+
 void __init sh7786_usb_use_exclock(void)
 {
 	u32 val = __raw_readl(USBCTL0) & CLOCK_MODE_MASK;
@@ -598,6 +598,7 @@
 #define PLL_ENB		0x00000002
 #define PHY_RST		0x00000004
 #define ACT_PLL_STATUS	0xc0000000
+
 static void __init sh7786_usb_setup(void)
 {
 	int i = 1000000;
@@ -753,9 +754,19 @@
 #define INTMSK2		0xfe410068
 #define INTMSKCLR2	0xfe41006c
 
+#define INTDISTCR0	0xfe4100b0
+#define INTDISTCR1	0xfe4100b4
+#define INTACK		0xfe4100b8
+#define INTACKCLR	0xfe4100bc
+#define INT2DISTCR0	0xfe410900
+#define INT2DISTCR1	0xfe410904
+#define INT2DISTCR2	0xfe410908
+#define INT2DISTCR3	0xfe41090c
+
 static struct intc_mask_reg mask_registers[] __initdata = {
 	{ CnINTMSK0, CnINTMSKCLR0, 32,
-	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 },
+	    INTC_SMP_BALANCING(INTDISTCR0) },
 	{ INTMSK2, INTMSKCLR2, 32,
 	  { IRL0_LLLL, IRL0_LLLH, IRL0_LLHL, IRL0_LLHH,
 	    IRL0_LHLL, IRL0_LHLH, IRL0_LHHL, IRL0_LHHH,
@@ -767,7 +778,8 @@
 	    IRL4_HHLL, IRL4_HHLH, IRL4_HHHL, 0, } },
 	{ CnINT2MSKR0, CnINT2MSKCR0 , 32,
 	  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, WDT } },
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, WDT },
+	    INTC_SMP_BALANCING(INT2DISTCR0) },
 	{ CnINT2MSKR1, CnINT2MSKCR1, 32,
 	  { TMU0_0, TMU0_1, TMU0_2, TMU0_3, TMU1_0, TMU1_1, TMU1_2, 0,
 	    DMAC0_0, DMAC0_1, DMAC0_2, DMAC0_3, DMAC0_4, DMAC0_5, DMAC0_6,
@@ -776,14 +788,14 @@
 	    HPB_0, HPB_1, HPB_2,
 	    SCIF0_0, SCIF0_1, SCIF0_2, SCIF0_3,
 	    SCIF1,
-	    TMU2, TMU3, 0, } },
+	    TMU2, TMU3, 0, }, INTC_SMP_BALANCING(INT2DISTCR1) },
 	{ CnINT2MSKR2, CnINT2MSKCR2, 32,
 	  { 0, 0, SCIF2, SCIF3, SCIF4, SCIF5,
 	    Eth_0, Eth_1,
 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	    PCIeC0_0, PCIeC0_1, PCIeC0_2,
 	    PCIeC1_0, PCIeC1_1, PCIeC1_2,
-	    USB, 0, 0 } },
+	    USB, 0, 0 }, INTC_SMP_BALANCING(INT2DISTCR2) },
 	{ CnINT2MSKR3, CnINT2MSKCR3, 32,
 	  { 0, 0, 0, 0, 0, 0,
 	    I2C0, I2C1,
@@ -792,7 +804,7 @@
 	    HAC0, HAC1,
 	    FLCTL, 0,
 	    HSPI, GPIO0, GPIO1, Thermal,
-	    0, 0, 0, 0, 0, 0, 0, 0 } },
+	    0, 0, 0, 0, 0, 0, 0, 0 }, INTC_SMP_BALANCING(INT2DISTCR3) },
 };
 
 static struct intc_prio_reg prio_registers[] __initdata = {
@@ -910,6 +922,18 @@
 #define INTC_INTMSKCLR2	INTMSKCLR2
 #define INTC_USERIMASK	0xfe411000
 
+#ifdef CONFIG_INTC_BALANCING
+unsigned int irq_lookup(unsigned int irq)
+{
+	return __raw_readl(INTACK) & 1 ? irq : NO_IRQ_IGNORE;
+}
+
+void irq_finish(unsigned int irq)
+{
+	__raw_writel(irq2evt(irq), INTACKCLR);
+}
+#endif
+
 void __init plat_irq_setup(void)
 {
 	/* disable IRQ3-0 + IRQ7-4 */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index d2d41d0..f6a9319 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -113,19 +113,14 @@
 
 static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
-#endif
 
-asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static inline void handle_one_irq(unsigned int irq)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
-#ifdef CONFIG_IRQSTACKS
 	union irq_ctx *curctx, *irqctx;
-#endif
 
-	irq_enter();
-	irq = irq_demux(irq);
-
-#ifdef CONFIG_IRQSTACKS
 	curctx = (union irq_ctx *)current_thread_info();
 	irqctx = hardirq_ctx[smp_processor_id()];
 
@@ -164,20 +159,9 @@
 			  "r5", "r6", "r7", "r8", "t", "pr"
 		);
 	} else
-#endif
 		generic_handle_irq(irq);
-
-	irq_exit();
-
-	set_irq_regs(old_regs);
-	return 1;
 }
 
-#ifdef CONFIG_IRQSTACKS
-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-
 /*
  * allocate per-cpu stacks for hardirq and for softirq processing
  */
@@ -257,8 +241,33 @@
 
 	local_irq_restore(flags);
 }
+#else
+static inline void handle_one_irq(unsigned int irq)
+{
+	generic_handle_irq(irq);
+}
 #endif
 
+asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+
+	irq = irq_demux(irq_lookup(irq));
+
+	if (irq != NO_IRQ_IGNORE) {
+		handle_one_irq(irq);
+		irq_finish(irq);
+	}
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+
+	return IRQ_HANDLED;
+}
+
 void __init init_IRQ(void)
 {
 	plat_irq_setup();
diff --git a/drivers/sh/Kconfig b/drivers/sh/Kconfig
index 22c3cda..a54de0b 100644
--- a/drivers/sh/Kconfig
+++ b/drivers/sh/Kconfig
@@ -11,3 +11,14 @@
 	  drivers that are using special priority levels.
 
 	  If in doubt, say N.
+
+config INTC_BALANCING
+	bool "Hardware IRQ balancing support"
+	depends on SMP && SUPERH && CPU_SUBTYPE_SH7786
+	help
+	  This enables support for IRQ auto-distribution mode on SH-X3
+	  SMP parts. All of the balancing and CPU wakeup decisions are
+	  taken care of automatically by hardware for distributed
+	  vectors.
+
+	  If in doubt, say N.
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 77d10ac..dcb4c83 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -98,6 +98,9 @@
 static unsigned int intc_prio_level[NR_IRQS];	/* for now */
 static unsigned int default_prio_level = 2;	/* 2 - 16 */
 static unsigned long ack_handle[NR_IRQS];
+#ifdef CONFIG_INTC_BALANCING
+static unsigned long dist_handle[NR_IRQS];
+#endif
 
 static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
 {
@@ -105,6 +108,47 @@
 	return container_of(chip, struct intc_desc_int, chip);
 }
 
+static unsigned long intc_phys_to_virt(struct intc_desc_int *d,
+				       unsigned long address)
+{
+	struct intc_window *window;
+	int k;
+
+	/* scan through physical windows and convert address */
+	for (k = 0; k < d->nr_windows; k++) {
+		window = d->window + k;
+
+		if (address < window->phys)
+			continue;
+
+		if (address >= (window->phys + window->size))
+			continue;
+
+		address -= window->phys;
+		address += (unsigned long)window->virt;
+
+		return address;
+	}
+
+	/* no windows defined, register must be 1:1 mapped virt:phys */
+	return address;
+}
+
+static unsigned int intc_get_reg(struct intc_desc_int *d, unsigned long address)
+{
+	unsigned int k;
+
+	address = intc_phys_to_virt(d, address);
+
+	for (k = 0; k < d->nr_reg; k++) {
+		if (d->reg[k] == address)
+			return k;
+	}
+
+	BUG();
+	return 0;
+}
+
 static inline unsigned int set_field(unsigned int value,
 				     unsigned int field_value,
 				     unsigned int handle)
@@ -238,6 +282,85 @@
 	[MODE_PCLR_REG] = intc_mode_field,
 };
 
+#ifdef CONFIG_INTC_BALANCING
+static inline void intc_balancing_enable(unsigned int irq)
+{
+	struct intc_desc_int *d = get_intc_desc(irq);
+	unsigned long handle = dist_handle[irq];
+	unsigned long addr;
+
+	if (irq_balancing_disabled(irq) || !handle)
+		return;
+
+	addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
+	intc_reg_fns[_INTC_FN(handle)](addr, handle, 1);
+}
+
+static inline void intc_balancing_disable(unsigned int irq)
+{
+	struct intc_desc_int *d = get_intc_desc(irq);
+	unsigned long handle = dist_handle[irq];
+	unsigned long addr;
+
+	if (irq_balancing_disabled(irq) || !handle)
+		return;
+
+	addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
+	intc_reg_fns[_INTC_FN(handle)](addr, handle, 0);
+}
+
+static unsigned int intc_dist_data(struct intc_desc *desc,
+				   struct intc_desc_int *d,
+				   intc_enum enum_id)
+{
+	struct intc_mask_reg *mr = desc->hw.mask_regs;
+	unsigned int i, j, fn, mode;
+	unsigned long reg_e, reg_d;
+
+	for (i = 0; mr && enum_id && i < desc->hw.nr_mask_regs; i++) {
+		mr = desc->hw.mask_regs + i;
+
+		/*
+		 * Skip this entry if there's no auto-distribution
+		 * register associated with it.
+		 */
+		if (!mr->dist_reg)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) {
+			if (mr->enum_ids[j] != enum_id)
+				continue;
+
+			fn = REG_FN_MODIFY_BASE;
+			mode = MODE_ENABLE_REG;
+			reg_e = mr->dist_reg;
+			reg_d = mr->dist_reg;
+
+			fn += (mr->reg_width >> 3) - 1;
+			return _INTC_MK(fn, mode,
+					intc_get_reg(d, reg_e),
+					intc_get_reg(d, reg_d),
+					1,
+					(mr->reg_width - 1) - j);
+		}
+	}
+
+	/*
+	 * It's possible we've gotten here with no distribution options
+	 * available for the IRQ in question, so we just skip over those.
+	 */
+	return 0;
+}
+#else
+static inline void intc_balancing_enable(unsigned int irq)
+{
+}
+
+static inline void intc_balancing_disable(unsigned int irq)
+{
+}
+#endif
+
 static inline void _intc_enable(unsigned int irq, unsigned long handle)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
@@ -253,6 +376,8 @@
 		intc_enable_fns[_INTC_MODE(handle)](addr, handle, intc_reg_fns\
 						    [_INTC_FN(handle)], irq);
 	}
+
+	intc_balancing_enable(irq);
 }
 
 static void intc_enable(unsigned int irq)
@@ -263,10 +388,12 @@
 static void intc_disable(unsigned int irq)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
-	unsigned long handle = (unsigned long) get_irq_chip_data(irq);
+	unsigned long handle = (unsigned long)get_irq_chip_data(irq);
 	unsigned long addr;
 	unsigned int cpu;
 
+	intc_balancing_disable(irq);
+
 	for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_D(handle)); cpu++) {
 #ifdef CONFIG_SMP
 		if (!cpumask_test_cpu(cpu, irq_to_desc(irq)->affinity))
@@ -345,8 +472,7 @@
 
 	intc_disable(irq);
 
-	/* read register and write zero only to the assocaited bit */
-
+	/* read register and write zero only to the associated bit */
 	if (handle) {
 		addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
 		switch (_INTC_FN(handle)) {
@@ -375,7 +501,8 @@
 {
 	int i;
 
-	/* this doesn't scale well, but...
+	/*
+	 * this doesn't scale well, but...
 	 *
 	 * this function should only be used for cerain uncommon
 	 * operations such as intc_set_priority() and intc_set_sense()
@@ -386,7 +513,6 @@
 	 * memory footprint down is to make sure the array is sorted
 	 * and then perform a bisect to lookup the irq.
 	 */
-
 	for (i = 0; i < nr_hp; i++) {
 		if ((hp + i)->irq != irq)
 			continue;
@@ -417,7 +543,6 @@
 		 * primary masking method is using intc_prio_level[irq]
 		 * priority level will be set during next enable()
 		 */
-
 		if (_INTC_FN(ihp->handle) != REG_FN_ERR)
 			_intc_enable(irq, ihp->handle);
 	}
@@ -456,48 +581,6 @@
 	return 0;
 }
 
-static unsigned long intc_phys_to_virt(struct intc_desc_int *d,
-				       unsigned long address)
-{
-	struct intc_window *window;
-	int k;
-
-	/* scan through physical windows and convert address */
-	for (k = 0; k < d->nr_windows; k++) {
-		window = d->window + k;
-
-		if (address < window->phys)
-			continue;
-
-		if (address >= (window->phys + window->size))
-			continue;
-
-		address -= window->phys;
-		address += (unsigned long)window->virt;
-
-		return address;
-	}
-
-	/* no windows defined, register must be 1:1 mapped virt:phys */
-	return address;
-}
-
-static unsigned int __init intc_get_reg(struct intc_desc_int *d,
-					unsigned long address)
-{
-	unsigned int k;
-
-	address = intc_phys_to_virt(d, address);
-
-	for (k = 0; k < d->nr_reg; k++) {
-		if (d->reg[k] == address)
-			return k;
-	}
-
-	BUG();
-	return 0;
-}
-
 static intc_enum __init intc_grp_id(struct intc_desc *desc,
 				    intc_enum enum_id)
 {
@@ -755,13 +838,14 @@
 	 */
 	set_bit(irq, intc_irq_map);
 
-	/* Prefer single interrupt source bitmap over other combinations:
+	/*
+	 * Prefer single interrupt source bitmap over other combinations:
+	 *
 	 * 1. bitmap, single interrupt source
 	 * 2. priority, single interrupt source
 	 * 3. bitmap, multiple interrupt sources (groups)
 	 * 4. priority, multiple interrupt sources (groups)
 	 */
-
 	data[0] = intc_mask_data(desc, d, enum_id, 0);
 	data[1] = intc_prio_data(desc, d, enum_id, 0);
 
@@ -786,7 +870,8 @@
 				      handle_level_irq, "level");
 	set_irq_chip_data(irq, (void *)data[primary]);
 
-	/* set priority level
+	/*
+	 * set priority level
 	 * - this needs to be at least 2 for 5-bit priorities on 7780
 	 */
 	intc_prio_level[irq] = default_prio_level;
@@ -806,7 +891,6 @@
 			 * only secondary priority should access registers, so
 			 * set _INTC_FN(h) = REG_FN_ERR for intc_set_priority()
 			 */
-
 			hp->handle &= ~_INTC_MK(0x0f, 0, 0, 0, 0, 0);
 			hp->handle |= _INTC_MK(REG_FN_ERR, 0, 0, 0, 0, 0);
 		}
@@ -827,6 +911,11 @@
 	if (desc->hw.ack_regs)
 		ack_handle[irq] = intc_ack_data(desc, d, enum_id);
 
+#ifdef CONFIG_INTC_BALANCING
+	if (desc->hw.mask_regs)
+		dist_handle[irq] = intc_dist_data(desc, d, enum_id);
+#endif
+
 #ifdef CONFIG_ARM
 	set_irq_flags(irq, IRQF_VALID); /* Enable IRQ on ARM systems */
 #endif
@@ -892,6 +981,10 @@
 	}
 
 	d->nr_reg = hw->mask_regs ? hw->nr_mask_regs * 2 : 0;
+#ifdef CONFIG_INTC_BALANCING
+	if (d->nr_reg)
+		d->nr_reg += hw->nr_mask_regs;
+#endif
 	d->nr_reg += hw->prio_regs ? hw->nr_prio_regs * 2 : 0;
 	d->nr_reg += hw->sense_regs ? hw->nr_sense_regs : 0;
 	d->nr_reg += hw->ack_regs ? hw->nr_ack_regs : 0;
@@ -912,6 +1005,9 @@
 			smp = IS_SMP(hw->mask_regs[i]);
 			k += save_reg(d, k, hw->mask_regs[i].set_reg, smp);
 			k += save_reg(d, k, hw->mask_regs[i].clr_reg, smp);
+#ifdef CONFIG_INTC_BALANCING
+			k += save_reg(d, k, hw->mask_regs[i].dist_reg, 0);
+#endif
 		}
 	}
 
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index f0e8cca..0d6cd38 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -23,6 +23,9 @@
 struct intc_mask_reg {
 	unsigned long set_reg, clr_reg, reg_width;
 	intc_enum enum_ids[32];
+#ifdef CONFIG_INTC_BALANCING
+	unsigned long dist_reg;
+#endif
 #ifdef CONFIG_SMP
 	unsigned long smp;
 #endif
@@ -41,8 +44,14 @@
 	intc_enum enum_ids[16];
 };
 
+#ifdef CONFIG_INTC_BALANCING
+#define INTC_SMP_BALANCING(reg)	.dist_reg = (reg)
+#else
+#define INTC_SMP_BALANCING(reg)
+#endif
+
 #ifdef CONFIG_SMP
-#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
+#define INTC_SMP(stride, nr)	.smp = (stride) | ((nr) << 8)
 #else
 #define INTC_SMP(stride, nr)
 #endif