s390/pci: use adapter interrupt vector helpers

Make use of the adapter interrupt helpers in the PCI code. This is
the first step to convert the MSI interrupt code to PCI domains.
The patch removes the limitation of 64 adapter interrupts per
PCI function.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5b8feff..27e7fed 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -42,7 +42,6 @@
 #define	SIC_IRQ_MODE_SINGLE		1
 
 #define ZPCI_NR_DMA_SPACES		1
-#define ZPCI_MSI_VEC_BITS		6
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
 
 /* list of all detected zpci devices */
@@ -62,25 +61,13 @@
 };
 
 struct zdev_irq_map {
-	unsigned long	aibv;		/* AI bit vector */
-	int		msi_vecs;	/* consecutive MSI-vectors used */
-	int		__unused;
-	struct callback	cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
-	spinlock_t	lock;		/* protect callbacks against de-reg */
+	struct airq_iv *aibv;		/* Adapter interrupt bit vector */
+	struct callback	*cb;		/* callback handler array */
+	int msi_vecs;			/* consecutive MSI-vectors used */
 };
 
-struct intr_bucket {
-	/* amap of adapters, one bit per dev, corresponds to one irq nr */
-	unsigned long	*alloc;
-	/* AI summary bit, global page for all devices */
-	unsigned long	*aisb;
-	/* pointer to aibv and callback data in zdev */
-	struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
-	/* protects the whole bucket struct */
-	spinlock_t	lock;
-};
-
-static struct intr_bucket *bucket;
+static struct airq_iv *zpci_aisb_iv;
+static struct zdev_irq_map *zpci_imap[ZPCI_NR_DEVICES];
 
 /* Adapter interrupt definitions */
 static void zpci_irq_handler(struct airq_struct *airq);
@@ -96,15 +83,12 @@
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
-/* highest irq summary bit */
-static int __read_mostly aisb_max;
-
 static struct kmem_cache *zdev_irq_cache;
 static struct kmem_cache *zdev_fmb_cache;
 
 static inline int irq_to_msi_nr(unsigned int irq)
 {
-	return irq & ZPCI_MSI_MASK;
+	return irq & ZPCI_MSI_VEC_MASK;
 }
 
 static inline int irq_to_dev_nr(unsigned int irq)
@@ -112,11 +96,6 @@
 	return irq >> ZPCI_MSI_VEC_BITS;
 }
 
-static inline struct zdev_irq_map *get_imap(unsigned int irq)
-{
-	return bucket->imap[irq_to_dev_nr(irq)];
-}
-
 struct zpci_dev *get_zdev(struct pci_dev *pdev)
 {
 	return (struct zpci_dev *) pdev->sysdata;
@@ -160,8 +139,7 @@
 EXPORT_SYMBOL_GPL(pci_proc_domain);
 
 /* Modify PCI: Register adapter interruptions */
-static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
-			      u64 aibv)
+static int zpci_set_airq(struct zpci_dev *zdev)
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
 	struct zpci_fib *fib;
@@ -172,12 +150,12 @@
 		return -ENOMEM;
 
 	fib->isc = PCI_ISC;
-	fib->noi = zdev->irq_map->msi_vecs;
 	fib->sum = 1;		/* enable summary notifications */
-	fib->aibv = aibv;
-	fib->aibvo = 0;		/* every function has its own page */
-	fib->aisb = (u64) bucket->aisb + aisb / 8;
-	fib->aisbo = aisb & ZPCI_MSI_MASK;
+	fib->noi = airq_iv_end(zdev->aibv);
+	fib->aibv = (unsigned long) zdev->aibv->vector;
+	fib->aibvo = 0;		/* each zdev has its own interrupt vector */
+	fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+	fib->aisbo = zdev->aisb & 63;
 
 	rc = zpci_mod_fc(req, fib);
 	pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
@@ -234,7 +212,7 @@
 }
 
 /* Modify PCI: Unregister adapter interruptions */
-static int zpci_unregister_airq(struct zpci_dev *zdev)
+static int zpci_clear_airq(struct zpci_dev *zdev)
 {
 	struct mod_pci_args args = { 0, 0, 0, 0 };
 
@@ -404,152 +382,171 @@
 	.write = pci_write,
 };
 
-/* store the last handled bit to implement fair scheduling of devices */
-static DEFINE_PER_CPU(unsigned long, next_sbit);
-
 static void zpci_irq_handler(struct airq_struct *airq)
 {
-	unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
-	int rescan = 0, max = aisb_max;
+	unsigned long si, ai;
 	struct zdev_irq_map *imap;
+	int irqs_on = 0;
 
 	inc_irq_stat(IRQIO_PCI);
-	sbit = start;
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+		if (si == -1UL) {
+			if (irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, reenable interrupts. */
+			zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+			si = 0;
+			continue;
+		}
 
-scan:
-	/* find summary_bit */
-	for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
-		clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
-		last = sbit;
-
-		/* find vector bit */
-		imap = bucket->imap[sbit];
-		for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+		/* Scan the adapter interrupt vector for this device. */
+		imap = zpci_imap[si];
+		for (ai = 0;;) {
+			ai = airq_iv_scan(imap->aibv, ai, imap->msi_vecs);
+			if (ai == -1UL)
+				break;
 			inc_irq_stat(IRQIO_MSI);
-			clear_bit(63 - mbit, &imap->aibv);
-
-			spin_lock(&imap->lock);
-			if (imap->cb[mbit].handler)
-				imap->cb[mbit].handler(mbit,
-					imap->cb[mbit].data);
-			spin_unlock(&imap->lock);
+			airq_iv_lock(imap->aibv, ai);
+			if (imap->cb[ai].handler)
+				imap->cb[ai].handler(ai, imap->cb[ai].data);
+			airq_iv_unlock(imap->aibv, ai);
 		}
 	}
-
-	if (rescan)
-		goto out;
-
-	/* scan the skipped bits */
-	if (start > 0) {
-		sbit = 0;
-		max = start;
-		start = 0;
-		goto scan;
-	}
-
-	/* enable interrupts again */
-	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
-
-	/* check again to not lose initiative */
-	rmb();
-	max = aisb_max;
-	sbit = find_first_bit_left(bucket->aisb, max);
-	if (sbit != max) {
-		rescan++;
-		goto scan;
-	}
-out:
-	/* store next device bit to scan */
-	__get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
 }
 
-/* msi_vecs - number of requested interrupts, 0 place function to error state */
-static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+static int zpci_alloc_msi(struct zpci_dev *zdev, int msi_vecs)
+{
+	unsigned long size;
+
+	/* Allocate the irq map structure (aibv pointer, callbacks, vector count) */
+	zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
+	if (!zdev->irq_map)
+		goto out;
+	/* Store the number of used MSI vectors */
+	zdev->irq_map->msi_vecs = msi_vecs;
+	/* Allocate callback array */
+	size = sizeof(struct callback) * msi_vecs;
+	zdev->irq_map->cb = kzalloc(size, GFP_KERNEL);
+	if (!zdev->irq_map->cb)
+		goto out_map;
+	/* Allocate msi_map array */
+	size = sizeof(struct msi_map) * msi_vecs;
+	zdev->msi_map = kzalloc(size, GFP_KERNEL);
+	if (!zdev->msi_map)
+		goto out_cb;
+	return 0;
+
+out_cb:
+	kfree(zdev->irq_map->cb);
+out_map:
+	kmem_cache_free(zdev_irq_cache, zdev->irq_map);
+out:
+	return -ENOMEM;
+}
+
+static void zpci_free_msi(struct zpci_dev *zdev)
+{
+	kfree(zdev->msi_map);
+	kfree(zdev->irq_map->cb);
+	kmem_cache_free(zdev_irq_cache, zdev->irq_map);
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
-	unsigned int aisb, msi_nr;
+	unsigned int msi_nr, msi_vecs;
+	unsigned long aisb;
 	struct msi_desc *msi;
 	int rc;
 
-	/* store the number of used MSI vectors */
-	zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
+	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+		return -EINVAL;
+	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
 
-	spin_lock(&bucket->lock);
-	aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
-	/* alloc map exhausted? */
-	if (aisb == PAGE_SIZE) {
-		spin_unlock(&bucket->lock);
-		return -EIO;
-	}
-	set_bit(aisb, bucket->alloc);
-	spin_unlock(&bucket->lock);
-
+	/* Allocate adapter summary indicator bit */
+	rc = -EIO;
+	aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+	if (aisb == -1UL)
+		goto out;
 	zdev->aisb = aisb;
-	if (aisb + 1 > aisb_max)
-		aisb_max = aisb + 1;
 
-	/* wire up IRQ shortcut pointer */
-	bucket->imap[zdev->aisb] = zdev->irq_map;
-	pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+	/* Create adapter interrupt vector */
+	rc = -ENOMEM;
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_BITLOCK);
+	if (!zdev->aibv)
+		goto out_si;
 
-	/* TODO: irq number 0 wont be found if we return less than requested MSIs.
-	 * ignore it for now and fix in common code.
+	/* Allocate data structures for msi interrupts */
+	rc = zpci_alloc_msi(zdev, msi_vecs);
+	if (rc)
+		goto out_iv;
+
+	/* Wire up shortcut pointer */
+	zpci_imap[aisb] = zdev->irq_map;
+	zdev->irq_map->aibv = zdev->aibv;
+
+	/*
+	 * TODO: irq number 0 won't be found if we return fewer than the
+	 * requested MSIs. Ignore it for now and fix in common code.
 	 */
 	msi_nr = aisb << ZPCI_MSI_VEC_BITS;
-
 	list_for_each_entry(msi, &pdev->msi_list, list) {
 		rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
 					  aisb << ZPCI_MSI_VEC_BITS);
 		if (rc)
-			return rc;
+			goto out_msi;
 		msi_nr++;
 	}
 
-	rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
-	if (rc) {
-		clear_bit(aisb, bucket->alloc);
-		dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
-		return rc;
+	/* Enable adapter interrupts */
+	rc = zpci_set_airq(zdev);
+	if (rc)
+		goto out_msi;
+
+	return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+	msi_nr -= aisb << ZPCI_MSI_VEC_BITS;
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		if (msi_nr-- == 0)
+			break;
+		zpci_teardown_msi_irq(zdev, msi);
 	}
-	return (zdev->irq_map->msi_vecs == msi_vecs) ?
-		0 : zdev->irq_map->msi_vecs;
+	zpci_free_msi(zdev);
+out_iv:
+	airq_iv_release(zdev->aibv);
+out_si:
+	airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+	dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
+	return rc;
 }
 
-static void zpci_teardown_msi(struct pci_dev *pdev)
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct msi_desc *msi;
-	int aisb, rc;
+	int rc;
 
-	rc = zpci_unregister_airq(zdev);
+	pr_info("%s: on pdev: %p\n", __func__, pdev);
+
+	/* Disable adapter interrupts */
+	rc = zpci_clear_airq(zdev);
 	if (rc) {
 		dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
 		return;
 	}
 
-	msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
-	aisb = irq_to_dev_nr(msi->irq);
-
 	list_for_each_entry(msi, &pdev->msi_list, list)
 		zpci_teardown_msi_irq(zdev, msi);
 
-	clear_bit(aisb, bucket->alloc);
-	if (aisb + 1 == aisb_max)
-		aisb_max--;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
-	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
-		return -EINVAL;
-	return zpci_setup_msi(pdev, nvec);
-}
-
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	pr_info("%s: on pdev: %p\n", __func__, pdev);
-	zpci_teardown_msi(pdev);
+	zpci_free_msi(zdev);
+	airq_iv_release(zdev->aibv);
+	airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
 }
 
 static void zpci_map_resources(struct zpci_dev *zdev)
@@ -589,24 +586,11 @@
 
 	/* Alloc memory for our private pci device data */
 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
-	if (!zdev)
-		return ERR_PTR(-ENOMEM);
-
-	/* Alloc aibv & callback space */
-	zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
-	if (!zdev->irq_map)
-		goto error;
-	WARN_ON((u64) zdev->irq_map & 0xff);
-	return zdev;
-
-error:
-	kfree(zdev);
-	return ERR_PTR(-ENOMEM);
+	return zdev ? : ERR_PTR(-ENOMEM);
 }
 
 void zpci_free_device(struct zpci_dev *zdev)
 {
-	kmem_cache_free(zdev_irq_cache, zdev->irq_map);
 	kfree(zdev);
 }
 
@@ -643,7 +627,8 @@
 
 int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
 {
-	int msi_nr = irq_to_msi_nr(irq);
+	unsigned int msi_nr = irq_to_msi_nr(irq);
+	unsigned int dev_nr = irq_to_dev_nr(irq);
 	struct zdev_irq_map *imap;
 	struct msi_desc *msi;
 
@@ -651,10 +636,7 @@
 	if (!msi)
 		return -EIO;
 
-	imap = get_imap(irq);
-	spin_lock_init(&imap->lock);
-
-	pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
+	imap = zpci_imap[dev_nr];
 	imap->cb[msi_nr].handler = handler;
 	imap->cb[msi_nr].data = data;
 
@@ -669,24 +651,25 @@
 
 void zpci_free_irq(unsigned int irq)
 {
-	struct zdev_irq_map *imap = get_imap(irq);
-	int msi_nr = irq_to_msi_nr(irq);
-	unsigned long flags;
+	unsigned int msi_nr = irq_to_msi_nr(irq);
+	unsigned int dev_nr = irq_to_dev_nr(irq);
+	struct zdev_irq_map *imap;
+	struct msi_desc *msi;
 
-	pr_debug("%s: for irq: %d\n", __func__, irq);
-
-	spin_lock_irqsave(&imap->lock, flags);
+	/* Disable interrupt */
+	msi = irq_get_msi_desc(irq);
+	if (!msi)
+		return;
+	zpci_msi_set_mask_bits(msi, 1, 1);
+	imap = zpci_imap[dev_nr];
 	imap->cb[msi_nr].handler = NULL;
 	imap->cb[msi_nr].data = NULL;
-	spin_unlock_irqrestore(&imap->lock, flags);
+	synchronize_rcu();
 }
 
 int request_irq(unsigned int irq, irq_handler_t handler,
 		unsigned long irqflags, const char *devname, void *dev_id)
 {
-	pr_debug("%s: irq: %d  handler: %p  flags: %lx  dev: %s\n",
-		__func__, irq, handler, irqflags, devname);
-
 	return zpci_request_irq(irq, handler, dev_id);
 }
 EXPORT_SYMBOL_GPL(request_irq);
@@ -699,52 +682,32 @@
 
 static int __init zpci_irq_init(void)
 {
-	int cpu, rc;
-
-	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
-	if (!bucket)
-		return -ENOMEM;
-
-	bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-	if (!bucket->aisb) {
-		rc = -ENOMEM;
-		goto out_aisb;
-	}
-
-	bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-	if (!bucket->alloc) {
-		rc = -ENOMEM;
-		goto out_alloc;
-	}
+	int rc;
 
 	rc = register_adapter_interrupt(&zpci_airq);
 	if (rc)
-		goto out_ai;
+		goto out;
 	/* Set summary to 1 to be called every time for the ISC. */
 	*zpci_airq.lsi_ptr = 1;
 
-	for_each_online_cpu(cpu)
-		per_cpu(next_sbit, cpu) = 0;
+	rc = -ENOMEM;
+	zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+	if (!zpci_aisb_iv)
+		goto out_airq;
 
-	spin_lock_init(&bucket->lock);
 	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
 	return 0;
 
-out_ai:
-	free_page((unsigned long) bucket->alloc);
-out_alloc:
-	free_page((unsigned long) bucket->aisb);
-out_aisb:
-	kfree(bucket);
+out_airq:
+	unregister_adapter_interrupt(&zpci_airq);
+out:
 	return rc;
 }
 
 static void zpci_irq_exit(void)
 {
-	free_page((unsigned long) bucket->alloc);
-	free_page((unsigned long) bucket->aisb);
+	airq_iv_release(zpci_aisb_iv);
 	unregister_adapter_interrupt(&zpci_airq);
-	kfree(bucket);
 }
 
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,