Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index 33ca56c..2803bc7 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -196,6 +196,34 @@
 	return NULL;
 }
 
+static int iommu_alloc_ctx(struct pci_iommu *iommu)
+{
+	int lowest = iommu->ctx_lowest_free;
+	int sz = IOMMU_NUM_CTXS;	/* absolute bound for find_next_zero_bit() */
+	int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);
+
+	if (unlikely(n == sz)) {
+		n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
+		if (unlikely(n == lowest)) {
+			printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
+			n = 0;
+		}
+	}
+	if (n)
+		__set_bit(n, iommu->ctx_bitmap);
+
+	return n;
+}
+
+static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx)
+{
+	if (likely(ctx)) {
+		__clear_bit(ctx, iommu->ctx_bitmap);
+		if (ctx < iommu->ctx_lowest_free)
+			iommu->ctx_lowest_free = ctx;
+	}
+}
+
 /* Allocate and map kernel buffer of size SIZE using consistent mode
  * DMA for PCI device PDEV.  Return non-NULL cpu-side address if
  * successful and set *DMA_ADDRP to the PCI side dma address.
@@ -236,7 +264,7 @@
 	npages = size >> IO_PAGE_SHIFT;
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 	first_page = __pa(first_page);
 	while (npages--) {
 		iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) |
@@ -317,6 +345,8 @@
 		}
 	}
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	order = get_order(size);
@@ -360,7 +390,7 @@
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 	if (strbuf->strbuf_enabled)
 		iopte_protection = IOPTE_STREAMING(ctx);
 	else
@@ -380,39 +410,53 @@
 	return PCI_DMA_ERROR_CODE;
 }
 
-static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages)
+static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction)
 {
 	int limit;
 
-	PCI_STC_FLUSHFLAG_INIT(strbuf);
 	if (strbuf->strbuf_ctxflush &&
 	    iommu->iommu_ctxflush) {
 		unsigned long matchreg, flushreg;
+		u64 val;
 
 		flushreg = strbuf->strbuf_ctxflush;
 		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
 
-		limit = 100000;
 		pci_iommu_write(flushreg, ctx);
-		for(;;) {
-			if (((long)pci_iommu_read(matchreg)) >= 0L)
-				break;
-			limit--;
-			if (!limit)
-				break;
-			udelay(1);
+		val = pci_iommu_read(matchreg);
+		val &= 0xffff;
+		if (!val)
+			goto do_flush_sync;
+
+		while (val) {
+			if (val & 0x1)
+				pci_iommu_write(flushreg, ctx);
+			val >>= 1;
 		}
-		if (!limit)
+		val = pci_iommu_read(matchreg);
+		if (unlikely(val)) {
 			printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
-			       "timeout vaddr[%08x] ctx[%lx]\n",
-			       vaddr, ctx);
+			       "failure, matchreg[%lx] ctx[%lx]\n",
+			       val, ctx);
+			goto do_page_flush;
+		}
 	} else {
 		unsigned long i;
 
+	do_page_flush:
 		for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
 			pci_iommu_write(strbuf->strbuf_pflush, vaddr);
 	}
 
+do_flush_sync:
+	/* If the device could not have possibly put dirty data into
+	 * the streaming cache, no flush-flag synchronization needs
+	 * to be performed.
+	 */
+	if (direction == PCI_DMA_TODEVICE)
+		return;
+
+	PCI_STC_FLUSHFLAG_INIT(strbuf);
 	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
 	(void) pci_iommu_read(iommu->write_complete_reg);
 
@@ -466,7 +510,7 @@
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
 	if (strbuf->strbuf_enabled)
-		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -474,6 +518,8 @@
 	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
 			       npages, ctx);
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -613,7 +659,7 @@
 	/* Step 4: Choose a context if necessary. */
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 
 	/* Step 5: Create the mappings. */
 	if (strbuf->strbuf_enabled)
@@ -678,7 +724,7 @@
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
 	if (strbuf->strbuf_enabled)
-		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -686,6 +732,8 @@
 	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
 			       npages, ctx);
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -724,7 +772,7 @@
 	}
 
 	/* Step 2: Kick data out of streaming buffers. */
-	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -768,7 +816,7 @@
 	i--;
 	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
 		  - bus_addr) >> IO_PAGE_SHIFT;
-	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
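
For readers following the allocator above: below is a minimal user-space sketch of the same bitmap scheme. The names (alloc_ctx, free_ctx, find_next_zero) and the stdio output are stand-ins invented for the sketch, not kernel API; find_next_zero mirrors the semantics of the kernel's find_next_zero_bit(), whose size argument is an absolute bound on the bitmap rather than a count of bits past the offset (which is why sz in iommu_alloc_ctx() must be IOMMU_NUM_CTXS, not IOMMU_NUM_CTXS - lowest). Context 0 is reserved as the "no context" value, so searches start at bit 1 and the controller drivers below seed ctx_lowest_free to 1.

#include <stdio.h>

#define NUM_CTXS	4096
#define BITS_PER_LONG	(sizeof(unsigned long) * 8)

static unsigned long ctx_bitmap[NUM_CTXS / BITS_PER_LONG];
static int ctx_lowest_free = 1;		/* bit 0 is the sentinel, never free */

/* First clear bit in [offset, size), or size if that range is full. */
static unsigned long find_next_zero(const unsigned long *map,
				    unsigned long size, unsigned long offset)
{
	unsigned long i;

	for (i = offset; i < size; i++)
		if (!(map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))))
			return i;
	return size;
}

static int alloc_ctx(void)
{
	int lowest = ctx_lowest_free;
	int n = find_next_zero(ctx_bitmap, NUM_CTXS, lowest);

	if (n == NUM_CTXS) {
		/* Wrap around and retry the [1, lowest) range we skipped. */
		n = find_next_zero(ctx_bitmap, lowest, 1);
		if (n == lowest)
			return 0;	/* exhausted: fall back to "no context" */
	}
	ctx_bitmap[n / BITS_PER_LONG] |= 1UL << (n % BITS_PER_LONG);
	return n;
}

static void free_ctx(int ctx)
{
	if (ctx) {
		ctx_bitmap[ctx / BITS_PER_LONG] &= ~(1UL << (ctx % BITS_PER_LONG));
		if (ctx < ctx_lowest_free)
			ctx_lowest_free = ctx;	/* pull the search hint back */
	}
}

int main(void)
{
	int a = alloc_ctx(), b = alloc_ctx();

	printf("first two: %d %d\n", a, b);		/* 1 2 */
	free_ctx(a);
	printf("after free: %d\n", alloc_ctx());	/* 1 again */
	return 0;
}

Note that allocation never advances ctx_lowest_free; it is only a lower-bound hint, pulled back down by free_ctx() so the next search begins at the lowest possibly-free bit.
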
diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c
index 3567fa8..534320e 100644
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -1212,7 +1212,7 @@
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL;
diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c
index 5525d1e..53d333b 100644
--- a/arch/sparc64/kernel/pci_sabre.c
+++ b/arch/sparc64/kernel/pci_sabre.c
@@ -1265,7 +1265,7 @@
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL;
diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c
index e93fcad..5753175 100644
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -1753,7 +1753,7 @@
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses, SCHIZO has iommu ctx flushing. */
 	iommu->iommu_control  = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
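
All three controller probes seed ctx_lowest_free to 1 rather than 0 for the same reason: context 0 is the "untagged" value every map path starts from, and only hardware with IOMMU context flushing upgrades it. A toy model of that caller pattern, where have_ctxflush and stub_alloc_ctx are illustrative stand-ins for iommu->iommu_ctxflush and iommu_alloc_ctx():

#include <stdio.h>

static int stub_alloc_ctx(void) { return 1; }	/* pretend ctx 1 is free */

static int choose_ctx(int have_ctxflush)
{
	int ctx = 0;			/* 0 == untagged mapping */

	if (have_ctxflush)
		ctx = stub_alloc_ctx();
	return ctx;
}

int main(void)
{
	/* A controller without ctx flushing vs. one with it. */
	printf("no flush: %d, flush: %d\n", choose_ctx(0), choose_ctx(1));
	return 0;
}

Since 0 can reach iommu_free_ctx() from either path (and from allocator exhaustion), the free routine treats it as a no-op rather than a bit to clear.
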
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index 76ea645..89f5e01 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -117,17 +117,25 @@
 
 #define STRBUF_TAG_VALID	0x02UL
 
-static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
+static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages, int direction)
 {
 	unsigned long n;
 	int limit;
 
-	iommu->strbuf_flushflag = 0UL;
 	n = npages;
 	while (n--)
 		upa_writeq(base + (n << IO_PAGE_SHIFT),
 			   iommu->strbuf_regs + STRBUF_PFLUSH);
 
+	/* If the device could not have possibly put dirty data into
+	 * the streaming cache, no flush-flag synchronization needs
+	 * to be performed.
+	 */
+	if (direction == SBUS_DMA_TODEVICE)
+		return;
+
+	iommu->strbuf_flushflag = 0UL;
+
 	/* Whoopee cushion! */
 	upa_writeq(__pa(&iommu->strbuf_flushflag),
 		   iommu->strbuf_regs + STRBUF_FSYNC);
@@ -421,7 +429,7 @@
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
-	sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -584,7 +592,7 @@
 	iommu = sdev->bus->iommu;
 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
-	sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -596,7 +604,7 @@
 	size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -620,7 +628,7 @@
 	size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
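
The direction test added to sbus_strbuf_flush() (and to pci_strbuf_flush() above) can be modeled in isolation. In the sketch below, write_pflush and write_fsync are counters standing in for the upa_writeq() stores to STRBUF_PFLUSH and STRBUF_FSYNC, and an IO_PAGE_SHIFT of 13 (8K IOMMU pages) is assumed; none of these names are from the patch. Page invalidation always happens, but the costly flush-flag synchronization is skipped for transfers toward the device, which cannot have left dirty data in the streaming cache.

#include <stdio.h>

#define IO_PAGE_SHIFT	13	/* assumed 8K IOMMU page size */

enum dma_dir { TODEVICE, FROMDEVICE, BIDIRECTIONAL };

static unsigned int pflush_writes, fsync_syncs;

static void write_pflush(unsigned long addr) { (void)addr; pflush_writes++; }
static void write_fsync(void) { fsync_syncs++; }

static void strbuf_flush(unsigned long base, unsigned long npages,
			 enum dma_dir dir)
{
	unsigned long n = npages;

	/* Invalidate the streaming-cache tags one IO page at a time. */
	while (n--)
		write_pflush(base + (n << IO_PAGE_SHIFT));

	/* A device doing TODEVICE DMA only reads; no dirty data can be
	 * waiting in the streaming cache, so skip the flush-flag wait. */
	if (dir == TODEVICE)
		return;

	write_fsync();
}

int main(void)
{
	strbuf_flush(0x10000, 4, TODEVICE);
	strbuf_flush(0x10000, 4, FROMDEVICE);
	printf("pflush=%u fsync=%u\n", pflush_writes, fsync_syncs); /* 8 1 */
	return 0;
}
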
diff --git a/include/asm-sparc64/iommu.h b/include/asm-sparc64/iommu.h
index 5fd16e4..0de7a3d 100644
--- a/include/asm-sparc64/iommu.h
+++ b/include/asm-sparc64/iommu.h
@@ -16,4 +16,6 @@
 #define IOPTE_CACHE   0x0000000000000010UL /* Cached (in UPA E-cache)         */
 #define IOPTE_WRITE   0x0000000000000002UL /* Writeable                       */
 
+#define IOMMU_NUM_CTXS	4096
+
 #endif /* !(_SPARC_IOMMU_H) */
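
IOMMU_NUM_CTXS sizes the per-IOMMU context bitmap declared in pbm.h below: 4096 bits pack into 4096 / 64 = 64 unsigned longs, i.e. 512 bytes per IOMMU on sparc64. A compile-time check of that sizing expression, written with plain C11 static_assert (nothing in this snippet is from the kernel tree):

#include <assert.h>	/* C11 static_assert */

#define IOMMU_NUM_CTXS	4096

/* The division in the array size must not truncate, or the highest
 * contexts would have no backing bits. */
static_assert(IOMMU_NUM_CTXS % (sizeof(unsigned long) * 8) == 0,
	      "context count must fill a whole number of longs");

static unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)];
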
diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h
index 9299963..4c15610 100644
--- a/include/asm-sparc64/pbm.h
+++ b/include/asm-sparc64/pbm.h
@@ -15,6 +15,7 @@
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/oplib.h>
+#include <asm/iommu.h>
 
 /* The abstraction used here is that there are PCI controllers,
  * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
@@ -40,9 +41,6 @@
 	 */
 	spinlock_t	lock;
 
-	/* Context allocator. */
-	unsigned int	iommu_cur_ctx;
-
 	/* IOMMU page table, a linear array of ioptes. */
 	iopte_t		*page_table;		/* The page table itself. */
 	int		page_table_sz_bits;	/* log2 of how many pages does it map? */
@@ -87,6 +85,10 @@
 		u16	flush;
 	} alloc_info[PBM_NCLUSTERS];
 
+	/* CTX allocation. */
+	unsigned long ctx_lowest_free;
+	unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)];
+
 	/* Here a PCI controller driver describes the areas of
 	 * PCI memory space where DMA to/from physical memory
 	 * are addressed.  Drivers interrogate the PCI layer