iommu/io-pgtable-fast: Prove correctness of TLB maintenance

A common software error in page table code is missing TLB maintenance.
Add some checks to the io-pgtable-fast code to detect when an address
that might be stale in the TLB is being re-used.  This is accomplished
by writing a "stale TLB" flag value to the reserved bits of the PTE
during unmap and removing that flag value when the TLBs are invalidated
(by sweeping the entire page table).  That way, whenever we map, a PTE
that still contains the "stale TLB" flag value tells us the location
being mapped into might still have a stale entry in the TLB.
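
To illustrate the scheme, here is a minimal, self-contained userspace
sketch (the single-level table, the names, and the flag handling are
simplified stand-ins for the av8l-fast code paths touched below):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t iopte;

	#define PTE_VALID		(1ULL << 0)
	/* any value with bit 0 clear works; 0xa mirrors the patch */
	#define PTE_STALE_TLB_FLAG	0xaULL

	/* unmap: leave a breadcrumb instead of just zeroing the PTE */
	static void unmap_pte(iopte *ptep)
	{
		*ptep = PTE_STALE_TLB_FLAG;
	}

	/* TLB invalidate: sweep the table and clear the breadcrumbs */
	static void tlb_invalidate_all(iopte *pmds, size_t nptes)
	{
		size_t i;

		for (i = 0; i < nptes; i++)
			if (!(pmds[i] & PTE_VALID))
				pmds[i] = 0;
	}

	/* map: a non-zero PTE here means a possible stale TLB entry */
	static void map_pte(iopte *ptep, iopte pte)
	{
		if (*ptep)
			fprintf(stderr, "mapping over a stale TLB entry!\n");
		*ptep = pte;
	}

	int main(void)
	{
		iopte table[4] = { 0 };

		map_pte(&table[0], 0x1000 | PTE_VALID);	/* clean */
		unmap_pte(&table[0]);
		map_pte(&table[0], 0x2000 | PTE_VALID);	/* warns */
		unmap_pte(&table[0]);
		tlb_invalidate_all(table, 4);		/* sweep */
		map_pte(&table[0], 0x3000 | PTE_VALID);	/* clean */
		return 0;
	}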

CRs-Fixed: 997751
Change-Id: Icf9c1e41977cb71e8b137190adb3b4a201c339da
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Patrick Daly <pdaly@codeaurora.org>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 19b1b13..db933df 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -79,6 +79,21 @@
 
 	  If unsure, say N here.
 
+config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+	bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
+	depends on IOMMU_IO_PGTABLE_FAST
+	help
+	  Enables some debug features that help prove correctness of the
+	  TLB maintenance routines in the Fast DMA mapper.  This option
+	  will slow things down considerably, so it should only be used
+	  in a debug configuration.  It relies on the ability to set bits
+	  in an invalid page table entry, which is disallowed on some
+	  hardware due to errata.  If you're running on such a platform,
+	  this option can only be used with unit tests; it will break
+	  real use cases.
+
+	  If unsure, say N here.
+
 endmenu
 
 config IOMMU_IOVA
diff --git a/drivers/iommu/dma-mapping-fast.c b/drivers/iommu/dma-mapping-fast.c
index 2d36ee3..8b2be30 100644
--- a/drivers/iommu/dma-mapping-fast.c
+++ b/drivers/iommu/dma-mapping-fast.c
@@ -80,6 +80,7 @@
 }
 
 static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
+					 unsigned long attrs,
 					 size_t size)
 {
 	unsigned long bit, prev_search_start, nbits = size >> FAST_PAGE_SHIFT;
@@ -114,8 +115,16 @@
 	    __bit_covered_stale(mapping->upcoming_stale_bit,
 				prev_search_start,
 				bit + nbits - 1)) {
+		bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
+
 		iommu_tlbiall(mapping->domain);
 		mapping->have_stale_tlbs = false;
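+		/*
+		 * The full TLB invalidate above flushed any stale
+		 * entries, so the stale-TLB flags left in the page
+		 * table by unmap are no longer needed; sweep them out.
+		 */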
+		av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
 	}
 
 	return (bit << FAST_PAGE_SHIFT) + mapping->base;
@@ -287,7 +291,7 @@
 
 	spin_lock_irqsave(&mapping->lock, flags);
 
-	iova = __fast_smmu_alloc_iova(mapping, len);
+	iova = __fast_smmu_alloc_iova(mapping, attrs, len);
 
 	if (unlikely(iova == DMA_ERROR_CODE))
 		goto fail;
@@ -429,7 +433,7 @@
 	}
 
 	spin_lock_irqsave(&mapping->lock, flags);
-	dma_addr = __fast_smmu_alloc_iova(mapping, size);
+	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
 	if (dma_addr == DMA_ERROR_CODE) {
 		dev_err(dev, "no iova\n");
 		spin_unlock_irqrestore(&mapping->lock, flags);
@@ -521,6 +525,39 @@
 	return dma_addr == DMA_ERROR_CODE;
 }
 
+static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
+					  void *data)
+{
+	av8l_fast_iopte *ptep = data;
+	dma_addr_t iova;
+	unsigned long bitmap_idx;
+
+	bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
+	iova = bitmap_idx << FAST_PAGE_SHIFT;
+	dev_err(fast->dev, "Mapped over stale TLB at %pa\n", &iova);
+	dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
+	dev_err(fast->dev, "ptep: %p pmds: %p diff: %ld\n", ptep,
+		fast->pgtbl_pmds, ptep - fast->pgtbl_pmds);
+	print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
+		       32, 8, fast->bitmap, fast->bitmap_size, false);
+}
+
+static int fast_smmu_notify(struct notifier_block *self,
+			    unsigned long action, void *data)
+{
+	struct dma_fast_smmu_mapping *fast = container_of(
+		self, struct dma_fast_smmu_mapping, notifier);
+
+	switch (action) {
+	case MAPPED_OVER_STALE_TLB:
+		__fast_smmu_mapped_over_stale(fast, data);
+		return NOTIFY_OK;
+	default:
+		WARN(1, "Unhandled notifier action\n");
+		return NOTIFY_DONE;
+	}
+}
+
 static const struct dma_map_ops fast_smmu_dma_ops = {
 	.alloc = fast_smmu_alloc,
 	.free = fast_smmu_free,
@@ -618,6 +655,9 @@
 	}
 	mapping->fast->pgtbl_pmds = info.pmds;
 
+	mapping->fast->notifier.notifier_call = fast_smmu_notify;
+	av8l_register_notify(&mapping->fast->notifier);
+
 	dev->archdata.mapping = mapping;
 	set_dma_ops(dev, &fast_smmu_dma_ops);
 
diff --git a/drivers/iommu/io-pgtable-fast.c b/drivers/iommu/io-pgtable-fast.c
index 6329849..68a69e6 100644
--- a/drivers/iommu/io-pgtable-fast.c
+++ b/drivers/iommu/io-pgtable-fast.c
@@ -140,6 +140,58 @@
 #define AV8L_FAST_PAGE_SHIFT		12
 
 
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+
+#include <asm/cacheflush.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);
+
+void av8l_register_notify(struct notifier_block *nb)
+{
+	atomic_notifier_chain_register(&av8l_notifier_list, nb);
+}
+EXPORT_SYMBOL(av8l_register_notify);
+
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+	if (unlikely(*ptep)) {
+		atomic_notifier_call_chain(
+			&av8l_notifier_list, MAPPED_OVER_STALE_TLB,
+			(void *) ptep);
+		pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
+		       *ptep, ptep);
+		pr_err("Nearby memory:\n");
+		print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
+			       32, 8, ptep - 16, 32 * sizeof(*ptep), false);
+	}
+}
+
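+/*
+ * Sweep all 4GB worth of 4K PTEs, zeroing out every entry that is
+ * invalid but non-zero (i.e. still carries the stale-TLB flag).
+ * Meant to be called right after a full TLB invalidate.
+ */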
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync)
+{
+	int i;
+	av8l_fast_iopte *pmdp = pmds;
+
+	for (i = 0; i < ((SZ_1G * 4UL) >> AV8L_FAST_PAGE_SHIFT); ++i) {
+		if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
+			*pmdp = 0;
+			if (!skip_sync)
+				dmac_clean_range(pmdp, pmdp + 1);
+		}
+		pmdp++;
+	}
+}
+#else
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+}
+#endif
+
 /* caller must take care of cache maintenance on *ptep */
 int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
 			 int prot)
@@ -164,8 +211,10 @@
 		pte |= AV8L_FAST_PTE_AP_RW;
 
 	paddr &= AV8L_FAST_PTE_ADDR_MASK;
-	for (i = 0; i < nptes; i++, paddr += SZ_4K)
+	for (i = 0; i < nptes; i++, paddr += SZ_4K) {
+		__av8l_check_for_stale_tlb(ptep + i);
 		*(ptep + i) = pte | paddr;
+	}
 
 	return 0;
 }
@@ -183,12 +232,27 @@
 	return 0;
 }
 
+static void __av8l_fast_unmap(av8l_fast_iopte *ptep, size_t size,
+			      bool need_stale_tlb_tracking)
+{
+	unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
+	int val = need_stale_tlb_tracking
+		? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
+		: 0;
+
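+	/*
+	 * memset() replicates the flag byte into every byte of each
+	 * 64-bit PTE; the chosen flag value keeps bit 0 (the valid
+	 * bit) clear in that replicated pattern, so the result still
+	 * reads as an invalid, flagged PTE.
+	 */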
+	memset(ptep, val, sizeof(*ptep) * nptes);
+}
+
 /* caller must take care of cache maintenance on *ptep */
 void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
 {
-	unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
-
-	memset(ptep, 0, sizeof(*ptep) * nptes);
+	__av8l_fast_unmap(ptep, size, true);
 }
 
 /* upper layer must take care of TLB invalidation */
@@ -199,7 +257,7 @@
 	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
 	unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
 
-	av8l_fast_unmap_public(ptep, size);
+	__av8l_fast_unmap(ptep, size, false);
 	dmac_clean_range(ptep, ptep + nptes);
 
 	return size;
@@ -539,6 +597,9 @@
 			failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 8K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_8K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
@@ -557,6 +618,9 @@
 			failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 16K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_16K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
@@ -575,6 +639,9 @@
 			failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 64K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_64K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
diff --git a/include/linux/dma-mapping-fast.h b/include/linux/dma-mapping-fast.h
index ad82efc..aa9fcfe 100644
--- a/include/linux/dma-mapping-fast.h
+++ b/include/linux/dma-mapping-fast.h
@@ -33,6 +33,7 @@
 	av8l_fast_iopte	*pgtbl_pmds;
 
 	spinlock_t	lock;
+	struct notifier_block notifier;
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
diff --git a/include/linux/io-pgtable-fast.h b/include/linux/io-pgtable-fast.h
index b482ffe..ab5a1dc 100644
--- a/include/linux/io-pgtable-fast.h
+++ b/include/linux/io-pgtable-fast.h
@@ -13,6 +13,8 @@
 #ifndef __LINUX_IO_PGTABLE_FAST_H
 #define __LINUX_IO_PGTABLE_FAST_H
 
+#include <linux/notifier.h>
+
 typedef u64 av8l_fast_iopte;
 
 #define iopte_pmd_offset(pmds, iova) (pmds + (iova >> 12))
@@ -21,4 +23,34 @@
 			 int prot);
 void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size);
 
+/* events for notifiers passed to av8l_register_notify */
+#define MAPPED_OVER_STALE_TLB 1
+
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+/*
+ * It doesn't matter what value we use as long as bit 0 is unset.  The
+ * reason we need a distinct value at all is that some hardware
+ * platforms have errata that require a PTE to actually be zeroed out,
+ * not just have its valid bit unset.
+ */
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0xa
+
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync);
+void av8l_register_notify(struct notifier_block *nb);
+
+#else  /* !CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0
+
+static inline void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds,
+					      bool skip_sync)
+{
+}
+
+static inline void av8l_register_notify(struct notifier_block *nb)
+{
+}
+
+#endif	/* CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
 #endif /* __LINUX_IO_PGTABLE_FAST_H */