Merge branch 'stable/for-linus-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb

Pull swiotlb fixes from Konrad Rzeszutek Wilk:
 "This has one fix to make i915 work when using Xen SWIOTLB, and a
  feature from Geert to aid in debugging devices that can't do DMA
  outside the 32-bit address space.

  The feature from Geert is on top of the v4.10 merge window commit
  (specifically, you pulling my previous branch), as his changes depend
  on the Documentation/ movement patches.

  I figured that would just be easier than me trying to cherry-pick the
  Documentation patches to satisfy git.

  The patches have been soaking since 12/20, though I updated the last
  patch after linux-next caught a compiler error and added a
  Tested-and-Reported-by tag"

* 'stable/for-linus-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb:
  swiotlb: Export swiotlb_max_segment to users
  swiotlb: Add swiotlb=noforce debug option
  swiotlb: Convert swiotlb_force from int to enum
  x86, swiotlb: Simplify pci_swiotlb_detect_override()
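
A minimal usage sketch of the two user-visible pieces, not taken from the
pull itself: the boot line below is the new option documented in the
kernel-parameters.txt hunk, and the driver fragment mirrors the i915 hunk.

    # Debugging aid: never use bounce buffers.  DMA attempts to addresses
    # the device cannot reach now fail with a rate-limited
    # "Cannot do DMA to address ..." warning instead of being bounced.
    swiotlb=noforce

    /*
     * Consumer pattern for the newly exported helper: cap the scatterlist
     * segment size by the SWIOTLB limit.  swiotlb_max_segment() returns 0
     * when SWIOTLB is not in use, in which case no cap is applied.
     */
    unsigned int max_segment = swiotlb_max_segment();
    if (!max_segment)
            max_segment = rounddown(UINT_MAX, PAGE_SIZE);
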
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d1eec5e..be7c0d9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3821,10 +3821,11 @@
 			it if 0 is given (See Documentation/cgroup-v1/memory.txt)
 
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
-			Format: { <int> | force }
+			Format: { <int> | force | noforce }
 			<int> -- Number of I/O TLB slabs
 			force -- force using of bounce buffers even if they
 			         wouldn't be automatically used by the kernel
+			noforce -- Never use bounce buffers (for debugging)
 
 	switches=	[HW,M68k]
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 290a84f..e040827 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -524,7 +524,8 @@
 
 static int __init arm64_dma_init(void)
 {
-	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+	if (swiotlb_force == SWIOTLB_FORCE ||
+	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
 		swiotlb = 1;
 
 	return atomic_pool_init();
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 212c4d1..716d122 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -401,7 +401,8 @@
  */
 void __init mem_init(void)
 {
-	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+	if (swiotlb_force == SWIOTLB_FORCE ||
+	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
 		swiotlb_init(1);
 
 	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index b47edb8..410efb2 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -68,12 +68,10 @@
  */
 int __init pci_swiotlb_detect_override(void)
 {
-	int use_swiotlb = swiotlb | swiotlb_force;
-
-	if (swiotlb_force)
+	if (swiotlb_force == SWIOTLB_FORCE)
 		swiotlb = 1;
 
-	return use_swiotlb;
+	return swiotlb;
 }
 IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
 		  pci_xen_swiotlb_detect,
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index a9fafb5..a0b36a9 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -48,7 +48,7 @@
 	 * activate this IOMMU. If running as PV privileged, activate it
 	 * irregardless.
 	 */
-	if ((xen_initial_domain() || swiotlb || swiotlb_force))
+	if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE)
 		xen_swiotlb = 1;
 
 	/* If we are running under Xen, we MUST disable the native SWIOTLB.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e505d3..3dd7fc6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2306,15 +2306,6 @@
 	mutex_unlock(&obj->mm.lock);
 }
 
-static unsigned int swiotlb_max_size(void)
-{
-#if IS_ENABLED(CONFIG_SWIOTLB)
-	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
-#else
-	return 0;
-#endif
-}
-
 static void i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2362,7 +2353,7 @@
 	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-	max_segment = swiotlb_max_size();
+	max_segment = swiotlb_max_segment();
 	if (!max_segment)
 		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 478fb91..f905d6e 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -275,6 +275,10 @@
 		rc = 0;
 	} else
 		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+
+	if (!rc)
+		swiotlb_set_max_segment(PAGE_SIZE);
+
 	return rc;
 error:
 	if (repeat--) {
@@ -392,7 +396,7 @@
 	if (dma_capable(dev, dev_addr, size) &&
 	    !range_straddles_page_boundary(phys, size) &&
 		!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
-		!swiotlb_force) {
+		(swiotlb_force != SWIOTLB_FORCE)) {
 		/* we are not interested in the dma_addr returned by
 		 * xen_dma_map_page, only in the potential cache flushes executed
 		 * by the function. */
@@ -552,7 +556,7 @@
 		phys_addr_t paddr = sg_phys(sg);
 		dma_addr_t dev_addr = xen_phys_to_bus(paddr);
 
-		if (swiotlb_force ||
+		if (swiotlb_force == SWIOTLB_FORCE ||
 		    xen_arch_need_swiotlb(hwdev, paddr, dev_addr) ||
 		    !dma_capable(hwdev, dev_addr, sg->length) ||
 		    range_straddles_page_boundary(paddr, sg->length)) {
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 183f37c..4ee479f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -9,7 +9,13 @@
 struct page;
 struct scatterlist;
 
-extern int swiotlb_force;
+enum swiotlb_force {
+	SWIOTLB_NORMAL,		/* Default - depending on HW DMA mask etc. */
+	SWIOTLB_FORCE,		/* swiotlb=force */
+	SWIOTLB_NO_FORCE,	/* swiotlb=noforce */
+};
+
+extern enum swiotlb_force swiotlb_force;
 
 /*
  * Maximum allowable number of contiguous slabs to map,
@@ -108,11 +114,14 @@
 
 #ifdef CONFIG_SWIOTLB
 extern void __init swiotlb_free(void);
+unsigned int swiotlb_max_segment(void);
 #else
 static inline void swiotlb_free(void) { }
+static inline unsigned int swiotlb_max_segment(void) { return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
 extern int is_swiotlb_buffer(phys_addr_t paddr);
+extern void swiotlb_set_max_segment(unsigned int);
 
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h
index 7ea4c5e..288c0c5 100644
--- a/include/trace/events/swiotlb.h
+++ b/include/trace/events/swiotlb.h
@@ -11,16 +11,16 @@
 	TP_PROTO(struct device *dev,
 		 dma_addr_t dev_addr,
 		 size_t size,
-		 int swiotlb_force),
+		 enum swiotlb_force swiotlb_force),
 
 	TP_ARGS(dev, dev_addr, size, swiotlb_force),
 
 	TP_STRUCT__entry(
-		__string(	dev_name,	dev_name(dev)	)
-		__field(	u64,	dma_mask		)
-		__field(	dma_addr_t,	dev_addr	)
-		__field(	size_t,	size			)
-		__field(	int,	swiotlb_force		)
+		__string(	dev_name,	dev_name(dev)		)
+		__field(	u64,	dma_mask			)
+		__field(	dma_addr_t,	dev_addr		)
+		__field(	size_t,	size				)
+		__field(	enum swiotlb_force,	swiotlb_force	)
 	),
 
 	TP_fast_assign(
@@ -37,7 +37,10 @@
 		__entry->dma_mask,
 		(unsigned long long)__entry->dev_addr,
 		__entry->size,
-		__entry->swiotlb_force ? "swiotlb_force" : "" )
+		__print_symbolic(__entry->swiotlb_force,
+			{ SWIOTLB_NORMAL,	"NORMAL" },
+			{ SWIOTLB_FORCE,	"FORCE" },
+			{ SWIOTLB_NO_FORCE,	"NO_FORCE" }))
 );
 
 #endif /*  _TRACE_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index cb1b54e..975b8fc 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -53,7 +53,7 @@
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 
-int swiotlb_force;
+enum swiotlb_force swiotlb_force;
 
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
@@ -83,6 +83,12 @@
 static unsigned int io_tlb_index;
 
 /*
+ * Max segment that we can provide which (if pages are contiguous) will
+ * not be bounced (unless SWIOTLB_FORCE is set).
+ */
+unsigned int max_segment;
+
+/*
  * We need to save away the original address corresponding to a mapped entry
  * for the sync operations.
  */
@@ -106,8 +112,12 @@
 	}
 	if (*str == ',')
 		++str;
-	if (!strcmp(str, "force"))
-		swiotlb_force = 1;
+	if (!strcmp(str, "force")) {
+		swiotlb_force = SWIOTLB_FORCE;
+	} else if (!strcmp(str, "noforce")) {
+		swiotlb_force = SWIOTLB_NO_FORCE;
+		io_tlb_nslabs = 1;
+	}
 
 	return 0;
 }
@@ -120,6 +130,20 @@
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
+unsigned int swiotlb_max_segment(void)
+{
+	return max_segment;
+}
+EXPORT_SYMBOL_GPL(swiotlb_max_segment);
+
+void swiotlb_set_max_segment(unsigned int val)
+{
+	if (swiotlb_force == SWIOTLB_FORCE)
+		max_segment = 1;
+	else
+		max_segment = rounddown(val, PAGE_SIZE);
+}
+
 /* default to 64MB */
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
 unsigned long swiotlb_size_or_default(void)
@@ -201,6 +225,7 @@
 	if (verbose)
 		swiotlb_print_info();
 
+	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
 	return 0;
 }
 
@@ -279,6 +304,7 @@
 	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
 	if (rc)
 		free_pages((unsigned long)vstart, order);
+
 	return rc;
 }
 
@@ -333,6 +359,8 @@
 
 	late_alloc = 1;
 
+	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+
 	return 0;
 
 cleanup4:
@@ -347,6 +375,7 @@
 	io_tlb_end = 0;
 	io_tlb_start = 0;
 	io_tlb_nslabs = 0;
+	max_segment = 0;
 	return -ENOMEM;
 }
 
@@ -375,6 +404,7 @@
 				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
 	io_tlb_nslabs = 0;
+	max_segment = 0;
 }
 
 int is_swiotlb_buffer(phys_addr_t paddr)
@@ -543,8 +573,15 @@
 map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 	   enum dma_data_direction dir, unsigned long attrs)
 {
-	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
+	dma_addr_t start_dma_addr;
 
+	if (swiotlb_force == SWIOTLB_NO_FORCE) {
+		dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
+				     &phys);
+		return SWIOTLB_MAP_ERROR;
+	}
+
+	start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
 				      dir, attrs);
 }
@@ -721,6 +758,9 @@
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
 	     int do_panic)
 {
+	if (swiotlb_force == SWIOTLB_NO_FORCE)
+		return;
+
 	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
 	 * Unfortunately the drivers cannot handle this operation properly.
@@ -763,7 +803,7 @@
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
+	if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
 		return dev_addr;
 
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
@@ -904,7 +944,7 @@
 		phys_addr_t paddr = sg_phys(sg);
 		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
 
-		if (swiotlb_force ||
+		if (swiotlb_force == SWIOTLB_FORCE ||
 		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			phys_addr_t map = map_single(hwdev, sg_phys(sg),
 						     sg->length, dir, attrs);