[PATCH] x86_64: Use function pointers to call DMA mapping functions

AK: I hacked Muli's original patch a lot and there were a lot
of changes - all bugs are probably to blame on me now.
There were also some changes in the fall back behaviour
for swiotlb - in particular it doesn't try to use GFP_DMA
now anymore. Also all DMA mapping operations use the
same core dma_alloc_coherent code with proper fallbacks now.
And various other changes and cleanups.

Known problems: iommu=force swiotlb=force together breaks
                needs more testing.

This patch cleans up x86_64's DMA mapping dispatching code. Right now
we have three possible IOMMU types: AGP GART, swiotlb and nommu, and
in the future we will also have Xen's x86_64 swiotlb and other HW
IOMMUs for x86_64. In order to support all of them cleanly, this
patch:

- introduces a struct dma_mapping_ops with function pointers for each
  of the DMA mapping operations of gart (AMD HW IOMMU), swiotlb
  (software IOMMU) and nommu (no IOMMU).

- gets rid of:

  if (swiotlb)
      return swiotlb_xxx();

- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set
This makes swiotlb faster by avoiding double copying in some cases.

Signed-Off-By: Muli Ben-Yehuda <mulix@mulix.org>
Signed-Off-By: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index 36d16df..49a81a6 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -12,155 +12,176 @@
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 
+struct dma_mapping_ops {
+	int             (*mapping_error)(dma_addr_t dma_addr);
+	void*           (*alloc_coherent)(struct device *dev, size_t size,
+                                dma_addr_t *dma_handle, gfp_t gfp);
+	void            (*free_coherent)(struct device *dev, size_t size,
+                                void *vaddr, dma_addr_t dma_handle);
+	dma_addr_t      (*map_single)(struct device *hwdev, void *ptr,
+                                size_t size, int direction);
+	/* like map_single, but doesn't check the device mask */
+	dma_addr_t      (*map_simple)(struct device *hwdev, char *ptr,
+                                size_t size, int direction);
+	void            (*unmap_single)(struct device *dev, dma_addr_t addr,
+		                size_t size, int direction);
+	void            (*sync_single_for_cpu)(struct device *hwdev,
+		                dma_addr_t dma_handle, size_t size,
+				int direction);
+	void            (*sync_single_for_device)(struct device *hwdev,
+                                dma_addr_t dma_handle, size_t size,
+				int direction);
+	void            (*sync_single_range_for_cpu)(struct device *hwdev,
+                                dma_addr_t dma_handle, unsigned long offset,
+		                size_t size, int direction);
+	void            (*sync_single_range_for_device)(struct device *hwdev,
+				dma_addr_t dma_handle, unsigned long offset,
+		                size_t size, int direction);
+	void            (*sync_sg_for_cpu)(struct device *hwdev,
+                                struct scatterlist *sg, int nelems,
+				int direction);
+	void            (*sync_sg_for_device)(struct device *hwdev,
+				struct scatterlist *sg, int nelems,
+				int direction);
+	int             (*map_sg)(struct device *hwdev, struct scatterlist *sg,
+		                int nents, int direction);
+	void            (*unmap_sg)(struct device *hwdev,
+				struct scatterlist *sg, int nents,
+				int direction);
+	int             (*dma_supported)(struct device *hwdev, u64 mask);
+	int		is_phys;
+};
+
 extern dma_addr_t bad_dma_address;
-#define dma_mapping_error(x) \
-	(swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
+extern struct dma_mapping_ops* dma_ops;
+extern int iommu_merge;
 
-void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-			 gfp_t gfp);
-void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
-			 dma_addr_t dma_handle);
-
-#ifdef CONFIG_GART_IOMMU
-
-extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size,
-				 int direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
-			     int direction);
-
-#else
-
-/* No IOMMU */
-
-static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr,
-					size_t size, int direction)
+static inline int dma_mapping_error(dma_addr_t dma_addr)
 {
-	dma_addr_t addr;
+	if (dma_ops->mapping_error)
+		return dma_ops->mapping_error(dma_addr);
 
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-	addr = virt_to_bus(ptr);
-
-	if ((addr+size) & ~*hwdev->dma_mask)
-		out_of_line_bug();
-	return addr;
+	return (dma_addr == bad_dma_address);
 }
 
-static inline void dma_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
-				    size_t size, int direction)
+extern void *dma_alloc_coherent(struct device *dev, size_t size,
+				dma_addr_t *dma_handle, gfp_t gfp);
+extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+			      dma_addr_t dma_handle);
+
+static inline dma_addr_t
+dma_map_single(struct device *hwdev, void *ptr, size_t size,
+	       int direction)
 {
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-	/* Nothing to do */
+	return dma_ops->map_single(hwdev, ptr, size, direction);
 }
 
-#endif
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
+		 int direction)
+{
+	dma_ops->unmap_single(dev, addr, size, direction);
+}
 
 #define dma_map_page(dev,page,offset,size,dir) \
 	dma_map_single((dev), page_address(page)+(offset), (size), (dir))
 
-static inline void dma_sync_single_for_cpu(struct device *hwdev,
-					       dma_addr_t dma_handle,
-					       size_t size, int direction)
-{
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_single_for_cpu(hwdev,dma_handle,size,direction);
-
-	flush_write_buffers();
-}
-
-static inline void dma_sync_single_for_device(struct device *hwdev,
-						  dma_addr_t dma_handle,
-						  size_t size, int direction)
-{
-        if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_single_for_device(hwdev,dma_handle,size,direction);
-
-	flush_write_buffers();
-}
-
-static inline void dma_sync_single_range_for_cpu(struct device *hwdev,
-						 dma_addr_t dma_handle,
-						 unsigned long offset,
-						 size_t size, int direction)
-{
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_single_range_for_cpu(hwdev,dma_handle,offset,size,direction);
-
-	flush_write_buffers();
-}
-
-static inline void dma_sync_single_range_for_device(struct device *hwdev,
-						    dma_addr_t dma_handle,
-						    unsigned long offset,
-						    size_t size, int direction)
-{
-        if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_single_range_for_device(hwdev,dma_handle,offset,size,direction);
-
-	flush_write_buffers();
-}
-
-static inline void dma_sync_sg_for_cpu(struct device *hwdev,
-				       struct scatterlist *sg,
-				       int nelems, int direction)
-{
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction);
-
-	flush_write_buffers();
-}
-
-static inline void dma_sync_sg_for_device(struct device *hwdev,
-					  struct scatterlist *sg,
-					  int nelems, int direction)
-{
-	if (direction == DMA_NONE)
-		out_of_line_bug();
-
-	if (swiotlb)
-		return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction);
-
-	flush_write_buffers();
-}
-
-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
-		      int nents, int direction);
-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
-			 int nents, int direction);
-
 #define dma_unmap_page dma_unmap_single
 
-extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_get_cache_alignment(void);
-#define dma_is_consistent(h) 1
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
+static inline void
+dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+			size_t size, int direction)
 {
-	if (!dev->dma_mask || !dma_supported(dev, mask))
-		return -EIO;
-	*dev->dma_mask = mask;
-	return 0;
+	if (dma_ops->sync_single_for_cpu)
+		dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
+					     direction);
+	flush_write_buffers();
 }
 
-static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
+static inline void
+dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
+			   size_t size, int direction)
+{
+	if (dma_ops->sync_single_for_device)
+		dma_ops->sync_single_for_device(hwdev, dma_handle, size,
+						direction);
+	flush_write_buffers();
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+			      unsigned long offset, size_t size, int direction)
+{
+	if (dma_ops->sync_single_range_for_cpu) {
+		dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
+	}
+
+	flush_write_buffers();
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size, int direction)
+{
+	if (dma_ops->sync_single_range_for_device)
+		dma_ops->sync_single_range_for_device(hwdev, dma_handle,
+						      offset, size, direction);
+
+	flush_write_buffers();
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+		    int nelems, int direction)
+{
+	if (dma_ops->sync_sg_for_cpu)
+		dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
+	flush_write_buffers();
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+		       int nelems, int direction)
+{
+	if (dma_ops->sync_sg_for_device) {
+		dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+	}
+
+	flush_write_buffers();
+}
+
+static inline int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+	return dma_ops->map_sg(hwdev, sg, nents, direction);
+}
+
+static inline void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+	     int direction)
+{
+	dma_ops->unmap_sg(hwdev, sg, nents, direction);
+}
+
+extern int dma_supported(struct device *hwdev, u64 mask);
+
+/* same for gart, swiotlb, and nommu */
+static inline int dma_get_cache_alignment(void)
+{
+	return boot_cpu_data.x86_clflush_size;
+}
+
+#define dma_is_consistent(h) 1
+
+extern int dma_set_mask(struct device *dev, u64 mask);
+
+static inline void
+dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
 {
 	flush_write_buffers();
 }
 
-#endif
+extern struct device fallback_dev;
+extern int panic_on_overflow;
+
+#endif /* _X8664_DMA_MAPPING_H */
diff --git a/include/asm-x86_64/gart-mapping.h b/include/asm-x86_64/gart-mapping.h
new file mode 100644
index 0000000..ada497b
--- /dev/null
+++ b/include/asm-x86_64/gart-mapping.h
@@ -0,0 +1,16 @@
+#ifndef _X8664_GART_MAPPING_H
+#define _X8664_GART_MAPPING_H 1
+
+#include <linux/types.h>
+#include <asm/types.h>
+
+struct device;
+
+extern void*
+gart_alloc_coherent(struct device *dev, size_t size,
+        dma_addr_t *dma_handle, gfp_t gfp);
+
+extern int
+gart_dma_supported(struct device *hwdev, u64 mask);
+
+#endif /* _X8664_GART_MAPPING_H */
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index eeb3088..fd03e15 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -42,18 +42,20 @@
 #include <asm/scatterlist.h>
 #include <linux/string.h>
 #include <asm/page.h>
+#include <linux/dma-mapping.h> /* for have_iommu */
 
 extern int iommu_setup(char *opt);
 
-#ifdef CONFIG_GART_IOMMU
 /* The PCI address space does equal the physical memory
  * address space.  The networking and block device layers use
  * this boolean for bounce buffer decisions
  *
- * On AMD64 it mostly equals, but we set it to zero to tell some subsystems
- * that an IOMMU is available.
+ * On AMD64 it mostly equals, but we set it to zero if a hardware
+ * IOMMU (gart) of sotware IOMMU (swiotlb) is available.
  */
-#define PCI_DMA_BUS_IS_PHYS	(no_iommu ? 1 : 0)
+#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
+
+#ifdef CONFIG_GART_IOMMU
 
 /*
  * x86-64 always supports DAC, but sometimes it is useful to force
@@ -79,7 +81,6 @@
 #else
 /* No IOMMU */
 
-#define PCI_DMA_BUS_IS_PHYS	1
 #define pci_dac_dma_supported(pci_dev, mask)    1
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 3450108..113e8a2 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -92,7 +92,9 @@
 extern int unhandled_signal(struct task_struct *tsk, int sig);
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
-extern void swiotlb_init(void);
+
+extern void gart_parse_options(char *);
+extern void __init no_iommu_init(void);
 
 extern unsigned long table_start, table_end;
 
@@ -106,12 +108,17 @@
 extern int acpi_ht;
 extern int acpi_disabled;
 
+#ifdef CONFIG_GART_IOMMU
 extern int fallback_aper_order;
 extern int fallback_aper_force;
 extern int iommu_aperture;
-extern int iommu_aperture_disabled;
 extern int iommu_aperture_allowed;
+extern int iommu_aperture_disabled;
 extern int fix_aperture;
+#else
+#define iommu_aperture 0
+#define iommu_aperture_allowed 0
+#endif
 extern int force_iommu;
 
 extern int reboot_force;
diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h
index dddf1b2..60757ef 100644
--- a/include/asm-x86_64/swiotlb.h
+++ b/include/asm-x86_64/swiotlb.h
@@ -3,10 +3,14 @@
 
 #include <linux/config.h>
 
+#include <asm/dma-mapping.h>
+
 /* SWIOTLB interface */
 
-extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
-				      int dir);
+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
+				     size_t size, int dir);
+extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+                       dma_addr_t *dma_handle, gfp_t flags);
 extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 				  size_t size, int dir);
 extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
@@ -34,10 +38,10 @@
 extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
 			 int nents, int direction);
 extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
-extern void *swiotlb_alloc_coherent (struct device *hwdev, size_t size,
-				     dma_addr_t *dma_handle, gfp_t flags);
 extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
 				   void *vaddr, dma_addr_t dma_handle);
+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+extern void swiotlb_init(void);
 
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
@@ -45,4 +49,6 @@
 #define swiotlb 0
 #endif
 
-#endif
+extern void pci_swiotlb_init(void);
+
+#endif /* _ASM_SWTIOLB_H */