drm/i915: Replace the array of pages with a scatterlist

Rather than have multiple data structures for describing our page layout
in conjunction with the array of pages, we can migrate all users over to
a scatterlist.

One major advantage, other than unifying the page tracking structures,
this offers is that we replace the vmalloc'ed array (which can be up to
a megabyte in size) with a chain of individual pages which helps reduce
memory pressure.

The disadvantage is that we then do not have a simple array to iterate,
or to access randomly. The common case for this is in the relocation
processing, which will typically fit within a single scatterlist page
and so be almost the same cost as the simple array. For iterating over
the array, the extra function call could be optimised away, but in
reality is an insignificant cost of either binding the pages, or
performing the pwrite/pread.

v2: Fix drm_clflush_sg() to not invoke wbinvd as well! And fix the
trivial compile error from rebasing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1847731..e0c9bdd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -167,8 +167,7 @@
 }
 
 static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
-					 struct scatterlist *sg_list,
-					 unsigned sg_len,
+					 const struct sg_table *pages,
 					 unsigned first_entry,
 					 uint32_t pte_flags)
 {
@@ -180,12 +179,12 @@
 	struct scatterlist *sg;
 
 	/* init sg walking */
-	sg = sg_list;
+	sg = pages->sgl;
 	i = 0;
 	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
 	m = 0;
 
-	while (i < sg_len) {
+	while (i < pages->nents) {
 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
@@ -194,13 +193,11 @@
 			pt_vaddr[j] = pte | pte_flags;
 
 			/* grab the next page */
-			m++;
-			if (m == segment_len) {
-				sg = sg_next(sg);
-				i++;
-				if (i == sg_len)
+			if (++m == segment_len) {
+				if (++i == pages->nents)
 					break;
 
+				sg = sg_next(sg);
 				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
 				m = 0;
 			}
@@ -213,44 +210,10 @@
 	}
 }
 
-static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
-				    unsigned first_entry, unsigned num_entries,
-				    struct page **pages, uint32_t pte_flags)
-{
-	uint32_t *pt_vaddr, pte;
-	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned last_pte, i;
-	dma_addr_t page_addr;
-
-	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
-
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (i = first_pte; i < last_pte; i++) {
-			page_addr = page_to_phys(*pages);
-			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
-			pt_vaddr[i] = pte | pte_flags;
-
-			pages++;
-		}
-
-		kunmap_atomic(pt_vaddr);
-
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pd++;
-	}
-}
-
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t pte_flags = GEN6_PTE_VALID;
 
 	switch (cache_level) {
@@ -261,7 +224,7 @@
 		pte_flags |= GEN6_PTE_CACHE_LLC;
 		break;
 	case I915_CACHE_NONE:
-		if (IS_HASWELL(dev))
+		if (IS_HASWELL(obj->base.dev))
 			pte_flags |= HSW_PTE_UNCACHED;
 		else
 			pte_flags |= GEN6_PTE_UNCACHED;
@@ -270,26 +233,10 @@
 		BUG();
 	}
 
-	if (obj->sg_table) {
-		i915_ppgtt_insert_sg_entries(ppgtt,
-					     obj->sg_table->sgl,
-					     obj->sg_table->nents,
-					     obj->gtt_space->start >> PAGE_SHIFT,
-					     pte_flags);
-	} else if (dev_priv->mm.gtt->needs_dmar) {
-		BUG_ON(!obj->sg_list);
-
-		i915_ppgtt_insert_sg_entries(ppgtt,
-					     obj->sg_list,
-					     obj->num_sg,
-					     obj->gtt_space->start >> PAGE_SHIFT,
-					     pte_flags);
-	} else
-		i915_ppgtt_insert_pages(ppgtt,
-					obj->gtt_space->start >> PAGE_SHIFT,
-					obj->base.size >> PAGE_SHIFT,
-					obj->pages,
-					pte_flags);
+	i915_ppgtt_insert_sg_entries(ppgtt,
+				     obj->sg_table ?: obj->pages,
+				     obj->gtt_space->start >> PAGE_SHIFT,
+				     pte_flags);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
@@ -361,44 +308,26 @@
 
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/* don't map imported dma buf objects */
-	if (dev_priv->mm.gtt->needs_dmar && !obj->sg_table)
-		return intel_gtt_map_memory(obj->pages,
-					    obj->base.size >> PAGE_SHIFT,
-					    &obj->sg_list,
-					    &obj->num_sg);
-	else
+	if (obj->has_dma_mapping)
 		return 0;
+
+	if (!dma_map_sg(&obj->base.dev->pdev->dev,
+			obj->pages->sgl, obj->pages->nents,
+			PCI_DMA_BIDIRECTIONAL))
+		return -ENOSPC;
+
+	return 0;
 }
 
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
 
-	if (obj->sg_table) {
-		intel_gtt_insert_sg_entries(obj->sg_table->sgl,
-					    obj->sg_table->nents,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    agp_type);
-	} else if (dev_priv->mm.gtt->needs_dmar) {
-		BUG_ON(!obj->sg_list);
-
-		intel_gtt_insert_sg_entries(obj->sg_list,
-					    obj->num_sg,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    agp_type);
-	} else
-		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
-				       obj->base.size >> PAGE_SHIFT,
-				       obj->pages,
-				       agp_type);
-
+	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
+				    obj->gtt_space->start >> PAGE_SHIFT,
+				    agp_type);
 	obj->has_global_gtt_mapping = 1;
 }
 
@@ -418,10 +347,10 @@
 
 	interruptible = do_idling(dev_priv);
 
-	if (obj->sg_list) {
-		intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
-		obj->sg_list = NULL;
-	}
+	if (!obj->has_dma_mapping)
+		dma_unmap_sg(&dev->pdev->dev,
+			     obj->pages->sgl, obj->pages->nents,
+			     PCI_DMA_BIDIRECTIONAL);
 
 	undo_idling(dev_priv, interruptible);
 }