drm: Add support for two-ended allocation, v3

Clients like i915 need to segregate cache domains within the GTT which
can lead to small amounts of fragmentation. By allocating the uncached
buffers from the bottom and the cacheable buffers from the top, we can
reduce the amount of wasted space and also optimize allocation of the
mappable portion of the GTT to only those buffers that require CPU
access through the GTT.

For other drivers, allocating small bos from one end and large ones
from the other helps improve the quality of fragmentation.

Based on drm_mm work by Chris Wilson.

v3: Changed to use a TTM placement flag
v2: Updated kerneldoc

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Christian König <deathsimple@vodafone.de>
Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: David Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a2d45b74..8f64be4 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -82,6 +82,10 @@
  * this to implement guard pages between incompatible caching domains in the
  * graphics TT.
  *
+ * Two behaviors are supported for searching and allocating: bottom-up and top-down.
+ * The default is bottom-up. Top-down allocation can be used if the memory area
+ * has different restrictions, or just to reduce fragmentation.
+ *
  * Finally iteration helpers to walk all nodes and all holes are provided as are
  * some basic allocator dumpers for debugging.
  */
@@ -102,7 +106,8 @@
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
 				 unsigned long size, unsigned alignment,
-				 unsigned long color)
+				 unsigned long color,
+				 enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -115,12 +120,22 @@
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
+	BUG_ON(adj_start < hole_start);
+	BUG_ON(adj_end > hole_end);
+
 	if (adj_start == hole_start) {
 		hole_node->hole_follows = 0;
 		list_del(&hole_node->hole_stack);
@@ -205,7 +220,8 @@
  * @size: size of the allocation
  * @alignment: alignment of the allocation
  * @color: opaque tag value to use for this node
- * @flags: flags to fine-tune the allocation
+ * @sflags: flags to fine-tune the allocation search
+ * @aflags: flags to fine-tune the allocation behavior
  *
  * The preallocated node must be cleared to 0.
  *
@@ -215,16 +231,17 @@
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
 			       unsigned long size, unsigned alignment,
 			       unsigned long color,
-			       enum drm_mm_search_flags flags)
+			       enum drm_mm_search_flags sflags,
+			       enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_generic(mm, size, alignment,
-					       color, flags);
+					       color, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
-	drm_mm_insert_helper(hole_node, node, size, alignment, color);
+	drm_mm_insert_helper(hole_node, node, size, alignment, color, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_generic);
@@ -233,7 +250,8 @@
 				       struct drm_mm_node *node,
 				       unsigned long size, unsigned alignment,
 				       unsigned long color,
-				       unsigned long start, unsigned long end)
+				       unsigned long start, unsigned long end,
+				       enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -248,13 +266,20 @@
 	if (adj_end > end)
 		adj_end = end;
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
 	if (adj_start == hole_start) {
@@ -271,6 +296,8 @@
 	INIT_LIST_HEAD(&node->hole_stack);
 	list_add(&node->node_list, &hole_node->node_list);
 
+	BUG_ON(node->start < start);
+	BUG_ON(node->start < adj_start);
 	BUG_ON(node->start + node->size > adj_end);
 	BUG_ON(node->start + node->size > end);
 
@@ -290,7 +317,8 @@
  * @color: opaque tag value to use for this node
  * @start: start of the allowed range for this node
  * @end: end of the allowed range for this node
- * @flags: flags to fine-tune the allocation
+ * @sflags: flags to fine-tune the allocation search
+ * @aflags: flags to fine-tune the allocation behavior
  *
  * The preallocated node must be cleared to 0.
  *
@@ -298,21 +326,23 @@
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
-					unsigned long size, unsigned alignment, unsigned long color,
+					unsigned long size, unsigned alignment,
+					unsigned long color,
 					unsigned long start, unsigned long end,
-					enum drm_mm_search_flags flags)
+					enum drm_mm_search_flags sflags,
+					enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_in_range_generic(mm,
 							size, alignment, color,
-							start, end, flags);
+							start, end, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
 	drm_mm_insert_helper_range(hole_node, node,
 				   size, alignment, color,
-				   start, end);
+				   start, end, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
@@ -391,7 +421,8 @@
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
 			if (adj_end <= adj_start)
@@ -432,7 +463,8 @@
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
 		if (adj_start < start)
 			adj_start = start;
 		if (adj_end > end)