drm: Improve drm_mm search (and fix topdown allocation) with rbtrees
The drm_mm range manager claimed to support top-down insertion, but it
was neither searching for the top-most hole that could fit the
allocation request nor fitting the request to the hole correctly.
In order to search the range efficiently, we create a secondary index
for the holes using either their size or their address. This index
allows us to find the smallest hole or the hole at the bottom or top of
the range efficiently, whilst keeping the hole stack to rapidly service
evictions.
v2: Search for holes both high and low. Rename flags to mode.
v3: Discover rb_entry_safe() and use it!
v4: Kerneldoc for enum drm_mm_insert_mode.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Cc: Alexandre Courbot <gnurou@gmail.com>
Cc: Eric Anholt <eric@anholt.net>
Cc: Sinclair Yeh <syeh@vmware.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com> # vmwgfx
Reviewed-by: Lucas Stach <l.stach@pengutronix.de> #etnaviv
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170202210438.28702-1-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a07b627..c868989 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -69,12 +69,10 @@ insert_mappable_node(struct i915_ggtt *ggtt,
struct drm_mm_node *node, u32 size)
{
memset(node, 0, sizeof(*node));
- return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
- size, 0,
- I915_COLOR_UNEVICTABLE,
- 0, ggtt->mappable_end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
+ size, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
}
static void
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a43e44e..c181b1b 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -109,6 +109,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
}, **phase;
struct i915_vma *vma, *next;
struct drm_mm_node *node;
+ enum drm_mm_insert_mode mode;
int ret;
lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -127,10 +128,14 @@ i915_gem_evict_something(struct i915_address_space *vm,
* On each list, the oldest objects lie at the HEAD with the freshest
* object on the TAIL.
*/
+ mode = DRM_MM_INSERT_BEST;
+ if (flags & PIN_HIGH)
+ mode = DRM_MM_INSERT_HIGH;
+ if (flags & PIN_MAPPABLE)
+ mode = DRM_MM_INSERT_LOW;
drm_mm_scan_init_with_range(&scan, &vm->mm,
min_size, alignment, cache_level,
- start, end,
- flags & PIN_HIGH ? DRM_MM_CREATE_TOP : 0);
+ start, end, mode);
/* Retire before we search the active list. Although we have
* reasonable accuracy in our retirement lists, we may have
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c66e905..57bec08 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -436,12 +436,11 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
PIN_MAPPABLE | PIN_NONBLOCK);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
- ret = drm_mm_insert_node_in_range_generic
+ ret = drm_mm_insert_node_in_range
(&ggtt->base.mm, &cache->node,
PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->mappable_end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ DRM_MM_INSERT_LOW);
if (ret) /* no inactive aperture space, use cpu reloc */
return NULL;
} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e808aad..30d8dbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2748,12 +2748,10 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
return ret;
/* Reserve a mappable slot for our lockless error capture */
- ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
- &ggtt->error_capture,
- PAGE_SIZE, 0,
- I915_COLOR_UNEVICTABLE,
- 0, ggtt->mappable_end,
- 0, 0);
+ ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
+ PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
if (ret)
return ret;
@@ -3663,7 +3661,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
u64 size, u64 alignment, unsigned long color,
u64 start, u64 end, unsigned int flags)
{
- u32 search_flag, alloc_flag;
+ enum drm_mm_insert_mode mode;
u64 offset;
int err;
@@ -3684,13 +3682,11 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
return -ENOSPC;
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
- } else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
+ mode = DRM_MM_INSERT_BEST;
+ if (flags & PIN_HIGH)
+ mode = DRM_MM_INSERT_HIGH;
+ if (flags & PIN_MAPPABLE)
+ mode = DRM_MM_INSERT_LOW;
/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
* so we know that we always have a minimum alignment of 4096.
@@ -3702,10 +3698,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (alignment <= I915_GTT_MIN_ALIGNMENT)
alignment = 0;
- err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
- size, alignment, color,
- start, end,
- search_flag, alloc_flag);
+ err = drm_mm_insert_node_in_range(&vm->mm, node,
+ size, alignment, color,
+ start, end, mode);
if (err != -ENOSPC)
return err;
@@ -3743,9 +3738,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (err)
return err;
- search_flag = DRM_MM_SEARCH_DEFAULT;
- return drm_mm_insert_node_in_range_generic(&vm->mm, node,
- size, alignment, color,
- start, end,
- search_flag, alloc_flag);
+ return drm_mm_insert_node_in_range(&vm->mm, node,
+ size, alignment, color,
+ start, end, DRM_MM_INSERT_EVICT);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 127d698..ec7c5d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -55,9 +55,9 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
return -ENODEV;
mutex_lock(&dev_priv->mm.stolen_lock);
- ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size,
- alignment, start, end,
- DRM_MM_SEARCH_DEFAULT);
+ ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+ size, alignment, 0,
+ start, end, DRM_MM_INSERT_BEST);
mutex_unlock(&dev_priv->mm.stolen_lock);
return ret;