drm/ttm: Add a bo list reserve fastpath (v2)
Makes it possible to reserve a list of buffer objects with a single
spin lock / unlock if there is no contention.
Should improve cpu usage on SMP kernels.
v2: Initialize private list members on reserve and don't call
ttm_bo_list_ref_sub() with zero put_count.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 148a322..a586378 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -169,7 +169,7 @@
}
EXPORT_SYMBOL(ttm_bo_wait_unreserved);
-static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man;
@@ -191,11 +191,7 @@
}
}
-/**
- * Call with the lru_lock held.
- */
-
-static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
int put_count = 0;
@@ -267,6 +263,15 @@
BUG();
}
+void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+ bool never_free)
+{
+ while (count--)
+ kref_put(&bo->list_kref,
+ (never_free || (count >= 0)) ? ttm_bo_ref_bug :
+ ttm_bo_release_list);
+}
+
int ttm_bo_reserve(struct ttm_buffer_object *bo,
bool interruptible,
bool no_wait, bool use_sequence, uint32_t sequence)
@@ -282,8 +287,7 @@
put_count = ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
- while (put_count--)
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ ttm_bo_list_ref_sub(bo, put_count, true);
return ret;
}
@@ -496,8 +500,7 @@
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
- while (put_count--)
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ ttm_bo_list_ref_sub(bo, put_count, true);
return;
} else {
@@ -580,8 +583,7 @@
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
- while (put_count--)
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ ttm_bo_list_ref_sub(bo, put_count, true);
return 0;
}
@@ -802,8 +804,7 @@
BUG_ON(ret != 0);
- while (put_count--)
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ ttm_bo_list_ref_sub(bo, put_count, true);
ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu);
ttm_bo_unreserve(bo);
@@ -1783,8 +1784,7 @@
put_count = ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
- while (put_count--)
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ ttm_bo_list_ref_sub(bo, put_count, true);
/**
* Wait for GPU, then move to system cached.
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index c285c29..201a71d 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -32,6 +32,72 @@
#include <linux/sched.h>
#include <linux/module.h>
+static void ttm_eu_backoff_reservation_locked(struct list_head *list)
+{
+ struct ttm_validate_buffer *entry;
+
+ list_for_each_entry(entry, list, head) {
+ struct ttm_buffer_object *bo = entry->bo;
+ if (!entry->reserved)
+ continue;
+
+ if (entry->removed) {
+ ttm_bo_add_to_lru(bo);
+ entry->removed = false;
+
+ }
+ entry->reserved = false;
+ atomic_set(&bo->reserved, 0);
+ wake_up_all(&bo->event_queue);
+ }
+}
+
+static void ttm_eu_del_from_lru_locked(struct list_head *list)
+{
+ struct ttm_validate_buffer *entry;
+
+ list_for_each_entry(entry, list, head) {
+ struct ttm_buffer_object *bo = entry->bo;
+ if (!entry->reserved)
+ continue;
+
+ if (!entry->removed) {
+ entry->put_count = ttm_bo_del_from_lru(bo);
+ entry->removed = true;
+ }
+ }
+}
+
+static void ttm_eu_list_ref_sub(struct list_head *list)
+{
+ struct ttm_validate_buffer *entry;
+
+ list_for_each_entry(entry, list, head) {
+ struct ttm_buffer_object *bo = entry->bo;
+
+ if (entry->put_count) {
+ ttm_bo_list_ref_sub(bo, entry->put_count, true);
+ entry->put_count = 0;
+ }
+ }
+}
+
+static int ttm_eu_wait_unreserved_locked(struct list_head *list,
+ struct ttm_buffer_object *bo)
+{
+ struct ttm_bo_global *glob = bo->glob;
+ int ret;
+
+ ttm_eu_del_from_lru_locked(list);
+ spin_unlock(&glob->lru_lock);
+ ret = ttm_bo_wait_unreserved(bo, true);
+ spin_lock(&glob->lru_lock);
+ if (unlikely(ret != 0))
+ ttm_eu_backoff_reservation_locked(list);
+ return ret;
+}
+
+
void ttm_eu_backoff_reservation(struct list_head *list)
{
struct ttm_validate_buffer *entry;
@@ -61,35 +127,71 @@
int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
{
+ struct ttm_bo_global *glob;
struct ttm_validate_buffer *entry;
int ret;
+ if (list_empty(list))
+ return 0;
+
+ list_for_each_entry(entry, list, head) {
+ entry->reserved = false;
+ entry->put_count = 0;
+ entry->removed = false;
+ }
+
+ entry = list_first_entry(list, struct ttm_validate_buffer, head);
+ glob = entry->bo->glob;
+
retry:
+ spin_lock(&glob->lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- entry->reserved = false;
- ret = ttm_bo_reserve(bo, true, false, true, val_seq);
- if (ret != 0) {
- ttm_eu_backoff_reservation(list);
- if (ret == -EAGAIN) {
- ret = ttm_bo_wait_unreserved(bo, true);
- if (unlikely(ret != 0))
- return ret;
- goto retry;
- } else
+retry_this_bo:
+ ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq);
+ switch (ret) {
+ case 0:
+ break;
+ case -EBUSY:
+ ret = ttm_eu_wait_unreserved_locked(list, bo);
+ if (unlikely(ret != 0)) {
+ spin_unlock(&glob->lru_lock);
+ ttm_eu_list_ref_sub(list);
return ret;
+ }
+ goto retry_this_bo;
+ case -EAGAIN:
+ ttm_eu_backoff_reservation_locked(list);
+ spin_unlock(&glob->lru_lock);
+ ttm_eu_list_ref_sub(list);
+ ret = ttm_bo_wait_unreserved(bo, true);
+ if (unlikely(ret != 0))
+ return ret;
+ goto retry;
+ default:
+ ttm_eu_backoff_reservation_locked(list);
+ spin_unlock(&glob->lru_lock);
+ ttm_eu_list_ref_sub(list);
+ return ret;
}
entry->reserved = true;
if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
- ttm_eu_backoff_reservation(list);
+ ttm_eu_backoff_reservation_locked(list);
+ spin_unlock(&glob->lru_lock);
+ ttm_eu_list_ref_sub(list);
ret = ttm_bo_wait_cpu(bo, false);
if (ret)
return ret;
goto retry;
}
}
+
+ ttm_eu_del_from_lru_locked(list);
+ spin_unlock(&glob->lru_lock);
+ ttm_eu_list_ref_sub(list);
+
return 0;
}
EXPORT_SYMBOL(ttm_eu_reserve_buffers);
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index beafc15..b0fc9c1 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -364,6 +364,44 @@
*/
extern void ttm_bo_unref(struct ttm_buffer_object **bo);
+
+/**
+ * ttm_bo_list_ref_sub
+ *
+ * @bo: The buffer object.
+ * @count: The number of references with which to decrease @bo::list_kref;
+ * @never_free: The refcount should not reach zero with this operation.
+ *
+ * Release @count lru list references to this buffer object.
+ */
+extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+ bool never_free);
+
+/**
+ * ttm_bo_add_to_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Add this bo to the relevant mem type lru and, if it's backed by
+ * system pages (ttms) to the swap list.
+ * This function must be called with struct ttm_bo_global::lru_lock held, and
+ * is typically called immediately prior to unreserving a bo.
+ */
+extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_del_from_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Remove this bo from all lru lists used to lookup and reserve an object.
+ * This function must be called with struct ttm_bo_global::lru_lock held,
+ * and is usually called just immediately after the bo has been reserved to
+ * avoid recursive reservation from lru lists.
+ */
+extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
+
+
/**
* ttm_bo_lock_delayed_workqueue
*
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8e0c848..95068e6 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -864,6 +864,20 @@
bool interruptible,
bool no_wait, bool use_sequence, uint32_t sequence);
+
+/**
+ * ttm_bo_reserve_locked:
+ *
+ * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock
+ * spinlock held, and will not remove reserved buffers from the lru lists.
+ * The function may release the LRU spinlock if it needs to sleep.
+ */
+
+extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+ bool interruptible,
+ bool no_wait, bool use_sequence,
+ uint32_t sequence);
+
/**
* ttm_bo_unreserve
*
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index cd2c475..fd09b84 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -41,7 +41,9 @@
* @bo: refcounted buffer object pointer.
* @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once
* adding a new sync object.
- * @reservied: Indicates whether @bo has been reserved for validation.
+ * @reserved: Indicates whether @bo has been reserved for validation.
+ * @removed: Indicates whether @bo has been removed from lru lists.
+ * @put_count: Number of outstanding references on bo::list_kref.
*/
struct ttm_validate_buffer {
@@ -49,6 +51,8 @@
struct ttm_buffer_object *bo;
void *new_sync_obj_arg;
bool reserved;
+ bool removed;
+ int put_count;
};
/**