drm/ttm: add a way to bo_wait for either the last read or last write
Sometimes we want to know whether a buffer is busy and wait for it (bo_wait).
However, sometimes it would be more useful to be able to query whether
a buffer is busy being either read or written, and to wait until it has
stopped being read or written. The point of this is to avoid unnecessary
waiting, e.g. if a GPU has written something to a buffer and is now reading
that buffer, and a CPU wants to map that buffer for read, the CPU only needs
to wait for the last write. If there has been no write, no waiting is needed
at all.
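A minimal sketch of that scenario with the new interface (the locking follows
the existing callers in the diff; TTM_USAGE_WRITE makes ttm_bo_wait consider
only the last-write sync object):

	/* Map-for-read: wait only until the last GPU write has signaled;
	 * outstanding GPU reads do not block the CPU read mapping. */
	spin_lock(&bo->bdev->fence_lock);
	ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_WRITE);
	spin_unlock(&bo->bdev->fence_lock);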
This, of course, requires user space drivers to send read/write flags
with each relocation (like the read/write domains we already have in radeon,
so we can actually use those for something useful now).
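For example, a driver could derive the usage flags from the relocation's
domains; a hypothetical sketch only, since the radeon hunk below still passes
TTM_USAGE_READWRITE unconditionally:

	/* Hypothetical: only track what the command stream actually does. */
	p->relocs[i].lobj.tv.usage =
		(r->read_domains ? TTM_USAGE_READ : 0) |
		(r->write_domain ? TTM_USAGE_WRITE : 0);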
Here is how this patch works:
The read/write flags should be passed to ttm_validate_buffer. TTM maintains
separate sync objects for the last read and the last write of each buffer,
in addition to the sync object for the last use of the buffer. ttm_bo_wait
then operates on one of these sync objects, selected by its new
ttm_buffer_usage argument.
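The header changes are not included in the hunks below; judging from how the
flags are used there (entry->usage & TTM_USAGE_READ, with TTM_USAGE_READWRITE
as the catch-all), the declarations presumably look roughly like this sketch
(names taken from the diff, exact values an assumption):

	enum ttm_buffer_usage {
		TTM_USAGE_READ = 1,
		TTM_USAGE_WRITE = 2,
		TTM_USAGE_READWRITE = TTM_USAGE_READ | TTM_USAGE_WRITE
	};

	/* struct ttm_buffer_object grows two fences next to sync_obj,
	 * and struct ttm_validate_buffer gains a usage field: */
	void *sync_obj_read;	/* last read */
	void *sync_obj_write;	/* last write */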
Signed-off-by: Marek Olšák <maraeo@gmail.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 890d50e..e87e24b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1104,7 +1104,8 @@
if (vma->node) {
if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
spin_lock(&nvbo->bo.bdev->fence_lock);
- ttm_bo_wait(&nvbo->bo, false, false, false);
+ ttm_bo_wait(&nvbo->bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
nouveau_vm_unmap(vma);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5f0bc57..322bf62 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -589,7 +589,8 @@
}
spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+ ret = ttm_bo_wait(&nvbo->bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
if (ret) {
NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
@@ -825,7 +826,7 @@
nvbo = nouveau_gem_object(gem);
spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
+ ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
drm_gem_object_unreference_unlocked(gem);
return ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index fae00c0..14e8531 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -80,6 +80,7 @@
p->relocs[i].lobj.wdomain = r->write_domain;
p->relocs[i].lobj.rdomain = r->read_domains;
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+ p->relocs[i].lobj.tv.usage = TTM_USAGE_READWRITE;
p->relocs[i].handle = r->handle;
p->relocs[i].flags = r->flags;
radeon_bo_list_add_object(&p->relocs[i].lobj,
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13..e9dc8b2 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -130,7 +130,7 @@
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bo->tbo.bdev->fence_lock);
ttm_bo_unreserve(&bo->tbo);
return r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a4d38d8..b824d9b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -498,7 +498,7 @@
int ret;
spin_lock(&bdev->fence_lock);
- (void) ttm_bo_wait(bo, false, false, true);
+ (void) ttm_bo_wait(bo, false, false, true, TTM_USAGE_READWRITE);
if (!bo->sync_obj) {
spin_lock(&glob->lru_lock);
@@ -566,7 +566,8 @@
retry:
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0))
@@ -725,7 +726,8 @@
int ret = 0;
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
@@ -1072,7 +1074,8 @@
* instead of doing it here.
*/
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
@@ -1692,34 +1695,83 @@
return ret;
}
+static void ttm_bo_unref_sync_obj_locked(struct ttm_buffer_object *bo,
+ void *sync_obj,
+ void **extra_sync_obj)
+{
+ struct ttm_bo_device *bdev = bo->bdev;
+ struct ttm_bo_driver *driver = bdev->driver;
+ void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
+
+ /* We must unref the sync obj wherever it's ref'd.
+ * Note that if we unref bo->sync_obj, we can unref both the read
+ * and write sync objs too, because they can't be newer than
+ * bo->sync_obj, so they are no longer relevant. */
+ if (sync_obj == bo->sync_obj ||
+ sync_obj == bo->sync_obj_read) {
+ tmp_obj_read = bo->sync_obj_read;
+ bo->sync_obj_read = NULL;
+ }
+ if (sync_obj == bo->sync_obj ||
+ sync_obj == bo->sync_obj_write) {
+ tmp_obj_write = bo->sync_obj_write;
+ bo->sync_obj_write = NULL;
+ }
+ if (sync_obj == bo->sync_obj) {
+ tmp_obj = bo->sync_obj;
+ bo->sync_obj = NULL;
+ }
+
+ clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+ spin_unlock(&bdev->fence_lock);
+ if (tmp_obj)
+ driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
+ if (extra_sync_obj)
+ driver->sync_obj_unref(extra_sync_obj);
+ spin_lock(&bdev->fence_lock);
+}
+
int ttm_bo_wait(struct ttm_buffer_object *bo,
- bool lazy, bool interruptible, bool no_wait)
+ bool lazy, bool interruptible, bool no_wait,
+ enum ttm_buffer_usage usage)
{
struct ttm_bo_driver *driver = bo->bdev->driver;
struct ttm_bo_device *bdev = bo->bdev;
void *sync_obj;
void *sync_obj_arg;
int ret = 0;
+ void **bo_sync_obj;
- if (likely(bo->sync_obj == NULL))
+ switch (usage) {
+ case TTM_USAGE_READ:
+ bo_sync_obj = &bo->sync_obj_read;
+ break;
+ case TTM_USAGE_WRITE:
+ bo_sync_obj = &bo->sync_obj_write;
+ break;
+ case TTM_USAGE_READWRITE:
+ default:
+ bo_sync_obj = &bo->sync_obj;
+ }
+
+ if (likely(*bo_sync_obj == NULL))
return 0;
- while (bo->sync_obj) {
+ while (*bo_sync_obj) {
- if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
+ if (driver->sync_obj_signaled(*bo_sync_obj, bo->sync_obj_arg)) {
+ ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, NULL);
continue;
}
if (no_wait)
return -EBUSY;
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
+ sync_obj = driver->sync_obj_ref(*bo_sync_obj);
sync_obj_arg = bo->sync_obj_arg;
spin_unlock(&bdev->fence_lock);
ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
@@ -1730,16 +1782,9 @@
return ret;
}
spin_lock(&bdev->fence_lock);
- if (likely(bo->sync_obj == sync_obj &&
+ if (likely(*bo_sync_obj == sync_obj &&
bo->sync_obj_arg == sync_obj_arg)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING,
- &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
+ ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, &sync_obj);
} else {
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
@@ -1763,7 +1808,7 @@
if (unlikely(ret != 0))
return ret;
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, true, no_wait);
+ ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (likely(ret == 0))
atomic_inc(&bo->cpu_writers);
@@ -1837,7 +1882,7 @@
*/
spin_lock(&bo->bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, false, false);
+ ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
spin_unlock(&bo->bdev->fence_lock);
if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ae3c6f5..6135f58 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -436,6 +436,8 @@
atomic_set(&fbo->cpu_writers, 0);
fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
+ fbo->sync_obj_read = driver->sync_obj_ref(bo->sync_obj_read);
+ fbo->sync_obj_write = driver->sync_obj_ref(bo->sync_obj_write);
kref_init(&fbo->list_kref);
kref_init(&fbo->kref);
fbo->destroy = &ttm_transfered_destroy;
@@ -618,20 +620,30 @@
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
- void *tmp_obj = NULL;
+ void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
spin_lock(&bdev->fence_lock);
- if (bo->sync_obj) {
+ if (bo->sync_obj)
tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- }
+ if (bo->sync_obj_read)
+ tmp_obj_read = bo->sync_obj_read;
+ if (bo->sync_obj_write)
+ tmp_obj_write = bo->sync_obj_write;
+
bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+ bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
bo->sync_obj_arg = sync_obj_arg;
if (evict) {
- ret = ttm_bo_wait(bo, false, false, false);
+ ret = ttm_bo_wait(bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
if (ret)
return ret;
@@ -655,6 +667,10 @@
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 221b924..ff1e26f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -122,7 +122,7 @@
spin_lock(&bdev->fence_lock);
if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
- ret = ttm_bo_wait(bo, false, true, false);
+ ret = ttm_bo_wait(bo, false, true, false, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
retval = (ret != -ERESTARTSYS) ?
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 3832fe1..36d111a 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -221,8 +221,18 @@
list_for_each_entry(entry, list, head) {
bo = entry->bo;
+ entry->old_sync_obj_read = NULL;
+ entry->old_sync_obj_write = NULL;
entry->old_sync_obj = bo->sync_obj;
bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ if (entry->usage & TTM_USAGE_READ) {
+ entry->old_sync_obj_read = bo->sync_obj_read;
+ bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+ }
+ if (entry->usage & TTM_USAGE_WRITE) {
+ entry->old_sync_obj_write = bo->sync_obj_write;
+ bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
+ }
bo->sync_obj_arg = entry->new_sync_obj_arg;
ttm_bo_unreserve_locked(bo);
entry->reserved = false;
@@ -231,8 +241,15 @@
spin_unlock(&bdev->fence_lock);
list_for_each_entry(entry, list, head) {
- if (entry->old_sync_obj)
+ if (entry->old_sync_obj) {
driver->sync_obj_unref(&entry->old_sync_obj);
+ }
+ if (entry->old_sync_obj_read) {
+ driver->sync_obj_unref(&entry->old_sync_obj_read);
+ }
+ if (entry->old_sync_obj_write) {
+ driver->sync_obj_unref(&entry->old_sync_obj_write);
+ }
}
}
EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 41b95ed..8ca3ddb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -224,6 +224,7 @@
if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
val_buf = &sw_context->val_bufs[cur_validate_node];
val_buf->bo = ttm_bo_reference(bo);
+ val_buf->usage = TTM_USAGE_READWRITE;
val_buf->new_sync_obj_arg = (void *) dev_priv;
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
++sw_context->cur_val_buf;