winsys/radeon: move managing GEM domains back to drivers

This partially reverts commit 363ff844753c46ac9c13866627e096b091ea81f8.

It caused severe performance drops in Nexuiz. Reported by Phoronix.

Tested by me on r300g and by IRC people on r600g.
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index e1c12d9..5c0f53e 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -302,6 +302,8 @@
     struct pb_buffer *buf;
     struct radeon_winsys_cs_handle *cs_buf;
 
+    enum radeon_bo_domain domain;
+
     uint32_t offset;    /* COLOROFFSET or DEPTHOFFSET. */
     uint32_t pitch;     /* COLORPITCH or DEPTHPITCH. */
     uint32_t pitch_zmask; /* ZMASK_PITCH */
@@ -385,6 +387,7 @@
     /* Winsys buffer backing this resource. */
     struct pb_buffer *buf;
     struct radeon_winsys_cs_handle *cs_buf;
+    enum radeon_bo_domain domain;
 
     /* Constant buffers are in user memory. */
     uint8_t *constant_buffer;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d93a578..3897e99 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1190,14 +1190,16 @@
             tex = r300_resource(fb->cbufs[i]->texture);
             assert(tex && tex->buf && "cbuf is marked, but NULL!");
             r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
-                                    RADEON_USAGE_READWRITE);
+                                    RADEON_USAGE_READWRITE,
+                                    r300_surface(fb->cbufs[i])->domain);
         }
         /* ...depth buffer... */
         if (fb->zsbuf) {
             tex = r300_resource(fb->zsbuf->texture);
             assert(tex && tex->buf && "zsbuf is marked, but NULL!");
             r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
-                                    RADEON_USAGE_READWRITE);
+                                    RADEON_USAGE_READWRITE,
+                                    r300_surface(fb->zsbuf)->domain);
         }
     }
     if (r300->textures_state.dirty) {
@@ -1208,17 +1210,19 @@
             }
 
             tex = r300_resource(texstate->sampler_views[i]->base.texture);
-            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ);
+            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+                                    tex->domain);
         }
     }
     /* ...occlusion query buffer... */
     if (r300->query_current)
         r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
-                                RADEON_USAGE_WRITE);
+                                RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
     /* ...vertex buffer for SWTCL path... */
     if (r300->vbo)
         r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf,
-                                RADEON_USAGE_READ);
+                                RADEON_USAGE_READ,
+                                r300_resource(r300->vbo)->domain);
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
         struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->real_vertex_buffer;
@@ -1231,13 +1235,15 @@
                 continue;
 
             r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
-                                    RADEON_USAGE_READ);
+                                    RADEON_USAGE_READ,
+                                    r300_resource(buf)->domain);
         }
     }
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
         r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
-                                RADEON_USAGE_READ);
+                                RADEON_USAGE_READ,
+                                r300_resource(index_buffer)->domain);
 
     /* Now do the validation (flush is called inside cs_validate on failure). */
     if (!r300->rws->cs_validate(r300->cs)) {
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 9459a95..f854644 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -80,11 +80,11 @@
         /* Create a fence, which is a dummy BO. */
         *rfence = r300->rws->buffer_create(r300->rws, 1, 1,
                                            PIPE_BIND_CUSTOM,
-                                           PIPE_USAGE_IMMUTABLE);
+                                           RADEON_DOMAIN_GTT);
         /* Add the fence as a dummy relocation. */
         r300->rws->cs_add_reloc(r300->cs,
                                 r300->rws->buffer_get_cs_handle(*rfence),
-                                RADEON_USAGE_READWRITE);
+                                RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
     }
 
     if (r300->dirty_hw) {
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 8f7de79..bcf6d0e 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -58,7 +58,7 @@
         q->num_pipes = r300screen->info.r300_num_gb_pipes;
 
     q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096,
-                                      PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING);
+                                      PIPE_BIND_CUSTOM, RADEON_DOMAIN_GTT);
     if (!q->buf) {
         FREE(q);
         return NULL;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index a5ec8ef..a8392d2 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -187,6 +187,7 @@
     pipe_reference_init(&rbuf->b.b.b.reference, 1);
     rbuf->b.b.b.screen = screen;
     rbuf->b.user_ptr = NULL;
+    rbuf->domain = RADEON_DOMAIN_GTT;
     rbuf->buf = NULL;
     rbuf->constant_buffer = NULL;
 
@@ -196,16 +197,10 @@
         return &rbuf->b.b.b;
     }
 
-#ifdef PIPE_ARCH_BIG_ENDIAN
-    /* Force buffer placement to GTT on big endian machines, because
-     * the vertex fetcher can't swap bytes from VRAM. */
-    rbuf->b.b.b.usage = PIPE_USAGE_STAGING;
-#endif
-
     rbuf->buf =
         r300screen->rws->buffer_create(r300screen->rws,
                                        rbuf->b.b.b.width0, alignment,
-                                       rbuf->b.b.b.bind, rbuf->b.b.b.usage);
+                                       rbuf->b.b.b.bind, rbuf->domain);
     if (!rbuf->buf) {
         util_slab_free(&r300screen->pool_buffers, rbuf);
         return NULL;
@@ -239,6 +234,7 @@
     rbuf->b.b.b.flags = 0;
     rbuf->b.b.vtbl = &r300_buffer_vtbl;
     rbuf->b.user_ptr = ptr;
+    rbuf->domain = RADEON_DOMAIN_GTT;
     rbuf->buf = NULL;
     rbuf->constant_buffer = NULL;
     return &rbuf->b.b.b;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 2738f58..6fc60fb 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -901,6 +901,9 @@
     tex->tex.microtile = microtile;
     tex->tex.macrotile[0] = macrotile;
     tex->tex.stride_in_bytes_override = stride_in_bytes_override;
+    tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+                  RADEON_DOMAIN_GTT :
+                  RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
     tex->buf = buffer;
 
     r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, base);
@@ -908,7 +911,7 @@
     /* Create the backing buffer if needed. */
     if (!tex->buf) {
         tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
-                                      base->bind, base->usage);
+                                      base->bind, tex->domain);
 
         if (!tex->buf) {
             FREE(tex);
@@ -1019,6 +1022,11 @@
         surface->buf = tex->buf;
         surface->cs_buf = tex->cs_buf;
 
+        /* Prefer VRAM if there are multiple domains to choose from. */
+        surface->domain = tex->domain;
+        if (surface->domain & RADEON_DOMAIN_VRAM)
+            surface->domain &= ~RADEON_DOMAIN_GTT;
+
         surface->offset = r300_texture_get_offset(tex, level,
                                                   surf_tmpl->u.tex.first_layer);
         r300_texture_setup_fb_state(surface);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index fbd12fb..4bfb5a9 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -88,6 +88,9 @@
 	/* Winsys objects. */
 	struct pb_buffer		*buf;
 	struct radeon_winsys_cs_handle	*cs_buf;
+
+	/* Resource state. */
+	unsigned			domains;
 };
 
 /* R600/R700 STATES */
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index f438886..a0386fe 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -151,12 +151,40 @@
 			unsigned size, unsigned alignment,
 			unsigned bind, unsigned usage)
 {
-	res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, usage);
+	uint32_t initial_domain, domains;
+
+	/* Staging resources participate in transfers and blits only
+	 * and are used for uploads and downloads from regular
+	 * resources.  We generate them internally for some transfers.
+	 */
+	if (usage == PIPE_USAGE_STAGING) {
+		domains = RADEON_DOMAIN_GTT;
+		initial_domain = RADEON_DOMAIN_GTT;
+	} else {
+		domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
+
+		switch(usage) {
+		case PIPE_USAGE_DYNAMIC:
+		case PIPE_USAGE_STREAM:
+		case PIPE_USAGE_STAGING:
+			initial_domain = RADEON_DOMAIN_GTT;
+			break;
+		case PIPE_USAGE_DEFAULT:
+		case PIPE_USAGE_STATIC:
+		case PIPE_USAGE_IMMUTABLE:
+		default:
+			initial_domain = RADEON_DOMAIN_VRAM;
+			break;
+		}
+	}
+
+	res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, initial_domain);
 	if (!res->buf) {
 		return false;
 	}
 
 	res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
+	res->domains = domains;
 	return true;
 }
 
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 206de7e..2ad5624 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -90,7 +90,7 @@
 
 	assert(usage);
 
-	reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage);
+	reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains);
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;
 
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 2d041b0..8fe54c8 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -469,11 +469,13 @@
 	} else if (buf) {
 		resource->buf = buf;
 		resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
+		resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 	}
 
 	if (rtex->stencil) {
 		pb_reference(&rtex->stencil->resource.buf, rtex->resource.buf);
 		rtex->stencil->resource.cs_buf = rtex->resource.cs_buf;
+		rtex->stencil->resource.domains = rtex->resource.domains;
 	}
 	return rtex;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index ccf9c4f..d4746ff 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -346,11 +346,9 @@
 
     memset(&args, 0, sizeof(args));
 
-    assert(rdesc->initial_domains && rdesc->reloc_domains);
+    assert(rdesc->initial_domains);
     assert((rdesc->initial_domains &
             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
-    assert((rdesc->reloc_domains &
-            ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
 
     args.size = size;
     args.alignment = desc->alignment;
@@ -377,7 +375,6 @@
     bo->mgr = mgr;
     bo->rws = mgr->rws;
     bo->handle = args.handle;
-    bo->reloc_domains = rdesc->reloc_domains;
     pipe_mutex_init(bo->map_mutex);
 
     return &bo->base;
@@ -526,7 +523,8 @@
 radeon_winsys_bo_create(struct radeon_winsys *rws,
                         unsigned size,
                         unsigned alignment,
-                        unsigned bind, unsigned usage)
+                        unsigned bind,
+                        enum radeon_bo_domain domain)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
     struct radeon_bo_desc desc;
@@ -536,31 +534,9 @@
     memset(&desc, 0, sizeof(desc));
     desc.base.alignment = alignment;
 
-    /* Determine the memory domains. */
-    switch (usage) {
-    case PIPE_USAGE_STAGING:
-    case PIPE_USAGE_STREAM:
-    case PIPE_USAGE_DYNAMIC:
-            desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
-            desc.reloc_domains = RADEON_GEM_DOMAIN_GTT;
-            break;
-    case PIPE_USAGE_IMMUTABLE:
-    case PIPE_USAGE_STATIC:
-            desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
-            desc.reloc_domains = RADEON_GEM_DOMAIN_VRAM;
-            break;
-    default:
-            if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
-                        PIPE_BIND_CONSTANT_BUFFER)) {
-                desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
-            } else {
-                desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
-            }
-            desc.reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
-    }
-
     /* Additional criteria for the cache manager. */
-    desc.base.usage = desc.initial_domains;
+    desc.base.usage = domain;
+    desc.initial_domains = domain;
 
     /* Assign a buffer manager. */
     if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
@@ -618,7 +594,6 @@
     }
     bo->handle = open_arg.handle;
     bo->name = whandle->handle;
-    bo->reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
 
     /* Initialize it. */
     pipe_reference_init(&bo->base.reference, 1);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index ba71cfb..35d25e8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -42,7 +42,6 @@
     struct pb_desc base;
 
     unsigned initial_domains;
-    unsigned reloc_domains;
 };
 
 struct radeon_bo {
@@ -58,7 +57,6 @@
     void *ptr;
     pipe_mutex map_mutex;
 
-    uint32_t reloc_domains;
     uint32_t handle;
     uint32_t name;
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 2239059..e6109af 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -181,13 +181,14 @@
 #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
 
 static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
-                                        enum radeon_bo_usage usage,
-                                        unsigned domains)
+                                        enum radeon_bo_domain rd,
+                                        enum radeon_bo_domain wd,
+                                        enum radeon_bo_domain *added_domains)
 {
-    if (usage & RADEON_USAGE_READ)
-        reloc->read_domains |= domains;
-    if (usage & RADEON_USAGE_WRITE)
-        reloc->write_domain |= domains;
+    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+
+    reloc->read_domains |= rd;
+    reloc->write_domain |= wd;
 }
 
 int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
@@ -209,7 +210,7 @@
             if (reloc->handle == bo->handle) {
                 /* Put this reloc in the hash list.
                  * This will prevent additional hash collisions if there are
-                 * several subsequent get_reloc calls of the same buffer.
+                 * several consecutive get_reloc calls for the same buffer.
                  *
                  * Example: Assuming buffers A,B,C collide in the hash list,
                  * the following sequence of relocs:
@@ -230,16 +231,19 @@
 static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
-                                 unsigned *added_domains)
+                                 enum radeon_bo_domain domains,
+                                 enum radeon_bo_domain *added_domains)
 {
     struct drm_radeon_cs_reloc *reloc;
     unsigned i;
     unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
+    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
 
     if (csc->is_handle_added[hash]) {
         reloc = csc->relocs_hashlist[hash];
         if (reloc->handle == bo->handle) {
-            update_reloc_domains(reloc, usage, bo->reloc_domains);
+            update_reloc_domains(reloc, rd, wd, added_domains);
             return csc->reloc_indices_hashlist[hash];
         }
 
@@ -248,7 +252,7 @@
             --i;
             reloc = &csc->relocs[i];
             if (reloc->handle == bo->handle) {
-                update_reloc_domains(reloc, usage, bo->reloc_domains);
+                update_reloc_domains(reloc, rd, wd, added_domains);
 
                 csc->relocs_hashlist[hash] = reloc;
                 csc->reloc_indices_hashlist[hash] = i;
@@ -278,10 +282,8 @@
     p_atomic_inc(&bo->num_cs_references);
     reloc = &csc->relocs[csc->crelocs];
     reloc->handle = bo->handle;
-    if (usage & RADEON_USAGE_READ)
-        reloc->read_domains = bo->reloc_domains;
-    if (usage & RADEON_USAGE_WRITE)
-        reloc->write_domain = bo->reloc_domains;
+    reloc->read_domains = rd;
+    reloc->write_domain = wd;
     reloc->flags = 0;
 
     csc->is_handle_added[hash] = TRUE;
@@ -290,23 +292,24 @@
 
     csc->chunks[1].length_dw += RELOC_DWORDS;
 
-    *added_domains = bo->reloc_domains;
+    *added_domains = rd | wd;
     return csc->crelocs++;
 }
 
 static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                         struct radeon_winsys_cs_handle *buf,
-                                        enum radeon_bo_usage usage)
+                                        enum radeon_bo_usage usage,
+                                        enum radeon_bo_domain domains)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
-    unsigned added_domains = 0;
+    enum radeon_bo_domain added_domains;
 
-    unsigned index = radeon_add_reloc(cs->csc, bo, usage, &added_domains);
+    unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains);
 
-    if (added_domains & RADEON_GEM_DOMAIN_GTT)
+    if (added_domains & RADEON_DOMAIN_GTT)
         cs->csc->used_gart += bo->base.size;
-    if (added_domains & RADEON_GEM_DOMAIN_VRAM)
+    if (added_domains & RADEON_DOMAIN_VRAM)
         cs->csc->used_vram += bo->base.size;
 
     return index;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index ea335d8..59c1aad 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -58,6 +58,11 @@
     RADEON_LAYOUT_UNKNOWN
 };
 
+enum radeon_bo_domain { /* bitfield */
+    RADEON_DOMAIN_GTT  = 2,
+    RADEON_DOMAIN_VRAM = 4
+};
+
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
@@ -137,13 +142,14 @@
      * \param size      The size to allocate.
      * \param alignment An alignment of the buffer in memory.
      * \param bind      A bitmask of the PIPE_BIND_* flags.
-     * \param usage     A bitmask of the PIPE_USAGE_* flags.
+     * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
      * \return          The created buffer object.
      */
     struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
                                        unsigned size,
                                        unsigned alignment,
-                                       unsigned bind, unsigned usage);
+                                       unsigned bind,
+                                       enum radeon_bo_domain domain);
 
     struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
             struct pb_buffer *buf);
@@ -271,12 +277,14 @@
      *
      * \param cs  A command stream to add buffer for validation against.
      * \param buf A winsys buffer to validate.
-     * \param usage  Whether the buffer is used for read and/or write.
+     * \param usage   Whether the buffer is used for read and/or write.
+     * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
      * \return Relocation index.
      */
     unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
                              struct radeon_winsys_cs_handle *buf,
-                             enum radeon_bo_usage usage);
+                             enum radeon_bo_usage usage,
+                             enum radeon_bo_domain domain);
 
     /**
      * Return TRUE if there is enough memory in VRAM and GTT for the relocs