r600g: set read/write usage flags for each relocation

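Every caller of r600_context_bo_reloc now states whether the buffer is
used for reading (RADEON_USAGE_READ) or writing (RADEON_USAGE_WRITE), and
only the corresponding read/write domain is passed on to cs_add_reloc.
This takes advantage of the new GEM_WAIT ioctl when mapping buffers.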

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index eaf4618..30bb0b8 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1186,7 +1186,7 @@
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index ba8d6c2..6c5b4b8 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -84,7 +84,7 @@
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
 
 		/* execute */
 		r600_context_flush(ctx, 0);
@@ -986,7 +986,7 @@
 		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
 	}
 	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
 }
@@ -1067,6 +1067,7 @@
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
 			r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo);
+			block->reloc[reloc_id].bo_usage = reg->bo_usage;
 			/* always force dirty for relocs for now */
 			dirty |= R600_BLOCK_STATUS_DIRTY;
 		}
@@ -1140,11 +1141,14 @@
 			 * we have single case btw VERTEX & TEXTURE resource
 			 */
 			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
 			r600_bo_reference(&block->reloc[2].bo, NULL);
 		} else {
 			/* TEXTURE RESOURCE */
 			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
 			r600_bo_reference(&block->reloc[2].bo, state->bo[1]);
+			block->reloc[2].bo_usage = state->bo_usage[1];
 			state->bo[0]->binding |= BO_BOUND_TEXTURE;
 		}
 
@@ -1279,7 +1283,6 @@
 
 void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
 	int cp_dwords = block->pm4_ndwords, start_dword = 0;
 	int new_dwords = 0;
@@ -1295,13 +1298,13 @@
 		for (int j = 0; j < block->nreg; j++) {
 			if (block->pm4_bo_index[j]) {
 				/* find relocation */
-				id = block->pm4_bo_index[j];
-				block->pm4[block->reloc[id].bo_pm4_index] =
-					r600_context_bo_reloc(ctx, block->reloc[id].bo);
+				struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+				block->pm4[reloc->bo_pm4_index] =
+					r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 				r600_context_bo_flush(ctx,
-						      block->reloc[id].flush_flags,
-						      block->reloc[id].flush_mask,
-						      block->reloc[id].bo);
+						      reloc->flush_flags,
+						      reloc->flush_mask,
+						      reloc->bo);
 				nbo--;
 				if (nbo == 0)
 					break;
@@ -1335,7 +1338,6 @@
 
 void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int cp_dwords = block->pm4_ndwords;
 	int nbo = block->nbo;
 
@@ -1349,13 +1351,13 @@
 	for (int j = 0; j < nbo; j++) {
 		if (block->pm4_bo_index[j]) {
 			/* find relocation */
-			id = block->pm4_bo_index[j];
-			block->pm4[block->reloc[id].bo_pm4_index] =
-				r600_context_bo_reloc(ctx, block->reloc[id].bo);
+			struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+			block->pm4[reloc->bo_pm4_index] =
+				r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 			r600_context_bo_flush(ctx,
-					      block->reloc[id].flush_flags,
-					      block->reloc[id].flush_mask,
-					      block->reloc[id].bo);
+					      reloc->flush_flags,
+					      reloc->flush_mask,
+					      reloc->bo);
 		}
 	}
 	ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
@@ -1466,7 +1468,7 @@
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
@@ -1561,7 +1563,7 @@
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
 }
 
 static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
@@ -1672,7 +1674,7 @@
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->state |= R600_QUERY_STATE_STARTED;
 	query->state ^= R600_QUERY_STATE_ENDED;
@@ -1696,7 +1698,7 @@
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->results_end += query->result_size;
 	if (query->results_end >= query->buffer_size)
@@ -1741,7 +1743,8 @@
 			ctx->pm4[ctx->pm4_cdwords++] = results_base;
 			ctx->pm4[ctx->pm4_cdwords++] = op;
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
+									     RADEON_USAGE_READ);
 			results_base += query->result_size;
 			if (results_base >= query->buffer_size)
 				results_base = 0;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index c5b82fd..1e90189 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -95,11 +95,17 @@
 void r600_init_cs(struct r600_context *ctx);
 int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base);
 
-static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo)
+static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo,
+					     enum radeon_bo_usage usage)
 {
+	enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0;
+	enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0;
+
+	assert(usage);
+
 	unsigned reloc_index =
 		ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf,
-					      rbo->domains, rbo->domains);
+					      rd, wd);
 
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;