r600g: use an enabled list to track enabled blocks.

At the end of flushing we were scanning over 450 blocks
with generally about 50 enabled. This reduces the scanning
to just the list of enabled blocks.

Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index cc70600..b1444bf 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -188,6 +188,7 @@
 
 struct r600_block {
 	struct list_head	list;
+	struct list_head	enable_list;
 	unsigned		status;
 	unsigned                flags;
 	unsigned		start_offset;
@@ -251,6 +252,7 @@
 	unsigned		nblocks;
 	struct r600_block	**blocks;
 	struct list_head	dirty;
+	struct list_head	enable_list;
 	unsigned		pm4_ndwords;
 	unsigned		pm4_cdwords;
 	unsigned		pm4_dirty_cdwords;
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 7072461..7a1be87 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -43,31 +43,31 @@
 static const struct r600_reg evergreen_config_reg_list[] = {
 	{R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
 	{R_008A14_PA_CL_ENHANCE, 0, 0, 0},
-	{R_008C00_SQ_CONFIG, 0, 0, 0},
-	{R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-	{R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0},
-	{R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0},
-	{R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0},
-	{R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0},
-	{R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0},
-	{R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0},
-	{R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0},
-	{R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
-	{R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
-	{R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+	{R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C18_SQ_THREAD_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C20_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
 };
 
 
 static const struct r600_reg cayman_config_reg_list[] = {
 	{R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
 	{R_008A14_PA_CL_ENHANCE, 0, 0, 0},
-	{R_008C00_SQ_CONFIG, 0, 0, 0},
-	{R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-	{CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0},
-	{CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0},
-	{R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
-	{R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
-	{R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+	{R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0},
+	{R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0},
 };
 
 static const struct r600_reg evergreen_ctl_const_list[] = {
@@ -904,6 +904,10 @@
 	ctx->radeon = radeon;
 	LIST_INITHEAD(&ctx->query_list);
 
+	/* init dirty list */
+	LIST_INITHEAD(&ctx->dirty);
+	LIST_INITHEAD(&ctx->enable_list);
+
 	ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
 	if (!ctx->range) {
 		r = -ENOMEM;
@@ -1007,8 +1011,6 @@
 
 	LIST_INITHEAD(&ctx->fenced_bo);
 
-	/* init dirty list */
-	LIST_INITHEAD(&ctx->dirty);
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -1048,6 +1050,7 @@
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
 		LIST_DELINIT(&block->list);
+		LIST_DELINIT(&block->enable_list);
 		return;
 	}
 	dirty = block->status & R600_BLOCK_STATUS_DIRTY;
@@ -1086,6 +1089,7 @@
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
 		LIST_DELINIT(&block->list);
+		LIST_DELINIT(&block->enable_list);
 		return;
 	}
 	if (state->nregs <= 3) {
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 1fd6d34..c5551b2 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -98,11 +98,17 @@
 	block->nreg_dirty = n;
 	block->flags = 0;
 	LIST_INITHEAD(&block->list);
+	LIST_INITHEAD(&block->enable_list);
 
 	for (j = 0; j < n; j++) {
 		if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) {
 			block->flags |= REG_FLAG_DIRTY_ALWAYS;
 		}
+		/* several regs of one block may be ENABLE_ALWAYS; link the block only once */
+		if ((reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) && !(block->status & R600_BLOCK_STATUS_ENABLED)) {
+			block->status |= R600_BLOCK_STATUS_ENABLED;
+			LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
+		}
 		if (reg[i+j].flags & REG_FLAG_NEED_BO) {
 			block->nbo++;
 			assert(block->nbo < R600_BLOCK_MAX_BO);
@@ -184,6 +190,7 @@
 		}
 
 		r600_init_block(ctx, block, reg, i, n, opcode, offset_base);
+
 	}
 	return 0;
 }
@@ -768,6 +775,10 @@
 	ctx->radeon = radeon;
 	LIST_INITHEAD(&ctx->query_list);
 
+	/* init dirty list */
+	LIST_INITHEAD(&ctx->dirty);
+	LIST_INITHEAD(&ctx->enable_list);
+
 	ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
 	if (!ctx->range) {
 		r = -ENOMEM;
@@ -861,9 +872,6 @@
 
 	LIST_INITHEAD(&ctx->fenced_bo);
 
-	/* init dirty list */
-	LIST_INITHEAD(&ctx->dirty);
-
 	ctx->max_db = 4;
 
 	return 0;
@@ -985,17 +993,20 @@
 		r600_context_dirty_block(ctx, block, dirty, id);
 }
 
-void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block,
+void r600_context_dirty_block(struct r600_context *ctx,
+			      struct r600_block *block,
 			      int dirty, int index)
 {
 	if ((index + 1) > block->nreg_dirty)
 		block->nreg_dirty = index + 1;
 
 	if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
-
-		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
+		if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
+			block->status |= R600_BLOCK_STATUS_ENABLED;
+			LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
+		}
 		LIST_ADDTAIL(&block->list,&ctx->dirty);
 	}
 }
@@ -1052,6 +1063,7 @@
 		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
 		r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
 		LIST_DELINIT(&block->list);
+		LIST_DELINIT(&block->enable_list);
 		return;
 	}
 
@@ -1143,6 +1155,7 @@
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
 		LIST_DELINIT(&block->list);
+		LIST_DELINIT(&block->enable_list);
 		return;
 	}
 	dirty = block->status & R600_BLOCK_STATUS_DIRTY;
@@ -1180,6 +1193,7 @@
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY);
 		LIST_DELINIT(&block->list);
+		LIST_DELINIT(&block->enable_list);
 		return;
 	}
 	if (state->nregs <= 3) {
@@ -1407,6 +1421,7 @@
 	uint64_t chunk_array[2];
 	unsigned fence;
 	int r;
+	struct r600_block *enable_block = NULL; /* cursor for walking ctx->enable_list */
 
 	if (!ctx->pm4_cdwords)
 		return;
@@ -1480,15 +1495,14 @@
 	/* set all valid group as dirty so they get reemited on
 	 * next draw command
 	 */
-	for (int i = 0; i < ctx->nblocks; i++) {
-		if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) {
-			if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) {
-				LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty);
-			}
-			ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
-			ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
-			ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg;
+	LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) {
+		if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
+			LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
 		}
+		ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords +
+			enable_block->pm4_flush_ndwords;
+		enable_block->status |= R600_BLOCK_STATUS_DIRTY;
+		enable_block->nreg_dirty = enable_block->nreg;
 	}
 }
 
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 3e0fd6d..d9cb524 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -66,6 +66,7 @@
 #define REG_FLAG_DIRTY_ALWAYS 2
 #define REG_FLAG_RV6XX_SBU 4
 #define REG_FLAG_NOT_R600 8
+#define REG_FLAG_ENABLE_ALWAYS 16
 
 struct r600_reg {
 	unsigned			offset;