r600g: improve bo flushing

Flush the read cache before writing registers. Track flushing inside
the same cs and avoid reflushing the same bo when not necessary. Almost
properly force a flush if a bo is rendered to and then used as a texture
in the same cs (a pipeline flush is missing; unclear whether it is needed).

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 4619207..f6ceb0a 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -53,6 +53,7 @@
 	unsigned			offset;
 	unsigned			need_bo;
 	unsigned			flush_flags;
+	unsigned			flush_mask;
 };
 
 struct radeon_bo {
@@ -68,6 +69,7 @@
 	boolean				set_busy;
 	struct r600_reloc		*reloc;
 	unsigned			reloc_id;
+	unsigned			last_flush;
 };
 
 struct r600_bo {
@@ -102,6 +104,8 @@
 
 /* r600_hw_context.c */
 void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo);
+void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
+				unsigned flush_mask, struct r600_bo *rbo);
 struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
 int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg);
 
@@ -148,6 +152,10 @@
 				r600_context_bo_reloc(ctx,
 					&block->pm4[block->reloc[id].bo_pm4_index[k]],
 					block->reloc[id].bo);
+				r600_context_bo_flush(ctx,
+							block->reloc[id].flush_flags,
+							block->reloc[id].flush_mask,
+							block->reloc[id].bo);
 			}
 		}
 	}