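freedreno/a6xx: cache flush harder

fd6_cache_flush() now emits a timestamped CACHE_FLUSH_AND_INV_EVENT
followed by a CP_WAIT_REG_MEM packet on the seqno that was written,
then a timestamped CACHE_FLUSH_TS event and a CP_UNK_A6XX_14 packet
referencing that second seqno.  To support this, fd6_event_write() now
returns the seqno it emitted (0 when no timestamp is requested).

The previous fd6_cache_flush() behaviour (a single 0x31 event without
timestamp) moves to a new fd6_cache_inv() helper, and the existing
callers in fd6_draw.c, fd6_emit.c and fd6_gmem.c are switched over to
it.  The compute path in fd6_compute.c additionally calls the new
fd6_cache_flush() after OUT_WFI5.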

Signed-off-by: Rob Clark <robdclark@gmail.com>
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index fd6b2d0..a3f7d53 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -199,6 +199,8 @@
 	}
 
 	OUT_WFI5(ring);
+
+	fd6_cache_flush(ctx->batch, ring);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 1c703a2..adbdef2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -368,7 +368,7 @@
 	fd6_event_write(batch, ring, FACENESS_FLUSH, true);
 	fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
 
-	fd6_cache_flush(batch, ring);
+	fd6_cache_inv(batch, ring);
 }
 
 static bool is_z32(enum pipe_format format)
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index bf9aed6..daa06d2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -1055,7 +1055,7 @@
 {
 	//struct fd_context *ctx = batch->ctx;
 
-	fd6_cache_flush(batch, ring);
+	fd6_cache_inv(batch, ring);
 
 	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
 	OUT_RING(ring, 0xfffff);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index ab7ace4..6c02bd9 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -119,25 +119,53 @@
 	g->enable_mask = enable_mask;
 }
 
-static inline void
+static inline unsigned
 fd6_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
 		enum vgt_event_type evt, bool timestamp)
 {
+	unsigned seqno = 0;
+
 	fd_reset_wfi(batch);
 
 	OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
 	if (timestamp) {
 		struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
+		seqno = ++fd6_ctx->seqno;
 		OUT_RELOCW(ring, fd6_ctx->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
-		OUT_RING(ring, ++fd6_ctx->seqno);
+		OUT_RING(ring, seqno);
 	}
+
+	return seqno;
+}
+
+static inline void
+fd6_cache_inv(struct fd_batch *batch, struct fd_ringbuffer *ring)
+{
+	fd6_event_write(batch, ring, 0x31, false);
 }
 
 static inline void
 fd6_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
-	fd6_event_write(batch, ring, 0x31, false);
+	struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
+	unsigned seqno;
+
+	seqno = fd6_event_write(batch, ring, CACHE_FLUSH_AND_INV_EVENT, true);
+
+	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+	OUT_RING(ring, 0x00000013);
+	OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0);
+	OUT_RING(ring, seqno);
+	OUT_RING(ring, 0xffffffff);
+	OUT_RING(ring, 0x00000010);
+
+	seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+
+	OUT_PKT7(ring, CP_UNK_A6XX_14, 4);
+	OUT_RING(ring, 0x00000000);
+	OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0);
+	OUT_RING(ring, seqno);
 }
 
 static inline void
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 3c9df92..fa1090d 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -463,7 +463,7 @@
 	if (batch->lrz_clear)
 		fd6_emit_ib(ring, batch->lrz_clear);
 
-	fd6_cache_flush(batch, ring);
+	fd6_cache_inv(batch, ring);
 
 	prepare_tile_setup_ib(batch);
 	prepare_tile_fini_ib(batch);
@@ -1045,7 +1045,7 @@
 	OUT_RING(ring, 0x0);
 
 	fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
-	fd6_cache_flush(batch, ring);
+	fd6_cache_inv(batch, ring);
 
 #if 0
 	OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);