radeonsi: implement and rely on set_active_query_state

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c5ea8b1..aed783f 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -52,8 +52,6 @@
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	r600_suspend_nontimer_queries(&sctx->b);
-
 	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
 	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
 	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
@@ -95,7 +93,6 @@
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	sctx->b.render_cond_force_off = false;
-	r600_resume_nontimer_queries(&sctx->b);
 }
 
 static unsigned u_max_sample(struct pipe_resource *r)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4158fc5..8fcfcd2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -66,6 +66,9 @@
 /* Compute only. */
 #define SI_CONTEXT_FLUSH_WITH_INV_L2	(R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
 #define SI_CONTEXT_FLAG_COMPUTE		(R600_CONTEXT_PRIVATE_FLAG << 14)
+/* Pipeline & streamout query controls. */
+#define SI_CONTEXT_START_PIPELINE_STATS	(R600_CONTEXT_PRIVATE_FLAG << 15)
+#define SI_CONTEXT_STOP_PIPELINE_STATS	(R600_CONTEXT_PRIVATE_FLAG << 16)
 
 #define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \
 					      SI_CONTEXT_FLUSH_AND_INV_CB_META | \
@@ -289,6 +292,7 @@
 	bool			db_stencil_clear;
 	bool			db_stencil_disable_expclear;
 	unsigned		ps_db_shader_control;
+	bool			occlusion_queries_disabled;
 
 	/* Emitted draw state. */
 	int			last_base_vertex;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8087d23..6e40671 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1348,6 +1348,26 @@
 
 /* DB RENDER STATE */
 
+static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
+{
+	struct si_context *sctx = (struct si_context*)ctx;
+
+	/* Pipeline stat & streamout queries: set the START or STOP flag and clear the opposite one. */
+	if (enable) {
+		sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
+		sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
+	} else {
+		sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
+		sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
+	}
+
+	/* Occlusion queries: store the inverted enable; dirty db_render_state only when it changes. */
+	if (sctx->occlusion_queries_disabled != !enable) {
+		sctx->occlusion_queries_disabled = !enable;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	}
+}
+
 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
@@ -1382,7 +1402,8 @@
 	}
 
 	/* DB_COUNT_CONTROL (occlusion queries) */
-	if (sctx->b.num_occlusion_queries > 0) {
+	if (sctx->b.num_occlusion_queries > 0 &&
+	    !sctx->occlusion_queries_disabled) {
 		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
 
 		if (sctx->b.chip_class >= CIK) {
@@ -3740,6 +3761,7 @@
 	sctx->b.b.set_min_samples = si_set_min_samples;
 	sctx->b.b.set_tess_state = si_set_tess_state;
 
+	sctx->b.b.set_active_query_state = si_set_active_query_state;
 	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
 	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
 
@@ -3970,6 +3992,14 @@
 	si_pm4_cmd_add(pm4, 0x80000000);
 	si_pm4_cmd_end(pm4, false);
 
+	/* This enables pipeline stat & streamout queries.
+	 * They are only disabled by blits.
+	 */
+	si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+	si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
+		            EVENT_INDEX(0));
+	si_pm4_cmd_end(pm4, false);
+
 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
 	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 84b850a..ece0c6d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -722,6 +722,16 @@
 		}
 	}
 
+	if (sctx->flags & SI_CONTEXT_START_PIPELINE_STATS) {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
+			        EVENT_INDEX(0));
+	} else if (sctx->flags & SI_CONTEXT_STOP_PIPELINE_STATS) {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
+			        EVENT_INDEX(0));
+	}
+
 	sctx->flags = 0;
 }