radeonsi: implement EXT_window_rectangles

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index 786145a..ac2cc1e 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -52,6 +52,7 @@
 
 <ul>
 <li>GL_AMD_framebuffer_multisample_advanced on radeonsi.</li>
+<li>GL_EXT_window_rectangles on radeonsi.</li>
 </ul>
 
 <h2>Bug fixes</h2>
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 3c3bc54..0671f7d 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -5282,6 +5282,22 @@
 #define   S_02820C_CLIP_RULE(x)                                       (((unsigned)(x) & 0xFFFF) << 0)
 #define   G_02820C_CLIP_RULE(x)                                       (((x) >> 0) & 0xFFFF)
 #define   C_02820C_CLIP_RULE                                          0xFFFF0000
+#define     V_02820C_OUT                                            0x0001
+#define     V_02820C_IN_0                                           0x0002
+#define     V_02820C_IN_1                                           0x0004
+#define     V_02820C_IN_10                                          0x0008
+#define     V_02820C_IN_2                                           0x0010
+#define     V_02820C_IN_20                                          0x0020
+#define     V_02820C_IN_21                                          0x0040
+#define     V_02820C_IN_210                                         0x0080
+#define     V_02820C_IN_3                                           0x0100
+#define     V_02820C_IN_30                                          0x0200
+#define     V_02820C_IN_31                                          0x0400
+#define     V_02820C_IN_310                                         0x0800
+#define     V_02820C_IN_32                                          0x1000
+#define     V_02820C_IN_320                                         0x2000
+#define     V_02820C_IN_321                                         0x4000
+#define     V_02820C_IN_3210                                        0x8000
 #define R_028210_PA_SC_CLIPRECT_0_TL                                    0x028210
 #define   S_028210_TL_X(x)                                            (((unsigned)(x) & 0x7FFF) << 0)
 #define   G_028210_TL_X(x)                                            (((x) >> 0) & 0x7FFF)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 0fd69f3..cf64952 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -59,6 +59,10 @@
 		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
 		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask);
 		util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+		util_blitter_save_window_rectangles(sctx->blitter,
+						    sctx->window_rectangles_include,
+						    sctx->num_window_rectangles,
+						    sctx->window_rectangles);
 	}
 
 	if (op & SI_SAVE_FRAMEBUFFER)
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 6e80479..e316703 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -208,6 +208,7 @@
 	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
 	case PIPE_CAP_MAX_VERTEX_STREAMS:
 	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
@@ -243,7 +244,6 @@
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 	case PIPE_CAP_TGSI_MUL_ZERO_WINS:
 	case PIPE_CAP_UMA:
 	case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 9dbd4c6..c39564e 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -278,6 +278,9 @@
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
+	/* CLEAR_STATE disables all window rectangles. */
+	if (!has_clear_state || ctx->num_window_rectangles > 0)
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
 	si_all_descriptors_begin_new_cs(ctx);
 	si_all_resident_buffers_begin_new_cs(ctx);
 
@@ -346,6 +349,7 @@
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]	= 0x3f800000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]	= 0xffff;
 
 		/* Set all saved registers state to saved. */
 		ctx->tracked_regs.reg_saved = 0xffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2f77cc7..cfe28c2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -804,6 +804,9 @@
 	struct si_scissors		scissors;
 	struct si_streamout		streamout;
 	struct si_viewports		viewports;
+	unsigned			num_window_rectangles;
+	bool				window_rectangles_include;
+	struct pipe_scissor_state	window_rectangles[4];
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fc1ec83..780d901 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4872,7 +4872,6 @@
 	}
 
 	if (!has_clear_state) {
-		si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
 		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
 			       S_028230_ER_TRI(0xA) |
 			       S_028230_ER_POINT(0xA) |
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 1edf5d6..1a8f3de 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -213,6 +213,7 @@
 		struct si_atom stencil_ref;
 		struct si_atom spi_map;
 		struct si_atom scratch_state;
+		struct si_atom window_rectangles;
 	} s;
 	struct si_atom array[0];
 };
@@ -281,6 +282,8 @@
 	SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ,
 	SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ,
 
+	SI_TRACKED_PA_SC_CLIPRECT_RULE,
+
 	SI_NUM_TRACKED_REGS,
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index 4183be0..8dc68b1 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -435,12 +435,92 @@
 	    si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 }
 
+static void si_emit_window_rectangles(struct si_context *sctx)
+{
+	/* Emit PA_SC_CLIPRECT_RULE plus the corners of the enabled cliprects.
+	 * There are four clipping rectangles. Their corner coordinates are inclusive.
+	 * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
+	 * on whether the pixel is inside cliprects 0-3, respectively. For example,
+	 * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+	 * the number 3 (binary 0011).
+	 * If CLIPRECT_RULE & (1 << number), the pixel is rasterized.
+	 */
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+	static const unsigned outside[4] = {
+		/* outside rectangle 0 */
+		V_02820C_OUT |
+		V_02820C_IN_1 |
+		V_02820C_IN_2 |
+		V_02820C_IN_21 |
+		V_02820C_IN_3 |
+		V_02820C_IN_31 |
+		V_02820C_IN_32 |
+		V_02820C_IN_321,
+		/* outside rectangles 0, 1 */
+		V_02820C_OUT |
+		V_02820C_IN_2 |
+		V_02820C_IN_3 |
+		V_02820C_IN_32,
+		/* outside rectangles 0, 1, 2 */
+		V_02820C_OUT |
+		V_02820C_IN_3,
+		/* outside rectangles 0, 1, 2, 3 */
+		V_02820C_OUT,
+	};
+	const unsigned disabled = 0xffff; /* all inside and outside cases */
+	unsigned num_rectangles = sctx->num_window_rectangles;
+	struct pipe_scissor_state *rects = sctx->window_rectangles;
+	unsigned rule;
+
+	assert(num_rectangles <= 4);
+	/* NOTE(review): ~ sets bits 16-31 too; CLIP_RULE is only 16 bits - confirm. */
+	if (num_rectangles == 0)
+		rule = disabled;
+	else if (sctx->window_rectangles_include)
+		rule = ~outside[num_rectangles - 1];
+	else
+		rule = outside[num_rectangles - 1];
+
+	radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE,
+				   SI_TRACKED_PA_SC_CLIPRECT_RULE, rule);
+	if (num_rectangles == 0)
+		return;
+	/* One TL/BR value pair is emitted per rectangle, starting at CLIPRECT_0_TL. */
+	radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL,
+				   num_rectangles * 2);
+	for (unsigned i = 0; i < num_rectangles; i++) {
+		radeon_emit(cs, S_028210_TL_X(rects[i].minx) |
+				S_028210_TL_Y(rects[i].miny));
+		radeon_emit(cs, S_028214_BR_X(rects[i].maxx) |
+				S_028214_BR_Y(rects[i].maxy));
+	}
+}
+
+static void si_set_window_rectangles(struct pipe_context *ctx,
+				     boolean include,
+				     unsigned num_rectangles,
+				     const struct pipe_scissor_state *rects)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	/* Cache the new state in the context and mark the atom dirty. */
+	sctx->num_window_rectangles = num_rectangles;
+	sctx->window_rectangles_include = include;
+	if (num_rectangles) {
+		memcpy(sctx->window_rectangles, rects,
+		       sizeof(*rects) * num_rectangles);
+	}
+	/* NOTE(review): copy is not clamped to the 4-entry array - verify callers. */
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.window_rectangles);
+}
+
 void si_init_viewport_functions(struct si_context *ctx)
 {
 	ctx->atoms.s.guardband.emit = si_emit_guardband;
 	ctx->atoms.s.scissors.emit = si_emit_scissors;
 	ctx->atoms.s.viewports.emit = si_emit_viewport_states;
+	ctx->atoms.s.window_rectangles.emit = si_emit_window_rectangles;
 
 	ctx->b.set_scissor_states = si_set_scissor_states;
 	ctx->b.set_viewport_states = si_set_viewport_states;
+	ctx->b.set_window_rectangles = si_set_window_rectangles;
 }