i965: Stop lying about cpp and height of a stencil buffer.
When using a separate stencil buffer, i965 requires that the pitch of
the buffer (in the 3DSTATE_STENCIL_BUFFER command) be specified as 2x
the actual pitch.
Previously this was accomplished by doubling the "cpp" and "pitch"
values stored in the intel_region data structure, and halving the
height. However, this was confusing, and it led to a subtle (but
benign) bug: since a stencil buffer is W-tiled, its true height must
be aligned to a multiple of 64; we were accidentally aligning its faux
height to a multiple of 64, causing memory to be wasted.
Note that for window system stencil buffers, the DDX also doubles the
cpp and pitch values. To facilitate fixing this DDX server bug in the
future, we fix the cpp and pitch values we receive from the X server
only if cpp has the "incorrect" value of 2.
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chad Versace <chad.versace@linux.intel.com>
v2: Clarify comments about the DDX.
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 3479333..62bcc93 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -326,10 +326,6 @@
* 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
* height.
*
- * Since the stencil buffer has quirky pitch requirements, its region
- * was allocated with half height and double cpp. So we need
- * a multiplier of 2 to obtain the surface's real height.
- *
* Enable the hiz bit because it and the separate stencil bit must have
* the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
* 1.21 "Separate Stencil Enable":
@@ -435,7 +431,12 @@
struct intel_region *region = stencil_mt->region;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
- OUT_BATCH(region->pitch * region->cpp - 1);
+ /* The stencil buffer has quirky pitch requirements. From Vol 2a,
+ * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
+ * The pitch must be set to 2x the value computed based on width, as
+ * the stencil buffer is stored with two rows interleaved.
+ */
+ OUT_BATCH(2 * region->pitch * region->cpp - 1);
OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 5da6434..3a6144f 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -143,8 +143,22 @@
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
+ /* The stencil buffer has quirky pitch requirements. From the Graphics
+ * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
+ * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
+ * field "Surface Pitch":
+ *
+ * The pitch must be set to 2x the value computed based on width, as
+ * the stencil buffer is stored with two rows interleaved.
+ *
+ * (Note that it is not 100% clear whether this intended to apply to
+ * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
+ * imply that it doesn't), however the comment appears on a "DevIVB+"
+ * page (which would imply that it does). Experiments with the hardware
+ * indicate that it does.
+ */
OUT_BATCH(enabled |
- (stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
+ (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
OUT_RELOC(stencil_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 16a9887..b8472b6 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1251,17 +1251,35 @@
int buffer_width;
int buffer_height;
+ int buffer_cpp = buffer->cpp;
+ int buffer_pitch = buffer->pitch;
if (buffer->attachment == __DRI_BUFFER_STENCIL) {
- /* The stencil buffer has quirky pitch requirements. From Section
- * 2.11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
- * The pitch must be set to 2x the value computed based on width, as
- * the stencil buffer is stored with two rows interleaved.
+ /* Stencil buffers use W tiling, a tiling format that the DRM functions
+ * don't properly account for. Therefore, when we allocate a stencil
+ * buffer that is private to Mesa (see intel_miptree_create), we round
+ * the height and width up to the next multiple of the tile size (64x64)
+ * and then ask DRM to allocate an untiled buffer. Consequently, the
+ * height and the width stored in the stencil buffer's region structure
+ * are always multiples of 64, even if the stencil buffer itself is
+ * smaller.
*
- * To satisfy the pitch requirement, the X driver allocated the region
- * with the following dimensions.
+ * To avoid inconsistencies between how we represent private buffers and
+ * buffers shared with the window system, round up the height and width
+ * for window system buffers too.
*/
buffer_width = ALIGN(drawable->w, 64);
- buffer_height = ALIGN(ALIGN(drawable->h, 2) / 2, 64);
+ buffer_height = ALIGN(drawable->h, 64);
+
+ /* Versions 2.17.0 and earlier of xf86-video-intel (a.k.a. the DDX) lie
+ * and send cpp and pitch values that are two times too large for
+ * stencil buffers. Hopefully this will be fixed in a future version
+ * of xf86-video-intel, but for now detect the bug by checking if cpp
+ * is 2, and fixing cpp and pitch if it is.
+ */
+ if (buffer_cpp == 2) {
+ buffer_cpp = 1;
+ buffer_pitch /= 2;
+ }
} else {
buffer_width = drawable->w;
buffer_height = drawable->h;
@@ -1279,10 +1297,10 @@
struct intel_region *region =
intel_region_alloc_for_handle(intel->intelScreen,
- buffer->cpp,
+ buffer_cpp,
buffer_width,
buffer_height,
- buffer->pitch / buffer->cpp,
+ buffer_pitch / buffer_cpp,
buffer->name,
buffer_name);
if (!region)
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 636ee17..91ebc8d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -102,16 +102,6 @@
mt->depth0 = depth0;
}
- if (format == MESA_FORMAT_S8) {
- /* The stencil buffer has quirky pitch requirements. From Vol 2a,
- * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
- * The pitch must be set to 2x the value computed based on width, as
- * the stencil buffer is stored with two rows interleaved.
- */
- assert(intel->has_separate_stencil);
- mt->cpp = 2;
- }
-
if (!for_region &&
_mesa_is_depthstencil_format(_mesa_get_format_base_format(format)) &&
(intel->must_use_separate_stencil ||
@@ -188,20 +178,12 @@
if (format == MESA_FORMAT_S8) {
/* The stencil buffer is W tiled. However, we request from the kernel a
- * non-tiled buffer because the GTT is incapable of W fencing.
- *
- * The stencil buffer has quirky pitch requirements. From Vol 2a,
- * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
- * The pitch must be set to 2x the value computed based on width, as
- * the stencil buffer is stored with two rows interleaved.
- * To accomplish this, we resort to the nasty hack of doubling the drm
- * region's cpp and halving its height.
- *
- * If we neglect to double the pitch, then render corruption occurs.
+ * non-tiled buffer because the GTT is incapable of W fencing. So round
+ * up the width and height to match the size of W tiles (64x64).
*/
tiling = I915_TILING_NONE;
width0 = ALIGN(width0, 64);
- height0 = ALIGN((height0 + 1) / 2, 64);
+ height0 = ALIGN(height0, 64);
}
mt = intel_miptree_create_internal(intel, target, format,
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 49e208c..3fc1d3c 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -956,22 +956,28 @@
return NULL;
if (attachment == __DRI_BUFFER_STENCIL) {
- /* The stencil buffer has quirky pitch requirements. From Vol 2a,
- * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
- * The pitch must be set to 2x the value computed based on width, as
- * the stencil buffer is stored with two rows interleaved.
- * To accomplish this, we resort to the nasty hack of doubling the
- * region's cpp and halving its height.
+ /* Stencil buffers use W tiling, a tiling format that the DRM functions
+ * don't properly account for. Therefore, when we allocate a stencil
+ * buffer that is private to Mesa (see intel_miptree_create), we round
+ * the height and width up to the next multiple of the tile size (64x64)
+ * and then ask DRM to allocate an untiled buffer. Consequently, the
+ * height and the width stored in the stencil buffer's region structure
+ * are always multiples of 64, even if the stencil buffer itself is
+ * smaller.
+ *
+ * To avoid inconsistencies between how we represent private buffers and
+ * buffers shared with the window system, round up the height and width
+ * for window system buffers too.
*/
region_width = ALIGN(width, 64);
- region_height = ALIGN(ALIGN(height, 2) / 2, 64);
- region_cpp = format / 4;
+ region_height = ALIGN(height, 64);
} else {
region_width = width;
region_height = height;
- region_cpp = format / 8;
}
+ region_cpp = format / 8;
+
intelBuffer->region = intel_region_alloc(intelScreen,
tiling,
region_cpp,