intel: enable HiZ on Gen7+
Support is restricted to Gen7+. Supporting HiZ on Gen6 would require manual
resolves between combined and separate stencil buffers, which may hurt
performance instead of improving it.
Also, we do not support fast depth clears. xglSetFastClearDepth() will be
removed in a later xgl.h revision.
diff --git a/icd/intel/cmd_barrier.c b/icd/intel/cmd_barrier.c
index ca111c1..bc36945 100644
--- a/icd/intel/cmd_barrier.c
+++ b/icd/intel/cmd_barrier.c
@@ -31,6 +31,12 @@
#include "cmd_priv.h"
enum {
+ READ_OP = 1 << 0,
+ WRITE_OP = 1 << 1,
+ HIZ_OP = 1 << 2,
+};
+
+enum {
MEM_CACHE = 1 << 0,
DATA_READ_CACHE = 1 << 1,
DATA_WRITE_CACHE = 1 << 2,
@@ -38,6 +44,44 @@
SAMPLER_CACHE = 1 << 4,
};
+static uint32_t img_get_layout_ops(const struct intel_img *img,
+ XGL_IMAGE_LAYOUT layout)
+{
+ uint32_t ops;
+
+ switch (layout) {
+ case XGL_IMAGE_LAYOUT_GENERAL:
+ ops = READ_OP | WRITE_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ ops = READ_OP | WRITE_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ ops = READ_OP | WRITE_OP | HIZ_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ ops = READ_OP | HIZ_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ ops = READ_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
+ ops = WRITE_OP | HIZ_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
+ ops = READ_OP;
+ break;
+ case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
+ ops = WRITE_OP;
+ break;
+ default:
+ ops = 0;
+ break;
+ }
+
+ return ops;
+}
+
static uint32_t img_get_layout_caches(const struct intel_img *img,
XGL_IMAGE_LAYOUT layout)
{
@@ -84,6 +128,23 @@
return caches;
}
+static void cmd_resolve_depth(struct intel_cmd *cmd,
+ struct intel_img *img,
+ XGL_IMAGE_LAYOUT old_layout,
+ XGL_IMAGE_LAYOUT new_layout,
+ const XGL_IMAGE_SUBRESOURCE_RANGE *range)
+{
+ const uint32_t old_ops = img_get_layout_ops(img, old_layout);
+ const uint32_t new_ops = img_get_layout_ops(img, new_layout);
+
+ if (old_ops & WRITE_OP) {
+ if ((old_ops & HIZ_OP) && !(new_ops & HIZ_OP))
+ cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_RESOLVE, img, range);
+ else if (!(old_ops & HIZ_OP) && (new_ops & HIZ_OP))
+ cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_HIZ_RESOLVE, img, range);
+ }
+}
+
static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
uint32_t old_caches,
uint32_t new_caches,
@@ -156,6 +217,10 @@
input_mask |= u->img.inputMask;
{
struct intel_img *img = intel_img(u->img.image);
+
+ cmd_resolve_depth(cmd, img, u->img.oldLayout,
+ u->img.newLayout, &u->img.subresourceRange);
+
flush_flags |= cmd_get_flush_flags(cmd,
img_get_layout_caches(img, u->img.oldLayout),
img_get_layout_caches(img, u->img.newLayout),
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 4981164..1d6093c 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -1840,9 +1840,9 @@
}
cmd_wa_gen6_pre_ds_flush(cmd);
- gen6_3DSTATE_DEPTH_BUFFER(cmd, ds, false);
- gen6_3DSTATE_STENCIL_BUFFER(cmd, ds, false);
- gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, ds, false);
+ gen6_3DSTATE_DEPTH_BUFFER(cmd, ds, fb->optimal_ds);
+ gen6_3DSTATE_STENCIL_BUFFER(cmd, ds, fb->optimal_ds);
+ gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, ds, fb->optimal_ds);
if (cmd_gen(cmd) >= INTEL_GEN(7))
gen7_3DSTATE_CLEAR_PARAMS(cmd, 0);
diff --git a/icd/intel/fb.c b/icd/intel/fb.c
index e2115af..f6c0bad 100644
--- a/icd/intel/fb.c
+++ b/icd/intel/fb.c
@@ -97,8 +97,19 @@
}
fb->ds = ds;
+
+ switch (att->layout) {
+ case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ fb->optimal_ds = true;
+ break;
+ default:
+ fb->optimal_ds = false;
+ break;
+ }
} else {
fb->ds = NULL;
+ fb->optimal_ds = false;
}
fb->width = width;
diff --git a/icd/intel/fb.h b/icd/intel/fb.h
index b1526c7..f7c6b1f 100644
--- a/icd/intel/fb.h
+++ b/icd/intel/fb.h
@@ -36,6 +36,7 @@
uint32_t rt_count;
const struct intel_ds_view *ds;
+ bool optimal_ds;
uint32_t sample_count;
uint32_t width;
diff --git a/icd/intel/intel.c b/icd/intel/intel.c
index c5b4f63..2ffdad2 100644
--- a/icd/intel/intel.c
+++ b/icd/intel/intel.c
@@ -60,6 +60,8 @@
intel_debug |= INTEL_DEBUG_NOHW;
} else if (strncmp(env, "nocache", len) == 0) {
intel_debug |= INTEL_DEBUG_NOCACHE;
+ } else if (strncmp(env, "nohiz", len) == 0) {
+ intel_debug |= INTEL_DEBUG_NOHIZ;
} else if (strncmp(env, "hang", len) == 0) {
intel_debug |= INTEL_DEBUG_HANG;
} else if (strncmp(env, "0x", 2) == 0) {
diff --git a/icd/intel/intel.h b/icd/intel/intel.h
index 11b69e2..774abf3 100644
--- a/icd/intel/intel.h
+++ b/icd/intel/intel.h
@@ -64,7 +64,8 @@
INTEL_DEBUG_NOHW = 1 << 20,
INTEL_DEBUG_NOCACHE = 1 << 21,
- INTEL_DEBUG_HANG = 1 << 22,
+ INTEL_DEBUG_NOHIZ = 1 << 22,
+ INTEL_DEBUG_HANG = 1 << 23,
};
extern int intel_debug;
diff --git a/icd/intel/layout.c b/icd/intel/layout.c
index e68b079..8cec97c 100644
--- a/icd/intel/layout.c
+++ b/icd/intel/layout.c
@@ -762,6 +762,9 @@
{
const XGL_IMAGE_CREATE_INFO *info = params->info;
+ if (intel_debug & INTEL_DEBUG_NOHIZ)
+ return false;
+
if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
return false;
@@ -769,23 +772,12 @@
return false;
/*
- * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
- * for every level. This is generally fine except on GEN6, where HiZ and
- * separate stencil are enabled and disabled at the same time. When the
- * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
- * can result in incompatible formats.
+ * HiZ implies separate stencil on Gen6. We do not want to copy stencil
+ * values between combined and separate stencil buffers when HiZ is enabled
+ * or disabled.
*/
- if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
- info->format == XGL_FMT_D32_SFLOAT_S8_UINT &&
- info->mipLevels > 1)
- return false;
-
- if (true) {
- intel_dev_log(params->dev, XGL_DBG_MSG_PERF_WARNING,
- XGL_VALIDATION_LEVEL_0, XGL_NULL_HANDLE, 0, 0,
- "HiZ disabled");
+ if (intel_gpu_gen(params->gpu) == INTEL_GEN(6))
return false;
- }
return true;
}