intel: enable HiZ on Gen7+

Support is restricted to Gen7+.  Supporting HiZ on Gen6 would require manual
resolves between combined and separate stencil buffers, which may hurt
performance rather than improve it.

Also, we do not support fast depth clears.  xglSetFastClearDepth() will be
removed in a later xgl.h revision.
diff --git a/icd/intel/cmd_barrier.c b/icd/intel/cmd_barrier.c
index ca111c1..bc36945 100644
--- a/icd/intel/cmd_barrier.c
+++ b/icd/intel/cmd_barrier.c
@@ -31,6 +31,12 @@
 #include "cmd_priv.h"
 
 enum {
+    READ_OP          = 1 << 0,
+    WRITE_OP         = 1 << 1,
+    HIZ_OP           = 1 << 2,
+};
+
+enum {
     MEM_CACHE        = 1 << 0,
     DATA_READ_CACHE  = 1 << 1,
     DATA_WRITE_CACHE = 1 << 2,
@@ -38,6 +44,44 @@
     SAMPLER_CACHE    = 1 << 4,
 };
 
+static uint32_t img_get_layout_ops(const struct intel_img *img,
+                                   XGL_IMAGE_LAYOUT layout)
+{
+    uint32_t ops;
+
+    switch (layout) {
+    case XGL_IMAGE_LAYOUT_GENERAL:
+        ops = READ_OP | WRITE_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+        ops = READ_OP | WRITE_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+        ops = READ_OP | WRITE_OP | HIZ_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+        ops = READ_OP | HIZ_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+        ops = READ_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
+        ops = WRITE_OP | HIZ_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
+        ops = READ_OP;
+        break;
+    case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
+        ops = WRITE_OP;
+        break;
+    default:
+        ops = 0;
+        break;
+    }
+
+    return ops;
+}
+
 static uint32_t img_get_layout_caches(const struct intel_img *img,
                                      XGL_IMAGE_LAYOUT layout)
 {
@@ -84,6 +128,23 @@
     return caches;
 }
 
+static void cmd_resolve_depth(struct intel_cmd *cmd,
+                              struct intel_img *img,
+                              XGL_IMAGE_LAYOUT old_layout,
+                              XGL_IMAGE_LAYOUT new_layout,
+                              const XGL_IMAGE_SUBRESOURCE_RANGE *range)
+{
+    const uint32_t old_ops = img_get_layout_ops(img, old_layout);
+    const uint32_t new_ops = img_get_layout_ops(img, new_layout);
+
+    if (old_ops & WRITE_OP) {
+        if ((old_ops & HIZ_OP) && !(new_ops & HIZ_OP))
+            cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_RESOLVE, img, range);
+        else if (!(old_ops & HIZ_OP) && (new_ops & HIZ_OP))
+            cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_HIZ_RESOLVE, img, range);
+    }
+}
+
 static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
                                     uint32_t old_caches,
                                     uint32_t new_caches,
@@ -156,6 +217,10 @@
             input_mask  |= u->img.inputMask;
             {
                 struct intel_img *img = intel_img(u->img.image);
+
+                cmd_resolve_depth(cmd, img, u->img.oldLayout,
+                        u->img.newLayout, &u->img.subresourceRange);
+
                 flush_flags |= cmd_get_flush_flags(cmd,
                         img_get_layout_caches(img, u->img.oldLayout),
                         img_get_layout_caches(img, u->img.newLayout),
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 4981164..1d6093c 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -1840,9 +1840,9 @@
     }
 
     cmd_wa_gen6_pre_ds_flush(cmd);
-    gen6_3DSTATE_DEPTH_BUFFER(cmd, ds, false);
-    gen6_3DSTATE_STENCIL_BUFFER(cmd, ds, false);
-    gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, ds, false);
+    gen6_3DSTATE_DEPTH_BUFFER(cmd, ds, fb->optimal_ds);
+    gen6_3DSTATE_STENCIL_BUFFER(cmd, ds, fb->optimal_ds);
+    gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, ds, fb->optimal_ds);
 
     if (cmd_gen(cmd) >= INTEL_GEN(7))
         gen7_3DSTATE_CLEAR_PARAMS(cmd, 0);
diff --git a/icd/intel/fb.c b/icd/intel/fb.c
index e2115af..f6c0bad 100644
--- a/icd/intel/fb.c
+++ b/icd/intel/fb.c
@@ -97,8 +97,19 @@
         }
 
         fb->ds = ds;
+
+        switch (att->layout) {
+        case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+        case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            fb->optimal_ds = true;
+            break;
+        default:
+            fb->optimal_ds = false;
+            break;
+        }
     } else {
         fb->ds = NULL;
+        fb->optimal_ds = false;
     }
 
     fb->width = width;
diff --git a/icd/intel/fb.h b/icd/intel/fb.h
index b1526c7..f7c6b1f 100644
--- a/icd/intel/fb.h
+++ b/icd/intel/fb.h
@@ -36,6 +36,7 @@
     uint32_t rt_count;
 
     const struct intel_ds_view *ds;
+    bool optimal_ds;
 
     uint32_t sample_count;
     uint32_t width;
diff --git a/icd/intel/intel.c b/icd/intel/intel.c
index c5b4f63..2ffdad2 100644
--- a/icd/intel/intel.c
+++ b/icd/intel/intel.c
@@ -60,6 +60,8 @@
                 intel_debug |= INTEL_DEBUG_NOHW;
             } else if (strncmp(env, "nocache", len) == 0) {
                 intel_debug |= INTEL_DEBUG_NOCACHE;
+            } else if (strncmp(env, "nohiz", len) == 0) {
+                intel_debug |= INTEL_DEBUG_NOHIZ;
             } else if (strncmp(env, "hang", len) == 0) {
                 intel_debug |= INTEL_DEBUG_HANG;
             } else if (strncmp(env, "0x", 2) == 0) {
diff --git a/icd/intel/intel.h b/icd/intel/intel.h
index 11b69e2..774abf3 100644
--- a/icd/intel/intel.h
+++ b/icd/intel/intel.h
@@ -64,7 +64,8 @@
 
     INTEL_DEBUG_NOHW        = 1 << 20,
     INTEL_DEBUG_NOCACHE     = 1 << 21,
-    INTEL_DEBUG_HANG        = 1 << 22,
+    INTEL_DEBUG_NOHIZ       = 1 << 22,
+    INTEL_DEBUG_HANG        = 1 << 23,
 };
 
 extern int intel_debug;
diff --git a/icd/intel/layout.c b/icd/intel/layout.c
index e68b079..8cec97c 100644
--- a/icd/intel/layout.c
+++ b/icd/intel/layout.c
@@ -762,6 +762,9 @@
 {
    const XGL_IMAGE_CREATE_INFO *info = params->info;
 
+   if (intel_debug & INTEL_DEBUG_NOHIZ)
+       return false;
+
    if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
       return false;
 
@@ -769,23 +772,12 @@
       return false;
 
    /*
-    * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
-    * for every level.  This is generally fine except on GEN6, where HiZ and
-    * separate stencil are enabled and disabled at the same time.  When the
-    * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
-    * can result in incompatible formats.
+    * HiZ implies separate stencil on Gen6.  We do not want to copy stencil
+    * values between combined and separate stencil buffers when HiZ is enabled
+    * or disabled.
     */
-   if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
-       info->format == XGL_FMT_D32_SFLOAT_S8_UINT &&
-       info->mipLevels > 1)
-      return false;
-
-   if (true) {
-       intel_dev_log(params->dev, XGL_DBG_MSG_PERF_WARNING,
-               XGL_VALIDATION_LEVEL_0, XGL_NULL_HANDLE, 0, 0,
-               "HiZ disabled");
+   if (intel_gpu_gen(params->gpu) == INTEL_GEN(6))
        return false;
-   }
 
    return true;
 }