intel: update intel_img to use intel_layout

commit: 2b685d7c2c6719005abb44bb5cc0e9d3bd2657a3 [log] [tgz]
author: Chia-I Wu <olvaffe@gmail.com> Thu Aug 14 13:45:37 2014 +0800
committer: Chia-I Wu <olvaffe@gmail.com> Fri Aug 15 15:11:34 2014 +0800
tree: 20b57aff8f468265417e2cc8f759942a004e7385
parent: 8a8d8b6887e85a9a89fe51ba3db2594c2f30a346 [diff] [blame]
diff --git a/icd/intel/img.c b/icd/intel/img.c
index 2e07325..6d54a9e 100644
--- a/icd/intel/img.c
+++ b/icd/intel/img.c

@@ -27,37 +27,6 @@
 #include "gpu.h"
 #include "img.h"
 
-struct tex_layout {
-   const struct intel_gpu *gpu;
-   const XGL_IMAGE_CREATE_INFO *info;
-
-   bool has_depth, has_stencil;
-   bool hiz, separate_stencil;
-
-   XGL_FORMAT format;
-   unsigned block_width, block_height, block_size;
-   bool compressed;
-
-   enum intel_tiling_mode tiling;
-   unsigned valid_tilings; /* bitmask of valid tiling modes */
-
-   bool array_spacing_full;
-   bool interleaved;
-
-   struct {
-      int w, h, d;
-      struct intel_img_slice *slices;
-   } levels[INTEL_IMG_MAX_LEVELS];
-
-   int align_i, align_j;
-   int qpitch;
-
-   int width, height;
-
-   int bo_stride, bo_height;
-   int hiz_stride, hiz_height;
-};
-
 /*
  * From the Ivy Bridge PRM, volume 1 part 1, page 105:
  *
@@ -67,992 +36,6 @@
  */
 static const size_t intel_max_resource_size = 1u << 31;
 
-static void
-tex_layout_init_qpitch(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   int h0, h1;
-
-   if (info->arraySize <= 1)
-      return;
-
-   h0 = u_align(layout->levels[0].h, layout->align_j);
-
-   if (!layout->array_spacing_full) {
-      layout->qpitch = h0;
-      return;
-   }
-
-   h1 = u_align(layout->levels[1].h, layout->align_j);
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
-    *
-    *     "The following equation is used for surface formats other than
-    *      compressed textures:
-    *
-    *        QPitch = (h0 + h1 + 11j)"
-    *
-    *     "The equation for compressed textures (BC* and FXT1 surface formats)
-    *      follows:
-    *
-    *        QPitch = (h0 + h1 + 11j) / 4"
-    *
-    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
-    *      value calculated in the equation above, for every other odd Surface
-    *      Height starting from 1 i.e. 1,5,9,13"
-    *
-    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
-    *
-    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
-    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
-    *
-    *        QPitch = (h0 + h1 + 12j)
-    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
-    *
-    *      (There are many typos or missing words here...)"
-    *
-    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
-    * the base address.  The PRM divides QPitch by 4 for compressed formats
-    * because the block height for those formats is 4, and it wants QPitch to
-    * mean the number of memory rows, as opposed to texel rows, between
-    * slices.  Since we use texel rows in tex->slice_offsets, we do not need
-    * to divide QPitch by 4.
-    */
-   layout->qpitch = h0 + h1 +
-      ((intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
-
-   if (intel_gpu_gen(layout->gpu) == INTEL_GEN(6) && info->samples > 1 &&
-       info->extent.height % 4 == 1)
-      layout->qpitch += 4;
-}
-
-static void
-tex_layout_init_alignments(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
-    *
-    *     "surface format           align_i     align_j
-    *      YUV 4:2:2 formats        4           *see below
-    *      BC1-5                    4           4
-    *      FXT1                     8           4
-    *      all other formats        4           *see below"
-    *
-    *     "- align_j = 4 for any depth buffer
-    *      - align_j = 2 for separate stencil buffer
-    *      - align_j = 4 for any render target surface is multisampled (4x)
-    *      - align_j = 4 for any render target surface with Surface Vertical
-    *        Alignment = VALIGN_4
-    *      - align_j = 2 for any render target surface with Surface Vertical
-    *        Alignment = VALIGN_2
-    *      - align_j = 2 for all other render target surface
-    *      - align_j = 2 for any sampling engine surface with Surface Vertical
-    *        Alignment = VALIGN_2
-    *      - align_j = 4 for any sampling engine surface with Surface Vertical
-    *        Alignment = VALIGN_4"
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
-    *
-    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
-    *      the Surface Format is 96 bits per element (BPE)."
-    *
-    * They can be rephrased as
-    *
-    *                                  align_i        align_j
-    *   compressed formats             block width    block height
-    *   PIPE_FORMAT_S8_UINT            4              2
-    *   other depth/stencil formats    4              4
-    *   4x multisampled                4              4
-    *   bpp 96                         4              2
-    *   others                         4              2 or 4
-    */
-
-   /*
-    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
-    *
-    *     "surface defined by      surface format     align_i     align_j
-    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
-    *                              not D16_UNORM      4           4
-    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
-    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
-    *                              FXT1               8           4
-    *                              all others         (set by SURFACE_STATE)"
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
-    *
-    *     "- This field (Surface Vertical Aligment) is intended to be set to
-    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
-    *        multisampled (4x) render target, or for a multisampled (8x)
-    *        render target, since these surfaces support only alignment of 4.
-    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
-    *        memory.
-    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
-    *        surfaces.
-    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
-    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
-    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
-    *        must be set to VALIGN_4."
-    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
-    *
-    *     "- This field (Surface Horizontal Aligment) is intended to be set to
-    *        HALIGN_8 only if the surface was rendered as a depth buffer with
-    *        Z16 format or a stencil buffer, since these surfaces support only
-    *        alignment of 8.
-    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
-    *        memory.
-    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
-    *      - This field must be set to HALIGN_8 if the Surface Format is
-    *        FXT1."
-    *
-    * They can be rephrased as
-    *
-    *                                  align_i        align_j
-    *  compressed formats              block width    block height
-    *  PIPE_FORMAT_Z16_UNORM           8              4
-    *  PIPE_FORMAT_S8_UINT             8              8
-    *  other depth/stencil formats     4 or 8         4
-    *  2x or 4x multisampled           4 or 8         4
-    *  tiled Y                         4 or 8         4 (if rt)
-    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
-    *  others                          4 or 8         2 or 4
-    */
-
-   if (layout->compressed) {
-      /* this happens to be the case */
-      layout->align_i = layout->block_width;
-      layout->align_j = layout->block_height;
-   }
-   else if (layout->format.numericFormat == XGL_NUM_FMT_DS) {
-      if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) {
-         switch (layout->format.channelFormat) {
-         case XGL_CH_FMT_R16:
-            layout->align_i = 8;
-            layout->align_j = 4;
-            break;
-         case XGL_CH_FMT_R8:
-            layout->align_i = 8;
-            layout->align_j = 8;
-            break;
-         default:
-            layout->align_i = 4;
-            layout->align_j = 4;
-            break;
-         }
-      }
-      else {
-         switch (layout->format.channelFormat) {
-         case XGL_CH_FMT_R8:
-            layout->align_i = 4;
-            layout->align_j = 2;
-            break;
-         default:
-            layout->align_i = 4;
-            layout->align_j = 4;
-            break;
-         }
-      }
-   }
-   else {
-      const bool valign_4 = (info->samples > 1) ||
-         (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7) &&
-          layout->tiling == INTEL_TILING_Y &&
-          (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
-
-      if (valign_4)
-         assert(layout->block_size != 12);
-
-      layout->align_i = 4;
-      layout->align_j = (valign_4) ? 4 : 2;
-   }
-
-   /*
-    * the fact that align i and j are multiples of block width and height
-    * respectively is what makes the size of the bo a multiple of the block
-    * size, slices start at block boundaries, and many of the computations
-    * work.
-    */
-   assert(layout->align_i % layout->block_width == 0);
-   assert(layout->align_j % layout->block_height == 0);
-
-   /* make sure u_align() works */
-   assert(u_is_pow2(layout->align_i) &&
-          u_is_pow2(layout->align_j));
-   assert(u_is_pow2(layout->block_width) &&
-          u_is_pow2(layout->block_height));
-}
-
-static void
-tex_layout_init_levels(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   int last_level, lv;
-
-   last_level = info->mipLevels - 1;
-
-   /* need at least 2 levels to compute full qpitch */
-   if (last_level == 0 && info->arraySize > 1 && layout->array_spacing_full)
-      last_level++;
-
-   /* compute mip level sizes */
-   for (lv = 0; lv <= last_level; lv++) {
-      int w, h, d;
-
-      w = u_minify(info->extent.width, lv);
-      h = u_minify(info->extent.height, lv);
-      d = u_minify(info->extent.depth, lv);
-
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 1, page 114:
-       *
-       *     "The dimensions of the mip maps are first determined by applying
-       *      the sizing algorithm presented in Non-Power-of-Two Mipmaps
-       *      above. Then, if necessary, they are padded out to compression
-       *      block boundaries."
-       */
-      w = u_align(w, layout->block_width);
-      h = u_align(h, layout->block_height);
-
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 1, page 111:
-       *
-       *     "If the surface is multisampled (4x), these values must be
-       *      adjusted as follows before proceeding:
-       *
-       *        W_L = ceiling(W_L / 2) * 4
-       *        H_L = ceiling(H_L / 2) * 4"
-       *
-       * From the Ivy Bridge PRM, volume 1 part 1, page 108:
-       *
-       *     "If the surface is multisampled and it is a depth or stencil
-       *      surface or Multisampled Surface StorageFormat in SURFACE_STATE
-       *      is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
-       *      before proceeding:
-       *
-       *        #samples  W_L =                    H_L =
-       *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
-       *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
-       *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
-       *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
-       *
-       * For interleaved samples (4x), where pixels
-       *
-       *   (x, y  ) (x+1, y  )
-       *   (x, y+1) (x+1, y+1)
-       *
-       * would be occupied by
-       *
-       *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
-       *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
-       *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
-       *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
-       *
-       * Thus the need to
-       *
-       *   w = u_align(w, 2) * 2;
-       *   h = u_align(h, 2) * 2;
-       */
-      if (layout->interleaved) {
-         switch (info->samples) {
-         case 0:
-         case 1:
-            break;
-         case 2:
-            w = u_align(w, 2) * 2;
-            break;
-         case 4:
-            w = u_align(w, 2) * 2;
-            h = u_align(h, 2) * 2;
-            break;
-         case 8:
-            w = u_align(w, 2) * 4;
-            h = u_align(h, 2) * 2;
-            break;
-         case 16:
-            w = u_align(w, 2) * 4;
-            h = u_align(h, 2) * 4;
-            break;
-         default:
-            assert(!"unsupported sample count");
-            break;
-         }
-      }
-
-      layout->levels[lv].w = w;
-      layout->levels[lv].h = h;
-      layout->levels[lv].d = d;
-   }
-}
-
-static void
-tex_layout_init_spacing(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-
-   if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) {
-      /*
-       * It is not explicitly stated, but render targets are expected to be
-       * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
-       * expected to be IMS (samples interleaved).
-       *
-       * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
-       */
-      if (layout->has_depth || layout->has_stencil) {
-         layout->interleaved = true;
-
-         /*
-          * From the Ivy Bridge PRM, volume 1 part 1, page 111:
-          *
-          *     "note that the depth buffer and stencil buffer have an implied
-          *      value of ARYSPC_FULL"
-          */
-         layout->array_spacing_full = true;
-      }
-      else {
-         layout->interleaved = false;
-
-         /*
-          * From the Ivy Bridge PRM, volume 4 part 1, page 66:
-          *
-          *     "If Multisampled Surface Storage Format is MSFMT_MSS and
-          *      Number of Multisamples is not MULTISAMPLECOUNT_1, this field
-          *      (Surface Array Spacing) must be set to ARYSPC_LOD0."
-          *
-          * As multisampled resources are not mipmapped, we never use
-          * ARYSPC_FULL for them.
-          */
-         if (info->samples > 1)
-            assert(info->mipLevels == 1);
-         layout->array_spacing_full = (info->mipLevels > 1);
-      }
-   }
-   else {
-      /* GEN6 supports only interleaved samples */
-      layout->interleaved = true;
-
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 1, page 115:
-       *
-       *     "The separate stencil buffer does not support mip mapping, thus
-       *      the storage for LODs other than LOD 0 is not needed. The
-       *      following QPitch equation applies only to the separate stencil
-       *      buffer:
-       *
-       *        QPitch = h_0"
-       *
-       * GEN6 does not support compact spacing otherwise.
-       */
-      layout->array_spacing_full =
-          !(layout->format.channelFormat == XGL_CH_FMT_R8 &&
-            layout->format.numericFormat == XGL_NUM_FMT_DS);
-   }
-}
-
-static void
-tex_layout_init_tiling(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   const unsigned tile_none = 1 << INTEL_TILING_NONE;
-   const unsigned tile_x = 1 << INTEL_TILING_X;
-   const unsigned tile_y = 1 << INTEL_TILING_Y;
-   unsigned valid_tilings = tile_none | tile_x | tile_y;
-
-   if (info->tiling == XGL_LINEAR_TILING)
-      valid_tilings &= tile_none;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
-    *
-    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
-    *      Depth Buffer is not supported."
-    *
-    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
-    *
-    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
-    *
-    *     "W-Major Tile Format is used for separate stencil."
-    *
-    * Since the HW does not support W-tiled fencing, we have to do it in the
-    * driver.
-    */
-   if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
-       if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
-           layout->format.numericFormat == XGL_NUM_FMT_DS)
-           valid_tilings &= tile_none;
-       else
-           valid_tilings &= tile_y;
-   }
-
-   if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
-       *
-       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
-       *      either TileX or Linear."
-       */
-      if (layout->block_size == 16)
-         valid_tilings &= ~tile_y;
-
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
-       *
-       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
-       *      for all tiled Y Render Target surfaces."
-       *
-       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
-       */
-      if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
-         valid_tilings &= ~tile_y;
-   }
-
-   /* no conflicting binding flags */
-   assert(valid_tilings);
-
-   layout->valid_tilings = valid_tilings;
-
-   if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
-                      XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
-      /*
-       * heuristically set a minimum width/height for enabling tiling
-       */
-      if (info->extent.width < 64 && (valid_tilings & ~tile_x))
-         valid_tilings &= ~tile_x;
-
-      if ((info->extent.width < 32 || info->extent.height < 16) &&
-          (info->extent.width < 16 || info->extent.height < 32) &&
-          (valid_tilings & ~tile_y))
-         valid_tilings &= ~tile_y;
-   }
-   else {
-      /* force linear if we are not sure where the texture is bound to */
-      if (valid_tilings & tile_none)
-         valid_tilings &= tile_none;
-   }
-
-   /* prefer tiled over linear */
-   if (valid_tilings & tile_y)
-      layout->tiling = INTEL_TILING_Y;
-   else if (valid_tilings & tile_x)
-      layout->tiling = INTEL_TILING_X;
-   else
-      layout->tiling = INTEL_TILING_NONE;
-}
-
-static void
-tex_layout_init_format(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   XGL_FORMAT format = info->format;
-
-   if (format.numericFormat == XGL_NUM_FMT_DS) {
-       switch (format.channelFormat) {
-       case XGL_CH_FMT_R32G8:
-           if (layout->separate_stencil)
-               format.channelFormat = XGL_CH_FMT_R32;
-           break;
-       default:
-           break;
-       }
-   }
-
-   layout->format = format;
-
-   switch (format.channelFormat) {
-   case XGL_CH_FMT_BC1:
-   case XGL_CH_FMT_BC2:
-   case XGL_CH_FMT_BC3:
-   case XGL_CH_FMT_BC4:
-   case XGL_CH_FMT_BC5:
-   case XGL_CH_FMT_BC6U:
-   case XGL_CH_FMT_BC6S:
-   case XGL_CH_FMT_BC7:
-       layout->block_width = 4;
-       layout->block_height = 4;
-       layout->block_size =
-           (format.channelFormat == XGL_CH_FMT_BC1 ||
-            format.channelFormat == XGL_CH_FMT_BC4) ? 8 : 16;
-       layout->compressed = true;
-       break;
-   default:
-       layout->block_width = 1;
-       layout->block_height = 1;
-       layout->block_size = 1;
-       layout->compressed = false;
-       break;
-   }
-}
-
-static void
-tex_layout_init_hiz(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-
-   if (info->format.numericFormat == XGL_NUM_FMT_DS) {
-       switch (info->format.channelFormat) {
-       case XGL_CH_FMT_R32G8:
-           layout->has_depth = true;
-           layout->has_stencil = true;
-           break;
-       case XGL_CH_FMT_R32:
-           layout->has_depth = true;
-           break;
-       case XGL_CH_FMT_R8:
-           layout->has_stencil = true;
-           break;
-       default:
-           assert(!"unsupported DS format");
-           break;
-       }
-   }
-
-   if (!layout->has_depth)
-      return;
-
-   layout->hiz = true;
-
-   if (intel_gpu_gen(layout->gpu) == INTEL_GEN(6)) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 312:
-       *
-       *     "The hierarchical depth buffer does not support the LOD field, it
-       *      is assumed by hardware to be zero. A separate hierarachical
-       *      depth buffer is required for each LOD used, and the
-       *      corresponding buffer's state delivered to hardware each time a
-       *      new depth buffer state with modified LOD is delivered."
-       *
-       * But we have a stronger requirement.  Because of layer offsetting
-       * (check out the callers of ilo_texture_get_slice_offset()), we already
-       * have to require the texture to be non-mipmapped and non-array.
-       */
-      if (info->mipLevels > 1 || info->arraySize > 1 || info->extent.depth > 1)
-         layout->hiz = false;
-   }
-
-   if (layout->has_stencil) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-       *
-       *     "This field (Separate Stencil Buffer Enable) must be set to the
-       *      same value (enabled or disabled) as Hierarchical Depth Buffer
-       *      Enable."
-       *
-       * GEN7+ requires separate stencil buffers.
-       */
-      if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7))
-         layout->separate_stencil = true;
-      else
-         layout->separate_stencil = layout->hiz;
-
-      if (layout->separate_stencil)
-         layout->has_stencil = false;
-   }
-}
-
-static XGL_RESULT
-tex_layout_init(struct tex_layout *layout,
-                const struct intel_gpu *gpu,
-                const XGL_IMAGE_CREATE_INFO *info,
-                struct intel_img_slice **slices)
-{
-   memset(layout, 0, sizeof(*layout));
-
-   layout->gpu = gpu;
-   layout->info = info;
-
-   /* note that there are dependencies between these functions */
-   tex_layout_init_hiz(layout);
-   tex_layout_init_format(layout);
-   tex_layout_init_tiling(layout);
-   tex_layout_init_spacing(layout);
-   tex_layout_init_levels(layout);
-   tex_layout_init_alignments(layout);
-   tex_layout_init_qpitch(layout);
-
-   if (slices) {
-      int lv;
-
-      for (lv = 0; lv < info->mipLevels; lv++)
-         layout->levels[lv].slices = slices[lv];
-   }
-
-   return XGL_SUCCESS;
-}
-
-static void
-tex_layout_align(struct tex_layout *layout)
-{
-   int align_w = 1, align_h = 1, pad_h = 0;
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
-    *
-    *     "To determine the necessary padding on the bottom and right side of
-    *      the surface, refer to the table in Section 7.18.3.4 for the i and j
-    *      parameters for the surface format in use. The surface must then be
-    *      extended to the next multiple of the alignment unit size in each
-    *      dimension, and all texels contained in this extended surface must
-    *      have valid GTT entries."
-    *
-    *     "For cube surfaces, an additional two rows of padding are required
-    *      at the bottom of the surface. This must be ensured regardless of
-    *      whether the surface is stored tiled or linear.  This is due to the
-    *      potential rotation of cache line orientation from memory to cache."
-    *
-    *     "For compressed textures (BC* and FXT1 surface formats), padding at
-    *      the bottom of the surface is to an even compressed row, which is
-    *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
-    *      purposes, these surfaces behave as if j = 8 only for surface
-    *      padding purposes. The value of 4 for j still applies for mip level
-    *      alignment and QPitch calculation."
-    */
-   if (layout->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
-       if (align_w < layout->align_i)
-           align_w = layout->align_i;
-       if (align_h < layout->align_j)
-           align_h = layout->align_j;
-
-       /* in case it is used as a cube */
-      if (layout->info->imageType == XGL_IMAGE_2D)
-         pad_h += 2;
-
-      if (layout->compressed && align_h < 8)
-         align_h = 8;
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
-    *
-    *     "If the surface contains an odd number of rows of data, a final row
-    *      below the surface must be allocated."
-    */
-   if (layout->info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
-       if (align_h < 2)
-           align_h = 2;
-   }
-
-   /*
-    * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  In
-    * ilo_texture_can_enable_hiz(), we always return true for the first slice.
-    * To avoid out-of-bound access, we have to pad.
-    */
-   if (layout->hiz) {
-       if (align_w < 8)
-           align_w = 8;
-       if (align_h < 4)
-           align_h = 4;
-   }
-
-   layout->width = u_align(layout->width, align_w);
-   layout->height = u_align(layout->height + pad_h, align_h);
-}
-
-/**
- * Layout a 2D texture.
- */
-static void
-tex_layout_2d(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   unsigned int level_x, level_y, num_slices;
-   int lv;
-
-   level_x = 0;
-   level_y = 0;
-   for (lv = 0; lv < info->mipLevels; lv++) {
-      const unsigned int level_w = layout->levels[lv].w;
-      const unsigned int level_h = layout->levels[lv].h;
-      int slice;
-
-      /* set slice offsets */
-      if (layout->levels[lv].slices) {
-         for (slice = 0; slice < info->arraySize; slice++) {
-            layout->levels[lv].slices[slice].x = level_x;
-            /* slices are qpitch apart in Y-direction */
-            layout->levels[lv].slices[slice].y =
-               level_y + layout->qpitch * slice;
-         }
-      }
-
-      /* extend the size of the monolithic bo to cover this mip level */
-      if (layout->width < level_x + level_w)
-         layout->width = level_x + level_w;
-      if (layout->height < level_y + level_h)
-         layout->height = level_y + level_h;
-
-      /* MIPLAYOUT_BELOW */
-      if (lv == 1)
-         level_x += u_align(level_w, layout->align_i);
-      else
-         level_y += u_align(level_h, layout->align_j);
-   }
-
-   num_slices = info->arraySize;
-   /* samples of the same index are stored in a slice */
-   if (info->samples > 1 && !layout->interleaved)
-      num_slices *= info->samples;
-
-   /* we did not take slices into consideration in the computation above */
-   layout->height += layout->qpitch * (num_slices - 1);
-
-   tex_layout_align(layout);
-}
-
-/**
- * Layout a 3D texture.
- */
-static void
-tex_layout_3d(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   unsigned int level_y;
-   int lv;
-
-   level_y = 0;
-   for (lv = 0; lv < info->mipLevels; lv++) {
-      const unsigned int level_w = layout->levels[lv].w;
-      const unsigned int level_h = layout->levels[lv].h;
-      const unsigned int level_d = layout->levels[lv].d;
-      const unsigned int slice_pitch = u_align(level_w, layout->align_i);
-      const unsigned int slice_qpitch = u_align(level_h, layout->align_j);
-      const unsigned int num_slices_per_row = 1 << lv;
-      int slice;
-
-      for (slice = 0; slice < level_d; slice += num_slices_per_row) {
-         int i;
-
-         /* set slice offsets */
-         if (layout->levels[lv].slices) {
-            for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
-               layout->levels[lv].slices[slice + i].x = slice_pitch * i;
-               layout->levels[lv].slices[slice + i].y = level_y;
-            }
-         }
-
-         /* move on to the next slice row */
-         level_y += slice_qpitch;
-      }
-
-      /* rightmost slice */
-      if (num_slices_per_row < level_d)
-          slice = num_slices_per_row - 1;
-      else
-          slice = level_d - 1;
-
-      /* extend the size of the monolithic bo to cover this slice */
-      if (layout->width < slice_pitch * slice + level_w)
-         layout->width = slice_pitch * slice + level_w;
-      if (lv == info->mipLevels - 1)
-         layout->height = (level_y - slice_qpitch) + level_h;
-   }
-
-   tex_layout_align(layout);
-}
-
-/* note that this may force the texture to be linear */
-static bool
-tex_layout_calculate_bo_size(struct tex_layout *layout)
-{
-   assert(layout->width % layout->block_width == 0);
-   assert(layout->height % layout->block_height == 0);
-   assert(layout->qpitch % layout->block_height == 0);
-
-   layout->bo_stride =
-      (layout->width / layout->block_width) * layout->block_size;
-   layout->bo_height = layout->height / layout->block_height;
-
-   while (true) {
-      int w = layout->bo_stride, h = layout->bo_height;
-      int align_w, align_h;
-
-      /*
-       * From the Haswell PRM, volume 5, page 163:
-       *
-       *     "For linear surfaces, additional padding of 64 bytes is required
-       *      at the bottom of the surface. This is in addition to the padding
-       *      required above."
-       */
-      if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7.5) &&
-          (layout->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
-          layout->tiling == INTEL_TILING_NONE) {
-         layout->bo_height +=
-            (64 + layout->bo_stride - 1) / layout->bo_stride;
-      }
-
-      /*
-       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-       *
-       *     "- For linear render target surfaces, the pitch must be a
-       *        multiple of the element size for non-YUV surface formats.
-       *        Pitch must be a multiple of 2 * element size for YUV surface
-       *        formats.
-       *      - For other linear surfaces, the pitch can be any multiple of
-       *        bytes.
-       *      - For tiled surfaces, the pitch must be a multiple of the tile
-       *        width."
-       *
-       * Different requirements may exist when the bo is used in different
-       * places, but our alignments here should be good enough that we do not
-       * need to check layout->info->usage.
-       */
-      switch (layout->tiling) {
-      case INTEL_TILING_X:
-         align_w = 512;
-         align_h = 8;
-         break;
-      case INTEL_TILING_Y:
-         align_w = 128;
-         align_h = 32;
-         break;
-      default:
-         if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
-             layout->format.numericFormat == XGL_NUM_FMT_DS) {
-            /*
-             * From the Sandy Bridge PRM, volume 1 part 2, page 22:
-             *
-             *     "A 4KB tile is subdivided into 8-high by 8-wide array of
-             *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
-             *      bytes."
-             *
-             * Since we asked for INTEL_TILING_NONE instead of the non-existent
-             * INTEL_TILING_W, we want to align to W tiles here.
-             */
-            align_w = 64;
-            align_h = 64;
-         }
-         else {
-            /* some good enough values */
-            align_w = 64;
-            align_h = 2;
-         }
-         break;
-      }
-
-      w = u_align(w, align_w);
-      h = u_align(h, align_h);
-
-      /* make sure the bo is mappable */
-      if (layout->tiling != INTEL_TILING_NONE) {
-         /*
-          * Usually only the first 256MB of the GTT is mappable.
-          *
-          * See also how intel_context::max_gtt_map_object_size is calculated.
-          */
-         const size_t mappable_gtt_size = 256 * 1024 * 1024;
-
-         /*
-          * Be conservative.  We may be able to switch from VALIGN_4 to
-          * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
-          */
-         if (mappable_gtt_size / w / 4 < h) {
-            if (layout->valid_tilings & (1 << INTEL_TILING_NONE)) {
-               layout->tiling = INTEL_TILING_NONE;
-               continue;
-            }
-            else {
-               /* send a warning? */
-            }
-         }
-      }
-
-      layout->bo_stride = w;
-      layout->bo_height = h;
-      break;
-   }
-
-   return (layout->bo_height <= intel_max_resource_size / layout->bo_stride);
-}
-
-static void
-tex_layout_calculate_hiz_size(struct tex_layout *layout)
-{
-   const XGL_IMAGE_CREATE_INFO *info = layout->info;
-   const int hz_align_j = 8;
-   int hz_width, hz_height;
-
-   if (!layout->hiz)
-      return;
-
-   /*
-    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
-    * PRM, volume 2 part 1, page 312-313.
-    *
-    * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
-    * memory row.
-    */
-
-   hz_width = u_align(layout->levels[0].w, 16);
-
-   if (info->imageType == XGL_IMAGE_3D) {
-      unsigned lv;
-
-      hz_height = 0;
-
-      for (lv = 0; lv < info->mipLevels; lv++) {
-         const unsigned h = u_align(layout->levels[lv].h, hz_align_j);
-         hz_height += h * layout->levels[lv].d;
-      }
-
-      hz_height /= 2;
-   }
-   else {
-      const unsigned h0 = u_align(layout->levels[0].h, hz_align_j);
-      unsigned hz_qpitch = h0;
-
-      if (layout->array_spacing_full) {
-         const unsigned h1 = u_align(layout->levels[1].h, hz_align_j);
-         const unsigned htail =
-            ((intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
-
-         hz_qpitch += h1 + htail;
-      }
-
-      hz_height = hz_qpitch * info->arraySize / 2;
-
-      if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7))
-         hz_height = u_align(hz_height, 8);
-   }
-
-   /* align to Y-tile */
-   layout->hiz_stride = u_align(hz_width, 128);
-   layout->hiz_height = u_align(hz_height, 32);
-}
-
-static bool
-img_alloc_slices(struct intel_img *img,
-                 XGL_UINT levels, XGL_INT depth,
-                 XGL_UINT array_size)
-{
-   struct intel_img_slice *slices;
-   int total_depth, lv;
-
-   /* sum the depths of all levels */
-   total_depth = 0;
-   for (lv = 0; lv < levels; lv++)
-      total_depth += u_minify(depth, lv);
-
-   /*
-    * There are (depth * tex->base.array_size) slices in total.  Either depth
-    * is one (non-3D) or templ->array_size is one (non-array), but it does
-    * not matter.
-    */
-   slices = icd_alloc(sizeof(*slices) * total_depth * array_size,
-           0, XGL_SYSTEM_ALLOC_INTERNAL);
-   if (!slices)
-      return false;
-
-   img->slices[0] = slices;
-
-   /* point to the respective positions in the buffer */
-   for (lv = 1; lv < levels; lv++) {
-      img->slices[lv] = img->slices[lv - 1] +
-         u_minify(depth, lv - 1) * array_size;
-   }
-
-   return true;
-}
-
 static void img_destroy(struct intel_obj *obj)
 {
     struct intel_img *img = intel_img_from_obj(obj);
@@ -1071,7 +54,7 @@
         {
             XGL_MEMORY_REQUIREMENTS *mem_req = data;
 
-            mem_req->size = img->bo_stride * img->bo_height;
+            mem_req->size = img->layout.bo_stride * img->layout.bo_height;
             mem_req->alignment = 4096;
             mem_req->heapCount = 1;
             mem_req->heaps[0] = 0;
@@ -1091,59 +74,24 @@
                             const XGL_IMAGE_CREATE_INFO *info,
                             struct intel_img **img_ret)
 {
-    struct tex_layout layout;
     struct intel_img *img;
-    XGL_RESULT ret;
 
     img = (struct intel_img *) intel_base_create(dev, sizeof(*img),
             dev->base.dbg, XGL_DBG_OBJECT_IMAGE, info, 0);
     if (!img)
         return XGL_ERROR_OUT_OF_MEMORY;
 
-    if (!img_alloc_slices(img, info->mipLevels, info->extent.depth,
-                info->arraySize)) {
-        intel_img_destroy(img);
-        return XGL_ERROR_OUT_OF_MEMORY;
-    }
-
-    ret = tex_layout_init(&layout, dev->gpu, info, img->slices);
-    if (ret != XGL_SUCCESS) {
-        intel_img_destroy(img);
-        return ret;
-    }
-
-    if (info->imageType == XGL_IMAGE_3D)
-        tex_layout_3d(&layout);
-    else
-        tex_layout_2d(&layout);
-
-    if (!tex_layout_calculate_bo_size(&layout)) {
-        intel_img_destroy(img);
-        return XGL_ERROR_INVALID_MEMORY_SIZE;
-    }
-
-    tex_layout_calculate_hiz_size(&layout);
+    intel_layout_init(&img->layout, dev, info);
 
     /* TODO */
-    if (layout.hiz || layout.separate_stencil) {
+    if (img->layout.aux_type != INTEL_LAYOUT_AUX_NONE ||
+        img->layout.separate_stencil) {
         intel_dev_log(dev, XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0,
                 XGL_NULL_HANDLE, 0, 0, "HiZ or separate stencil enabled");
         intel_img_destroy(img);
         return XGL_ERROR_INVALID_MEMORY_SIZE;
     }
 
-    img->bo_format = layout.format;
-    img->tiling = layout.tiling;
-    img->bo_stride = layout.bo_stride;
-    img->bo_height = layout.bo_height;
-    img->block_width = layout.block_width;
-    img->block_height = layout.block_height;
-    img->block_size = layout.block_size;
-    img->halign_8 = (layout.align_i == 8);
-    img->valign_4 = (layout.align_j == 4);
-    img->array_spacing_full = layout.array_spacing_full;
-    img->interleaved = layout.interleaved;
-
     img->obj.destroy = img_destroy;
     img->obj.base.get_info = img_get_info;
 
@@ -1154,9 +102,6 @@
 
 void intel_img_destroy(struct intel_img *img)
 {
-    if (img->slices[0])
-        icd_free(img->slices[0]);
-
     intel_base_destroy(&img->obj.base);
 }
 
@@ -1184,21 +129,20 @@
     case XGL_INFO_TYPE_SUBRESOURCE_LAYOUT:
         {
             XGL_SUBRESOURCE_LAYOUT *layout = (XGL_SUBRESOURCE_LAYOUT *) pData;
-            const struct intel_img_slice *slice =
-                &img->slices[pSubresource->mipLevel][pSubresource->arraySlice];
-            const unsigned int bx = slice->x / img->block_width;
-            const unsigned int by = slice->y / img->block_height;
+            unsigned x, y;
+
+            intel_layout_get_slice_pos(&img->layout, pSubresource->mipLevel,
+                    pSubresource->arraySlice, &x, &y);
+            intel_layout_pos_to_mem(&img->layout, x, y, &x, &y);
 
             *pDataSize = sizeof(XGL_SUBRESOURCE_LAYOUT);
 
-            /*
-             * size is not readily available and depthPitch might not be
-             * available.  Leave them alone for now.
-             */
-            layout->offset = by * img->bo_stride + bx * img->block_size;
-            layout->size = 0;
-            layout->rowPitch = img->bo_stride;
-            layout->depthPitch = 0;
+            layout->offset = intel_layout_mem_to_off(&img->layout, x, y);
+            layout->size = intel_layout_get_slice_size(&img->layout,
+                    pSubresource->mipLevel);
+            layout->rowPitch = img->layout.bo_stride;
+            layout->depthPitch = intel_layout_get_slice_stride(&img->layout,
+                    pSubresource->mipLevel);
         }
         break;
     default:
commit	2b685d7c2c6719005abb44bb5cc0e9d3bd2657a3	[log] [tgz]
author	Chia-I Wu <olvaffe@gmail.com>	Thu Aug 14 13:45:37 2014 +0800
committer	Chia-I Wu <olvaffe@gmail.com>	Fri Aug 15 15:11:34 2014 +0800
tree	20b57aff8f468265417e2cc8f759942a004e7385
parent	8a8d8b6887e85a9a89fe51ba3db2594c2f30a346 [diff] [blame]