blob: 3a2a78122a43c565b5370e03a3adc191945dbba9 [file] [log] [blame]
/*
 * Vulkan
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
Chia-I Wud1eb90c2015-03-07 06:01:45 +080034 LAYOUT_TILING_NONE = 1 << GEN6_TILING_NONE,
35 LAYOUT_TILING_X = 1 << GEN6_TILING_X,
36 LAYOUT_TILING_Y = 1 << GEN6_TILING_Y,
37 LAYOUT_TILING_W = 1 << GEN8_TILING_W,
Chia-I Wu4bc47012014-08-14 13:03:25 +080038
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
Chia-I Wuc94f3e52014-10-07 14:45:05 +080046 struct intel_dev *dev;
47
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080048 const struct intel_gpu *gpu;
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -060049 const VkImageCreateInfo *info;
Chia-I Wu794d12a2014-09-15 14:55:25 +080050 bool scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +080051
52 bool compressed;
53
54 unsigned h0, h1;
55 unsigned max_x, max_y;
56};
57
58static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080059layout_get_slice_size(const struct intel_layout *layout,
60 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080061 unsigned level, unsigned *width, unsigned *height)
62{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -060063 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080064 unsigned w, h;
65
Chia-I Wu457d0a62014-08-18 13:02:26 +080066 w = u_minify(layout->width0, level);
67 h = u_minify(layout->height0, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080068
69 /*
70 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
71 *
72 * "The dimensions of the mip maps are first determined by applying the
73 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
74 * if necessary, they are padded out to compression block boundaries."
75 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080076 w = u_align(w, layout->block_width);
77 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080078
79 /*
80 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
81 *
82 * "If the surface is multisampled (4x), these values must be adjusted
83 * as follows before proceeding:
84 *
85 * W_L = ceiling(W_L / 2) * 4
86 * H_L = ceiling(H_L / 2) * 4"
87 *
88 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
89 *
90 * "If the surface is multisampled and it is a depth or stencil surface
91 * or Multisampled Surface StorageFormat in SURFACE_STATE is
92 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
93 * proceeding:
94 *
95 * #samples W_L = H_L =
96 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
97 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
98 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
99 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
100 *
101 * For interleaved samples (4x), where pixels
102 *
103 * (x, y ) (x+1, y )
104 * (x, y+1) (x+1, y+1)
105 *
106 * would be is occupied by
107 *
108 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
109 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
110 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
111 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
112 *
113 * Thus the need to
114 *
Chia-I Wu457d0a62014-08-18 13:02:26 +0800115 * w = align(w, 2) * 2;
116 * y = align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800117 */
118 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800119 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800120 case 0:
121 case 1:
122 break;
123 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800124 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800125 break;
126 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800127 w = u_align(w, 2) * 2;
128 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800129 break;
130 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800131 w = u_align(w, 2) * 4;
132 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800133 break;
134 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800135 w = u_align(w, 2) * 4;
136 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800137 break;
138 default:
139 assert(!"unsupported sample count");
140 break;
141 }
142 }
143
Chia-I Wu457d0a62014-08-18 13:02:26 +0800144 /*
145 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
146 *
147 * "For separate stencil buffer, the width must be mutiplied by 2 and
148 * height divided by 2..."
149 *
150 * To make things easier (for transfer), we will just double the stencil
151 * stride in 3DSTATE_STENCIL_BUFFER.
152 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800153 w = u_align(w, layout->align_i);
154 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800155
156 *width = w;
157 *height = h;
158}
159
160static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800161layout_get_num_layers(const struct intel_layout *layout,
162 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800163{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600164 const VkImageCreateInfo *info = params->info;
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -0600165 unsigned num_layers = info->arrayLayers;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800166
167 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800168 if (info->samples > 1 && !layout->interleaved_samples)
169 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800170
171 return num_layers;
172}
173
174static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800175layout_init_layer_height(struct intel_layout *layout,
176 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800177{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600178 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800179 unsigned num_layers;
180
Chia-I Wu457d0a62014-08-18 13:02:26 +0800181 if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
182 return;
183
Chia-I Wu4bc47012014-08-14 13:03:25 +0800184 num_layers = layout_get_num_layers(layout, params);
185 if (num_layers <= 1)
186 return;
187
Chia-I Wu4bc47012014-08-14 13:03:25 +0800188 /*
189 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
190 *
191 * "The following equation is used for surface formats other than
192 * compressed textures:
193 *
194 * QPitch = (h0 + h1 + 11j)"
195 *
196 * "The equation for compressed textures (BC* and FXT1 surface formats)
197 * follows:
198 *
199 * QPitch = (h0 + h1 + 11j) / 4"
200 *
201 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
202 * value calculated in the equation above, for every other odd Surface
203 * Height starting from 1 i.e. 1,5,9,13"
204 *
205 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
206 *
207 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
208 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
209 *
210 * QPitch = (h0 + h1 + 12j)
211 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
212 *
213 * (There are many typos or missing words here...)"
214 *
215 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
216 * the base address. The PRM divides QPitch by 4 for compressed formats
217 * because the block height for those formats are 4, and it wants QPitch to
218 * mean the number of memory rows, as opposed to texel rows, between
219 * slices. Since we use texel rows everywhere, we do not need to divide
220 * QPitch by 4.
221 */
222 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800223 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800224
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800225 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800226 layout->height0 % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800227 layout->layer_height += 4;
228
229 params->max_y += layout->layer_height * (num_layers - 1);
230}
231
232static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800233layout_init_lods(struct intel_layout *layout,
234 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800235{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600236 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800237 unsigned cur_x, cur_y;
238 unsigned lv;
239
240 cur_x = 0;
241 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800242 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu457d0a62014-08-18 13:02:26 +0800243 unsigned lod_w, lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800244
Chia-I Wu457d0a62014-08-18 13:02:26 +0800245 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800246
Chia-I Wu457d0a62014-08-18 13:02:26 +0800247 layout->lods[lv].x = cur_x;
248 layout->lods[lv].y = cur_y;
249 layout->lods[lv].slice_width = lod_w;
250 layout->lods[lv].slice_height = lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800251
Chia-I Wu457d0a62014-08-18 13:02:26 +0800252 switch (layout->walk) {
253 case INTEL_LAYOUT_WALK_LOD:
254 lod_h *= layout_get_num_layers(layout, params);
255 if (lv == 1)
256 cur_x += lod_w;
257 else
258 cur_y += lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800259
Chia-I Wu457d0a62014-08-18 13:02:26 +0800260 /* every LOD begins at tile boundaries */
261 if (info->mipLevels > 1) {
Tony Barbour8205d902015-04-16 15:59:00 -0600262 assert(layout->format == VK_FORMAT_S8_UINT);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800263 cur_x = u_align(cur_x, 64);
264 cur_y = u_align(cur_y, 64);
265 }
266 break;
267 case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800268 /* MIPLAYOUT_BELOW */
269 if (lv == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800270 cur_x += lod_w;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800271 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800272 cur_y += lod_h;
273 break;
274 case INTEL_LAYOUT_WALK_3D:
275 {
276 const unsigned num_slices = u_minify(info->extent.depth, lv);
277 const unsigned num_slices_per_row = 1 << lv;
278 const unsigned num_rows =
279 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
280
281 lod_w *= num_slices_per_row;
282 lod_h *= num_rows;
283
284 cur_y += lod_h;
285 }
286 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800287 }
288
Chia-I Wu457d0a62014-08-18 13:02:26 +0800289 if (params->max_x < layout->lods[lv].x + lod_w)
290 params->max_x = layout->lods[lv].x + lod_w;
291 if (params->max_y < layout->lods[lv].y + lod_h)
292 params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800293 }
294
Chia-I Wu457d0a62014-08-18 13:02:26 +0800295 if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
296 params->h0 = layout->lods[0].slice_height;
297
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800298 if (info->mipLevels > 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800299 params->h1 = layout->lods[1].slice_height;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800300 else
301 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
302 }
303}
304
305static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800306layout_init_alignments(struct intel_layout *layout,
307 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800308{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600309 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800310
311 /*
312 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
313 *
314 * "surface format align_i align_j
315 * YUV 4:2:2 formats 4 *see below
316 * BC1-5 4 4
317 * FXT1 8 4
318 * all other formats 4 *see below"
319 *
320 * "- align_j = 4 for any depth buffer
321 * - align_j = 2 for separate stencil buffer
322 * - align_j = 4 for any render target surface is multisampled (4x)
323 * - align_j = 4 for any render target surface with Surface Vertical
324 * Alignment = VALIGN_4
325 * - align_j = 2 for any render target surface with Surface Vertical
326 * Alignment = VALIGN_2
327 * - align_j = 2 for all other render target surface
328 * - align_j = 2 for any sampling engine surface with Surface Vertical
329 * Alignment = VALIGN_2
330 * - align_j = 4 for any sampling engine surface with Surface Vertical
331 * Alignment = VALIGN_4"
332 *
333 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
334 *
335 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
336 * the Surface Format is 96 bits per element (BPE)."
337 *
338 * They can be rephrased as
339 *
340 * align_i align_j
341 * compressed formats block width block height
342 * PIPE_FORMAT_S8_UINT 4 2
343 * other depth/stencil formats 4 4
344 * 4x multisampled 4 4
345 * bpp 96 4 2
346 * others 4 2 or 4
347 */
348
349 /*
350 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
351 *
352 * "surface defined by surface format align_i align_j
353 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
354 * not D16_UNORM 4 4
355 * 3DSTATE_STENCIL_BUFFER N/A 8 8
356 * SURFACE_STATE BC*, ETC*, EAC* 4 4
357 * FXT1 8 4
358 * all others (set by SURFACE_STATE)"
359 *
360 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
361 *
362 * "- This field (Surface Vertical Aligment) is intended to be set to
363 * VALIGN_4 if the surface was rendered as a depth buffer, for a
364 * multisampled (4x) render target, or for a multisampled (8x)
365 * render target, since these surfaces support only alignment of 4.
366 * - Use of VALIGN_4 for other surfaces is supported, but uses more
367 * memory.
368 * - This field must be set to VALIGN_4 for all tiled Y Render Target
369 * surfaces.
370 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
371 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
372 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
373 * must be set to VALIGN_4."
374 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
375 *
376 * "- This field (Surface Horizontal Aligment) is intended to be set to
377 * HALIGN_8 only if the surface was rendered as a depth buffer with
378 * Z16 format or a stencil buffer, since these surfaces support only
379 * alignment of 8.
380 * - Use of HALIGN_8 for other surfaces is supported, but uses more
381 * memory.
382 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
383 * - This field must be set to HALIGN_8 if the Surface Format is
384 * FXT1."
385 *
386 * They can be rephrased as
387 *
388 * align_i align_j
389 * compressed formats block width block height
390 * PIPE_FORMAT_Z16_UNORM 8 4
391 * PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu457d0a62014-08-18 13:02:26 +0800392 * other depth/stencil formats 4 4
Chia-I Wu4bc47012014-08-14 13:03:25 +0800393 * 2x or 4x multisampled 4 or 8 4
394 * tiled Y 4 or 8 4 (if rt)
395 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
396 * others 4 or 8 2 or 4
397 */
398
399 if (params->compressed) {
400 /* this happens to be the case */
401 layout->align_i = layout->block_width;
402 layout->align_j = layout->block_height;
Courtney Goeltzenleuchterc3b8eea2015-09-10 14:14:11 -0600403 } else if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800404 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700405 switch (layout->format) {
Tony Barbour8205d902015-04-16 15:59:00 -0600406 case VK_FORMAT_D16_UNORM:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800407 layout->align_i = 8;
408 layout->align_j = 4;
409 break;
Tony Barbour8205d902015-04-16 15:59:00 -0600410 case VK_FORMAT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800411 layout->align_i = 8;
412 layout->align_j = 8;
413 break;
414 default:
415 layout->align_i = 4;
416 layout->align_j = 4;
417 break;
418 }
419 } else {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700420 switch (layout->format) {
Tony Barbour8205d902015-04-16 15:59:00 -0600421 case VK_FORMAT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800422 layout->align_i = 4;
423 layout->align_j = 2;
424 break;
425 default:
426 layout->align_i = 4;
427 layout->align_j = 4;
428 break;
429 }
430 }
431 } else {
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800432 const bool valign_4 =
433 (info->samples > 1) ||
434 (intel_gpu_gen(params->gpu) >= INTEL_GEN(8)) ||
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800435 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800436 layout->tiling == GEN6_TILING_Y &&
Courtney Goeltzenleuchter9cc421e2015-04-08 15:36:08 -0600437 (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800438
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800439 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
440 intel_gpu_gen(params->gpu) <= INTEL_GEN(7.5) && valign_4)
Tony Barbour8205d902015-04-16 15:59:00 -0600441 assert(layout->format != VK_FORMAT_R32G32B32_SFLOAT);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800442
443 layout->align_i = 4;
444 layout->align_j = (valign_4) ? 4 : 2;
445 }
446
447 /*
448 * the fact that align i and j are multiples of block width and height
449 * respectively is what makes the size of the bo a multiple of the block
450 * size, slices start at block boundaries, and many of the computations
451 * work.
452 */
453 assert(layout->align_i % layout->block_width == 0);
454 assert(layout->align_j % layout->block_height == 0);
455
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800456 /* make sure u_align() works */
457 assert(u_is_pow2(layout->align_i) &&
458 u_is_pow2(layout->align_j));
459 assert(u_is_pow2(layout->block_width) &&
460 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800461}
462
463static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800464layout_get_valid_tilings(const struct intel_layout *layout,
465 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800466{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600467 const VkImageCreateInfo *info = params->info;
468 const VkFormat format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800469 unsigned valid_tilings = LAYOUT_TILING_ALL;
470
Chia-I Wu794d12a2014-09-15 14:55:25 +0800471 /*
472 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
473 *
474 * "Display/Overlay Y-Major not supported.
475 * X-Major required for Async Flips"
476 */
477 if (params->scanout)
478 valid_tilings &= LAYOUT_TILING_X;
479
Tony Barbour8205d902015-04-16 15:59:00 -0600480 if (info->tiling == VK_IMAGE_TILING_LINEAR)
Chia-I Wu6ac93992014-08-30 18:23:28 +0800481 valid_tilings &= LAYOUT_TILING_NONE;
482
Chia-I Wu4bc47012014-08-14 13:03:25 +0800483 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800484 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
485 *
486 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
487 * Depth Buffer is not supported."
488 *
489 * "The Depth Buffer, if tiled, must use Y-Major tiling."
490 *
491 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
492 *
493 * "W-Major Tile Format is used for separate stencil."
494 */
Courtney Goeltzenleuchterc3b8eea2015-09-10 14:14:11 -0600495 if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700496 switch (format) {
Tony Barbour8205d902015-04-16 15:59:00 -0600497 case VK_FORMAT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800498 valid_tilings &= LAYOUT_TILING_W;
499 break;
500 default:
501 valid_tilings &= LAYOUT_TILING_Y;
502 break;
503 }
504 }
505
Courtney Goeltzenleuchter9cc421e2015-04-08 15:36:08 -0600506 if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800507 /*
508 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
509 *
510 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
511 * either TileX or Linear."
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800512 *
513 * From the Haswell PRM, volume 5, page 32:
514 *
515 * "NOTE: 128 BPP format color buffer (render target) supports
516 * Linear, TiledX and TiledY."
Chia-I Wu4bc47012014-08-14 13:03:25 +0800517 */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800518 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7.5) && layout->block_size == 16)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800519 valid_tilings &= ~LAYOUT_TILING_Y;
520
521 /*
522 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
523 *
524 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
525 * for all tiled Y Render Target surfaces."
526 *
527 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
528 */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800529 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
530 intel_gpu_gen(params->gpu) <= INTEL_GEN(7.5) &&
Tony Barbour8205d902015-04-16 15:59:00 -0600531 layout->format == VK_FORMAT_R32G32B32_SFLOAT)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800532 valid_tilings &= ~LAYOUT_TILING_Y;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800533
534 valid_tilings &= ~LAYOUT_TILING_W;
535 }
536
Courtney Goeltzenleuchterad870812015-04-15 15:29:59 -0600537 if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800538 if (intel_gpu_gen(params->gpu) < INTEL_GEN(8))
539 valid_tilings &= ~LAYOUT_TILING_W;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800540 }
541
542 /* no conflicting binding flags */
543 assert(valid_tilings);
544
545 return valid_tilings;
546}
547
548static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800549layout_init_tiling(struct intel_layout *layout,
550 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800551{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600552 const VkImageCreateInfo *info = params->info;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800553 unsigned preferred_tilings;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800554
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800555 layout->valid_tilings = layout_get_valid_tilings(layout, params);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800556
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800557 preferred_tilings = layout->valid_tilings;
558
559 /* no fencing nor BLT support */
560 if (preferred_tilings & ~LAYOUT_TILING_W)
561 preferred_tilings &= ~LAYOUT_TILING_W;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800562
Courtney Goeltzenleuchter9cc421e2015-04-08 15:36:08 -0600563 if (info->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
Courtney Goeltzenleuchterad870812015-04-15 15:29:59 -0600564 VK_IMAGE_USAGE_SAMPLED_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800565 /*
566 * heuristically set a minimum width/height for enabling tiling
567 */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800568 if (layout->width0 < 64 && (preferred_tilings & ~LAYOUT_TILING_X))
569 preferred_tilings &= ~LAYOUT_TILING_X;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800570
Chia-I Wu457d0a62014-08-18 13:02:26 +0800571 if ((layout->width0 < 32 || layout->height0 < 16) &&
572 (layout->width0 < 16 || layout->height0 < 32) &&
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800573 (preferred_tilings & ~LAYOUT_TILING_Y))
574 preferred_tilings &= ~LAYOUT_TILING_Y;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800575 } else {
576 /* force linear if we are not sure where the texture is bound to */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800577 if (preferred_tilings & LAYOUT_TILING_NONE)
578 preferred_tilings &= LAYOUT_TILING_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800579 }
580
581 /* prefer tiled over linear */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800582 if (preferred_tilings & LAYOUT_TILING_Y)
583 layout->tiling = GEN6_TILING_Y;
584 else if (preferred_tilings & LAYOUT_TILING_X)
585 layout->tiling = GEN6_TILING_X;
586 else if (preferred_tilings & LAYOUT_TILING_W)
587 layout->tiling = GEN8_TILING_W;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800588 else
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800589 layout->tiling = GEN6_TILING_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800590}
591
592static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800593layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800594 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800595{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600596 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800597
598 /*
599 * It is not explicitly states, but render targets are expected to be
600 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
601 * to be IMS (samples interleaved).
602 *
603 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
604 */
Courtney Goeltzenleuchterc3b8eea2015-09-10 14:14:11 -0600605 if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800606 /*
607 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
608 *
609 * "note that the depth buffer and stencil buffer have an implied
610 * value of ARYSPC_FULL"
611 */
Tony Barbour8205d902015-04-16 15:59:00 -0600612 layout->walk = (info->imageType == VK_IMAGE_TYPE_3D) ?
Chia-I Wu457d0a62014-08-18 13:02:26 +0800613 INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800614
Chia-I Wu457d0a62014-08-18 13:02:26 +0800615 layout->interleaved_samples = true;
616 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800617 /*
618 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
619 *
620 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
621 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
622 * Array Spacing) must be set to ARYSPC_LOD0."
623 *
624 * As multisampled resources are not mipmapped, we never use
625 * ARYSPC_FULL for them.
626 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800627 if (info->samples > 1)
628 assert(info->mipLevels == 1);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800629
630 layout->walk =
Tony Barbour8205d902015-04-16 15:59:00 -0600631 (info->imageType == VK_IMAGE_TYPE_3D) ? INTEL_LAYOUT_WALK_3D :
Chia-I Wu457d0a62014-08-18 13:02:26 +0800632 (info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
633 INTEL_LAYOUT_WALK_LOD;
634
635 layout->interleaved_samples = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800636 }
637}
638
639static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800640layout_init_walk_gen6(struct intel_layout *layout,
641 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800642{
Chia-I Wu4bc47012014-08-14 13:03:25 +0800643 /*
644 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
645 *
646 * "The separate stencil buffer does not support mip mapping, thus the
647 * storage for LODs other than LOD 0 is not needed. The following
648 * QPitch equation applies only to the separate stencil buffer:
649 *
650 * QPitch = h_0"
651 *
652 * GEN6 does not support compact spacing otherwise.
653 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800654 layout->walk =
Tony Barbour8205d902015-04-16 15:59:00 -0600655 (params->info->imageType == VK_IMAGE_TYPE_3D) ? INTEL_LAYOUT_WALK_3D :
656 (layout->format == VK_FORMAT_S8_UINT) ? INTEL_LAYOUT_WALK_LOD :
Chia-I Wu457d0a62014-08-18 13:02:26 +0800657 INTEL_LAYOUT_WALK_LAYER;
658
659 /* GEN6 supports only interleaved samples */
660 layout->interleaved_samples = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800661}
662
663static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800664layout_init_walk(struct intel_layout *layout,
665 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800666{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800667 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800668 layout_init_walk_gen7(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800669 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800670 layout_init_walk_gen6(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800671}
672
673static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800674layout_init_size_and_format(struct intel_layout *layout,
675 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800676{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600677 const VkImageCreateInfo *info = params->info;
678 VkFormat format = info->format;
Chia-I Wu9a056dd2015-02-11 13:19:39 -0700679 bool require_separate_stencil = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800680
Chia-I Wu457d0a62014-08-18 13:02:26 +0800681 layout->width0 = info->extent.width;
682 layout->height0 = info->extent.height;
683
Chia-I Wu4bc47012014-08-14 13:03:25 +0800684 /*
685 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
686 *
687 * "This field (Separate Stencil Buffer Enable) must be set to the same
688 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
689 *
690 * GEN7+ requires separate stencil buffers.
691 */
Courtney Goeltzenleuchterc3b8eea2015-09-10 14:14:11 -0600692 if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800693 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800694 require_separate_stencil = true;
695 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800696 require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800697 }
698
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800699 switch (format) {
Tony Barbour8205d902015-04-16 15:59:00 -0600700 case VK_FORMAT_D24_UNORM_S8_UINT:
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800701 if (require_separate_stencil) {
Courtney Goeltzenleuchter7ed10592015-09-10 17:17:43 -0600702 format = VK_FORMAT_D24_UNORM_X8;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800703 layout->separate_stencil = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800704 }
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800705 break;
Tony Barbour8205d902015-04-16 15:59:00 -0600706 case VK_FORMAT_D32_SFLOAT_S8_UINT:
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800707 if (require_separate_stencil) {
Tony Barbour8205d902015-04-16 15:59:00 -0600708 format = VK_FORMAT_D32_SFLOAT;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800709 layout->separate_stencil = true;
710 }
711 break;
712 default:
713 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800714 }
715
Chia-I Wu4bc47012014-08-14 13:03:25 +0800716 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800717 layout->block_width = icd_format_get_block_width(format);
718 layout->block_height = layout->block_width;
719 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800720
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800721 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800722}
723
724static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800725layout_want_mcs(struct intel_layout *layout,
726 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800727{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600728 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800729 bool want_mcs = false;
730
731 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800732 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800733 return false;
734
Tony Barbour8205d902015-04-16 15:59:00 -0600735 if (info->imageType != VK_IMAGE_TYPE_2D ||
Courtney Goeltzenleuchter9cc421e2015-04-08 15:36:08 -0600736 !(info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800737 return false;
738
739 /*
740 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
741 *
742 * "For Render Target and Sampling Engine Surfaces:If the surface is
743 * multisampled (Number of Multisamples any value other than
744 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
745 *
746 * "This field must be set to 0 for all SINT MSRTs when all RT channels
747 * are not written"
748 */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800749 if (info->samples > 1 && !icd_format_is_int(info->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800750 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800751 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800752 /*
753 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
754 *
755 * "When MCS is buffer is used for color clear of non-multisampler
756 * render target, the following restrictions apply.
757 * - Support is limited to tiled render targets.
758 * - Support is for non-mip-mapped and non-array surface types
759 * only.
760 * - Clear is supported only on the full RT; i.e., no partial clear
761 * or overlapping clears.
762 * - MCS buffer for non-MSRT is supported only for RT formats
763 * 32bpp, 64bpp and 128bpp.
764 * ..."
765 */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800766 if (layout->tiling != GEN6_TILING_NONE &&
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -0600767 info->mipLevels == 1 && info->arrayLayers == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800768 switch (layout->block_size) {
769 case 4:
770 case 8:
771 case 16:
772 want_mcs = true;
773 break;
774 default:
775 break;
776 }
777 }
778 }
779
780 return want_mcs;
781}
782
783static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800784layout_want_hiz(const struct intel_layout *layout,
785 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800786{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600787 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800788
Chia-I Wuc45db532015-02-19 11:20:38 -0700789 if (intel_debug & INTEL_DEBUG_NOHIZ)
790 return false;
791
Courtney Goeltzenleuchterc3b8eea2015-09-10 14:14:11 -0600792 if (!(info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800793 return false;
794
Chia-I Wu9ec7e702015-02-19 13:18:42 -0700795 if (!intel_format_has_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800796 return false;
797
Chia-I Wu457d0a62014-08-18 13:02:26 +0800798 /*
Chia-I Wuc45db532015-02-19 11:20:38 -0700799 * HiZ implies separate stencil on Gen6. We do not want to copy stencils
800 * values between combined and separate stencil buffers when HiZ is enabled
801 * or disabled.
Chia-I Wu457d0a62014-08-18 13:02:26 +0800802 */
Chia-I Wuc45db532015-02-19 11:20:38 -0700803 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6))
Chia-I Wuc94f3e52014-10-07 14:45:05 +0800804 return false;
Chia-I Wuc94f3e52014-10-07 14:45:05 +0800805
Chia-I Wu457d0a62014-08-18 13:02:26 +0800806 return true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800807}
808
809static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800810layout_init_aux(struct intel_layout *layout,
811 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800812{
813 if (layout_want_hiz(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800814 layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800815 else if (layout_want_mcs(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800816 layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800817}
818
819static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800820layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800821{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -0600822 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800823 int align_w = 1, align_h = 1, pad_h = 0;
824
825 /*
826 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
827 *
828 * "To determine the necessary padding on the bottom and right side of
829 * the surface, refer to the table in Section 7.18.3.4 for the i and j
830 * parameters for the surface format in use. The surface must then be
831 * extended to the next multiple of the alignment unit size in each
832 * dimension, and all texels contained in this extended surface must
833 * have valid GTT entries."
834 *
835 * "For cube surfaces, an additional two rows of padding are required
836 * at the bottom of the surface. This must be ensured regardless of
837 * whether the surface is stored tiled or linear. This is due to the
838 * potential rotation of cache line orientation from memory to cache."
839 *
840 * "For compressed textures (BC* and FXT1 surface formats), padding at
841 * the bottom of the surface is to an even compressed row, which is
842 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
843 * purposes, these surfaces behave as if j = 8 only for surface
844 * padding purposes. The value of 4 for j still applies for mip level
845 * alignment and QPitch calculation."
846 */
Courtney Goeltzenleuchterad870812015-04-15 15:29:59 -0600847 if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800848 if (align_w < layout->align_i)
849 align_w = layout->align_i;
850 if (align_h < layout->align_j)
851 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800852
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800853 /* in case it is used as a cube */
Tony Barbour8205d902015-04-16 15:59:00 -0600854 if (info->imageType == VK_IMAGE_TYPE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800855 pad_h += 2;
856
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800857 if (params->compressed && align_h < layout->align_j * 2)
858 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800859 }
860
861 /*
862 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
863 *
864 * "If the surface contains an odd number of rows of data, a final row
865 * below the surface must be allocated."
866 */
Courtney Goeltzenleuchter9cc421e2015-04-08 15:36:08 -0600867 if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800868 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800869
870 /*
871 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu457d0a62014-08-18 13:02:26 +0800872 * intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800873 * To avoid out-of-bound access, we have to pad.
874 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800875 if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
876 info->mipLevels == 1 &&
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -0600877 info->arrayLayers == 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800878 info->extent.depth == 1) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800879 if (align_w < 8)
880 align_w = 8;
881 if (align_h < 4)
882 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800883 }
884
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800885 params->max_x = u_align(params->max_x, align_w);
886 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800887}
888
889/* note that this may force the texture to be linear */
890static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800891layout_calculate_bo_size(struct intel_layout *layout,
892 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800893{
894 assert(params->max_x % layout->block_width == 0);
895 assert(params->max_y % layout->block_height == 0);
896 assert(layout->layer_height % layout->block_height == 0);
897
898 layout->bo_stride =
899 (params->max_x / layout->block_width) * layout->block_size;
900 layout->bo_height = params->max_y / layout->block_height;
901
902 while (true) {
903 unsigned w = layout->bo_stride, h = layout->bo_height;
904 unsigned align_w, align_h;
905
906 /*
907 * From the Haswell PRM, volume 5, page 163:
908 *
909 * "For linear surfaces, additional padding of 64 bytes is required
910 * at the bottom of the surface. This is in addition to the padding
911 * required above."
912 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800913 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
Courtney Goeltzenleuchterad870812015-04-15 15:29:59 -0600914 (params->info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) &&
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800915 layout->tiling == GEN6_TILING_NONE)
916 h += (64 + layout->bo_stride - 1) / layout->bo_stride;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800917
918 /*
919 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
920 *
921 * "- For linear render target surfaces, the pitch must be a
922 * multiple of the element size for non-YUV surface formats.
923 * Pitch must be a multiple of 2 * element size for YUV surface
924 * formats.
925 * - For other linear surfaces, the pitch can be any multiple of
926 * bytes.
927 * - For tiled surfaces, the pitch must be a multiple of the tile
928 * width."
929 *
930 * Different requirements may exist when the bo is used in different
931 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800932 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800933 */
934 switch (layout->tiling) {
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800935 case GEN6_TILING_X:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800936 align_w = 512;
937 align_h = 8;
938 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800939 case GEN6_TILING_Y:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800940 align_w = 128;
941 align_h = 32;
942 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800943 case GEN8_TILING_W:
944 /*
945 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
946 *
947 * "A 4KB tile is subdivided into 8-high by 8-wide array of
948 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
949 * bytes."
950 */
951 align_w = 64;
952 align_h = 64;
953 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800954 default:
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800955 assert(layout->tiling == GEN6_TILING_NONE);
956 /* some good enough values */
957 align_w = 64;
958 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800959 break;
960 }
961
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800962 w = u_align(w, align_w);
963 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800964
965 /* make sure the bo is mappable */
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800966 if (layout->tiling != GEN6_TILING_NONE) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800967 /*
968 * Usually only the first 256MB of the GTT is mappable.
969 *
970 * See also how intel_context::max_gtt_map_object_size is calculated.
971 */
972 const size_t mappable_gtt_size = 256 * 1024 * 1024;
973
974 /*
975 * Be conservative. We may be able to switch from VALIGN_4 to
976 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
977 */
978 if (mappable_gtt_size / w / 4 < h) {
979 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
Chia-I Wud1eb90c2015-03-07 06:01:45 +0800980 layout->tiling = GEN6_TILING_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800981 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800982 if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800983 params->info->samples <= 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800984 layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800985
986 continue;
987 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800988 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800989 }
990 }
991 }
992
993 layout->bo_stride = w;
994 layout->bo_height = h;
995 break;
996 }
997}
998
999static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001000layout_calculate_hiz_size(struct intel_layout *layout,
1001 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001002{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -06001003 const VkImageCreateInfo *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001004 const unsigned hz_align_j = 8;
1005 enum intel_layout_walk_type hz_walk;
1006 unsigned hz_width, hz_height, lv;
1007 unsigned hz_clear_w, hz_clear_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001008
Chia-I Wu457d0a62014-08-18 13:02:26 +08001009 assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
1010
1011 assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
1012 layout->walk == INTEL_LAYOUT_WALK_3D);
1013
1014 /*
1015 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1016 *
1017 * "The hierarchical depth buffer does not support the LOD field, it is
1018 * assumed by hardware to be zero. A separate hierarachical depth
1019 * buffer is required for each LOD used, and the corresponding
1020 * buffer's state delivered to hardware each time a new depth buffer
1021 * state with modified LOD is delivered."
1022 *
1023 * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
1024 */
1025 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1026 hz_walk = layout->walk;
1027 else
1028 hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001029
1030 /*
1031 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1032 * PRM, volume 2 part 1, page 312-313.
1033 *
1034 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1035 * memory row.
1036 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001037 switch (hz_walk) {
1038 case INTEL_LAYOUT_WALK_LOD:
1039 {
1040 unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
1041 unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
1042 unsigned cur_tx, cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001043
Chia-I Wu457d0a62014-08-18 13:02:26 +08001044 /* figure out the tile offsets of LODs */
1045 hz_width = 0;
1046 hz_height = 0;
1047 cur_tx = 0;
1048 cur_ty = 0;
1049 for (lv = 0; lv < info->mipLevels; lv++) {
1050 unsigned tw, th;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001051
Chia-I Wu457d0a62014-08-18 13:02:26 +08001052 lod_tx[lv] = cur_tx;
1053 lod_ty[lv] = cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001054
Chia-I Wu457d0a62014-08-18 13:02:26 +08001055 tw = u_align(layout->lods[lv].slice_width, 16);
1056 th = u_align(layout->lods[lv].slice_height, hz_align_j) *
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -06001057 info->arrayLayers / 2;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001058 /* convert to Y-tiles */
1059 tw = u_align(tw, 128) / 128;
1060 th = u_align(th, 32) / 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001061
Chia-I Wu457d0a62014-08-18 13:02:26 +08001062 if (hz_width < cur_tx + tw)
1063 hz_width = cur_tx + tw;
1064 if (hz_height < cur_ty + th)
1065 hz_height = cur_ty + th;
1066
1067 if (lv == 1)
1068 cur_tx += tw;
1069 else
1070 cur_ty += th;
1071 }
1072
1073 /* convert tile offsets to memory offsets */
1074 for (lv = 0; lv < info->mipLevels; lv++) {
1075 layout->aux_offsets[lv] =
1076 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1077 }
1078 hz_width *= 128;
1079 hz_height *= 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001080 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001081 break;
1082 case INTEL_LAYOUT_WALK_LAYER:
1083 {
1084 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001085 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001086 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001087 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001088 const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001089
Chia-I Wu457d0a62014-08-18 13:02:26 +08001090 hz_width = u_align(layout->lods[0].slice_width, 16);
1091
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -06001092 hz_height = hz_qpitch * info->arrayLayers / 2;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001093 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1094 hz_height = u_align(hz_height, 8);
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001095
1096 layout->aux_layer_height = hz_qpitch;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001097 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001098 break;
1099 case INTEL_LAYOUT_WALK_3D:
1100 hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001101
Chia-I Wu457d0a62014-08-18 13:02:26 +08001102 hz_height = 0;
1103 for (lv = 0; lv < info->mipLevels; lv++) {
1104 const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
1105 /* according to the formula, slices are packed together vertically */
1106 hz_height += h * u_minify(info->extent.depth, lv);
1107 }
1108 hz_height /= 2;
1109 break;
Chia-I Wu9a056dd2015-02-11 13:19:39 -07001110 default:
1111 assert(!"unknown layout walk");
1112 hz_width = 0;
1113 hz_height = 0;
1114 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001115 }
1116
Chia-I Wu457d0a62014-08-18 13:02:26 +08001117 /*
1118 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1119 * Experiments on Haswell show that aligning the RECTLIST primitive and
1120 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1121 * aligned.
1122 */
1123 hz_clear_w = 8;
1124 hz_clear_h = 4;
1125 switch (info->samples) {
1126 case 0:
1127 case 1:
1128 default:
1129 break;
1130 case 2:
1131 hz_clear_w /= 2;
1132 break;
1133 case 4:
1134 hz_clear_w /= 2;
1135 hz_clear_h /= 2;
1136 break;
1137 case 8:
1138 hz_clear_w /= 4;
1139 hz_clear_h /= 2;
1140 break;
1141 case 16:
1142 hz_clear_w /= 4;
1143 hz_clear_h /= 4;
1144 break;
1145 }
1146
1147 for (lv = 0; lv < info->mipLevels; lv++) {
1148 if (u_minify(layout->width0, lv) % hz_clear_w ||
1149 u_minify(layout->height0, lv) % hz_clear_h)
1150 break;
1151 layout->aux_enables |= 1 << lv;
1152 }
1153
1154 /* we padded to allow this in layout_align() */
Courtney Goeltzenleuchter2ebc2342015-10-21 17:57:31 -06001155 if (info->mipLevels == 1 && info->arrayLayers == 1 && info->extent.depth == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +08001156 layout->aux_enables |= 0x1;
1157
Chia-I Wu4bc47012014-08-14 13:03:25 +08001158 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001159 layout->aux_stride = u_align(hz_width, 128);
1160 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001161}
1162
1163static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001164layout_calculate_mcs_size(struct intel_layout *layout,
1165 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001166{
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -06001167 const VkImageCreateInfo *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001168 int mcs_width, mcs_height, mcs_cpp;
1169 int downscale_x, downscale_y;
1170
Chia-I Wu457d0a62014-08-18 13:02:26 +08001171 assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001172
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001173 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001174 /*
1175 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1176 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1177 * need of scale down could be that the clear rectangle is used to clear
1178 * the MCS instead of the RT.
1179 *
1180 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1181 * 2x2 factor could come from that the hardware writes 128 bits (an
1182 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1183 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1184 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1185 * pixel block in the RT.
1186 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001187 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001188 case 2:
1189 case 4:
1190 downscale_x = 8;
1191 downscale_y = 2;
1192 mcs_cpp = 1;
1193 break;
1194 case 8:
1195 downscale_x = 2;
1196 downscale_y = 2;
1197 mcs_cpp = 4;
1198 break;
1199 case 16:
1200 downscale_x = 2;
1201 downscale_y = 1;
1202 mcs_cpp = 8;
1203 break;
1204 default:
1205 assert(!"unsupported sample count");
1206 return;
1207 break;
1208 }
1209
1210 /*
1211 * It also appears that the 2x2 subspans generated by the scaled-down
1212 * clear rectangle cannot be masked. The scale-down clear rectangle
1213 * thus must be aligned to 2x2, and we need to pad.
1214 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001215 mcs_width = u_align(layout->width0, downscale_x * 2);
1216 mcs_height = u_align(layout->height0, downscale_y * 2);
1217 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001218 /*
1219 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1220 *
1221 * " Pixels Lines
1222 * TiledY RT CL
1223 * bpp
1224 * 32 8 4
1225 * 64 4 4
1226 * 128 2 4
1227 *
1228 * TiledX RT CL
1229 * bpp
1230 * 32 16 2
1231 * 64 8 2
1232 * 128 4 2"
1233 *
1234 * This table and the two following tables define the RT alignments, the
1235 * clear rectangle alignments, and the clear rectangle scale factors.
1236 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1237 * that the clear rectangle alignments are 16x32 blocks, and the clear
1238 * rectangle scale factors are 8x16 blocks.
1239 *
1240 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1241 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1242 * 8x16 blocks.
1243 *
1244 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1245 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1246 * which says that a Y-tile maps to 128x256 blocks (\see
1247 * intel_get_non_msrt_mcs_alignment). It does not really change
1248 * anything except for the size of the allocated MCS. Let's see if we
1249 * hit out-of-bound access.
1250 */
1251 switch (layout->tiling) {
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001252 case GEN6_TILING_X:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001253 downscale_x = 64 / layout->block_size;
1254 downscale_y = 2;
1255 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001256 case GEN6_TILING_Y:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001257 downscale_x = 32 / layout->block_size;
1258 downscale_y = 4;
1259 break;
1260 default:
1261 assert(!"unsupported tiling mode");
1262 return;
1263 break;
1264 }
1265
1266 downscale_x *= 8;
1267 downscale_y *= 16;
1268
1269 /*
1270 * From the Haswell PRM, volume 7, page 652:
1271 *
1272 * "Clear rectangle must be aligned to two times the number of
1273 * pixels in the table shown below due to 16X16 hashing across the
1274 * slice."
1275 *
1276 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1277 * 2x2, and we need to pad.
1278 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001279 mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
1280 mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001281 mcs_cpp = 16; /* an OWord */
1282 }
1283
Chia-I Wu457d0a62014-08-18 13:02:26 +08001284 layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001285 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001286 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1287 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001288}
1289
1290/**
1291 * Initialize the layout. Callers should zero-initialize \p layout first.
1292 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001293void intel_layout_init(struct intel_layout *layout,
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001294 struct intel_dev *dev,
Courtney Goeltzenleuchter382489d2015-04-10 08:34:15 -06001295 const VkImageCreateInfo *info,
Chia-I Wu794d12a2014-09-15 14:55:25 +08001296 bool scanout)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001297{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001298 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001299
1300 memset(&params, 0, sizeof(params));
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001301 params.dev = dev;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001302 params.gpu = dev->gpu;
1303 params.info = info;
Chia-I Wu794d12a2014-09-15 14:55:25 +08001304 params.scanout = scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001305
1306 /* note that there are dependencies between these functions */
1307 layout_init_aux(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001308 layout_init_size_and_format(layout, &params);
1309 layout_init_walk(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001310 layout_init_tiling(layout, &params);
1311 layout_init_alignments(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001312 layout_init_lods(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001313 layout_init_layer_height(layout, &params);
1314
1315 layout_align(layout, &params);
1316 layout_calculate_bo_size(layout, &params);
1317
Chia-I Wu457d0a62014-08-18 13:02:26 +08001318 switch (layout->aux) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001319 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001320 layout_calculate_hiz_size(layout, &params);
1321 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001322 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001323 layout_calculate_mcs_size(layout, &params);
1324 break;
1325 default:
1326 break;
1327 }
1328}
1329
1330/**
Chia-I Wu63a53972014-12-04 12:51:54 +08001331 * Return the offset (in bytes) to a slice within the bo.
1332 *
1333 * The returned offset is aligned to tile size. Since slices are not
1334 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1335 * from the tile origin to the slice are also returned. X offset is always a
1336 * multiple of 4 and Y offset is always a multiple of 2.
1337 */
1338unsigned
1339intel_layout_get_slice_tile_offset(const struct intel_layout *layout,
1340 unsigned level, unsigned slice,
1341 unsigned *x_offset, unsigned *y_offset)
1342{
1343 unsigned tile_w, tile_h, tile_size, row_size;
1344 unsigned tile_offset, x, y;
1345
1346 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1347
1348 switch (layout->tiling) {
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001349 case GEN6_TILING_NONE:
1350 tile_w = 1;
1351 tile_h = 1;
Chia-I Wu63a53972014-12-04 12:51:54 +08001352 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001353 case GEN6_TILING_X:
Chia-I Wu63a53972014-12-04 12:51:54 +08001354 tile_w = 512;
1355 tile_h = 8;
1356 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001357 case GEN6_TILING_Y:
Chia-I Wu63a53972014-12-04 12:51:54 +08001358 tile_w = 128;
1359 tile_h = 32;
1360 break;
Chia-I Wud1eb90c2015-03-07 06:01:45 +08001361 case GEN8_TILING_W:
1362 tile_w = 64;
1363 tile_h = 64;
1364 break;
Chia-I Wu63a53972014-12-04 12:51:54 +08001365 default:
1366 assert(!"unknown tiling");
1367 tile_w = 1;
1368 tile_h = 1;
1369 break;
1370 }
1371
1372 tile_size = tile_w * tile_h;
1373 row_size = layout->bo_stride * tile_h;
1374
1375 intel_layout_get_slice_pos(layout, level, slice, &x, &y);
1376 /* in bytes */
1377 intel_layout_pos_to_mem(layout, x, y, &x, &y);
1378 tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1379
1380 /*
1381 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1382 * aligned at this point.
1383 */
1384 assert(tile_offset % tile_size == 0);
1385
1386 /*
1387 * because of the possible values of align_i and align_j in
1388 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1389 * 4 and y_offset is guaranteed to be a multiple of 2.
1390 */
1391 if (x_offset) {
1392 /* in pixels */
1393 x = (x % tile_w) / layout->block_size * layout->block_width;
1394 assert(x % 4 == 0);
1395
1396 *x_offset = x;
1397 }
1398
1399 if (y_offset) {
1400 /* in pixels */
1401 y = (y % tile_h) * layout->block_height;
1402 assert(y % 2 == 0);
1403
1404 *y_offset = y;
1405 }
1406
1407 return tile_offset;
1408}