blob: 1a2ee84928ef987126a2c04fae097f1d6a7604ca [file] [log] [blame]
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
34 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
35 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
36 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
37 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
38
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
46 const struct intel_gpu *gpu;
47 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080048
49 bool compressed;
50
51 unsigned h0, h1;
52 unsigned max_x, max_y;
53};
54
55static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080056layout_get_slice_size(const struct intel_layout *layout,
57 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080058 unsigned level, unsigned *width, unsigned *height)
59{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080060 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080061 unsigned w, h;
62
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080063 w = u_minify(info->extent.width, level);
64 h = u_minify(info->extent.height, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080065
66 /*
67 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
68 *
69 * "The dimensions of the mip maps are first determined by applying the
70 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
71 * if necessary, they are padded out to compression block boundaries."
72 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080073 w = u_align(w, layout->block_width);
74 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080075
76 /*
77 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
78 *
79 * "If the surface is multisampled (4x), these values must be adjusted
80 * as follows before proceeding:
81 *
82 * W_L = ceiling(W_L / 2) * 4
83 * H_L = ceiling(H_L / 2) * 4"
84 *
85 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
86 *
87 * "If the surface is multisampled and it is a depth or stencil surface
88 * or Multisampled Surface StorageFormat in SURFACE_STATE is
89 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
90 * proceeding:
91 *
92 * #samples W_L = H_L =
93 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
94 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
95 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
96 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
97 *
98 * For interleaved samples (4x), where pixels
99 *
100 * (x, y ) (x+1, y )
101 * (x, y+1) (x+1, y+1)
102 *
103 * would be is occupied by
104 *
105 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
106 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
107 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
108 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
109 *
110 * Thus the need to
111 *
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800112 * w = u_align(w, 2) * 2;
113 * y = u_align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800114 */
115 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800116 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800117 case 0:
118 case 1:
119 break;
120 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800121 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800122 break;
123 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800124 w = u_align(w, 2) * 2;
125 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800126 break;
127 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800128 w = u_align(w, 2) * 4;
129 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800130 break;
131 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800132 w = u_align(w, 2) * 4;
133 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800134 break;
135 default:
136 assert(!"unsupported sample count");
137 break;
138 }
139 }
140
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800141 w = u_align(w, layout->align_i);
142 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800143
144 *width = w;
145 *height = h;
146}
147
148static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800149layout_get_num_layers(const struct intel_layout *layout,
150 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800151{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800152 const XGL_IMAGE_CREATE_INFO *info = params->info;
153 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800154
155 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800156 if (info->samples > 1 && !layout->interleaved_samples)
157 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800158
159 return num_layers;
160}
161
162static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800163layout_init_layer_height(struct intel_layout *layout,
164 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800165{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800166 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800167 unsigned num_layers;
168
169 num_layers = layout_get_num_layers(layout, params);
170 if (num_layers <= 1)
171 return;
172
173 if (!layout->full_layers) {
174 layout->layer_height = params->h0;
175 params->max_y += params->h0 * (num_layers - 1);
176 return;
177 }
178
179 /*
180 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
181 *
182 * "The following equation is used for surface formats other than
183 * compressed textures:
184 *
185 * QPitch = (h0 + h1 + 11j)"
186 *
187 * "The equation for compressed textures (BC* and FXT1 surface formats)
188 * follows:
189 *
190 * QPitch = (h0 + h1 + 11j) / 4"
191 *
192 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
193 * value calculated in the equation above, for every other odd Surface
194 * Height starting from 1 i.e. 1,5,9,13"
195 *
196 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
197 *
198 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
199 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
200 *
201 * QPitch = (h0 + h1 + 12j)
202 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
203 *
204 * (There are many typos or missing words here...)"
205 *
206 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
207 * the base address. The PRM divides QPitch by 4 for compressed formats
208 * because the block height for those formats are 4, and it wants QPitch to
209 * mean the number of memory rows, as opposed to texel rows, between
210 * slices. Since we use texel rows everywhere, we do not need to divide
211 * QPitch by 4.
212 */
213 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800214 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800215
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800216 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
217 info->extent.height % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800218 layout->layer_height += 4;
219
220 params->max_y += layout->layer_height * (num_layers - 1);
221}
222
223static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800224layout_init_levels(struct intel_layout *layout,
225 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800226{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800227 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800228 unsigned cur_x, cur_y;
229 unsigned lv;
230
231 cur_x = 0;
232 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800233 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800234 unsigned level_w, level_h;
235
236 layout_get_slice_size(layout, params, lv, &level_w, &level_h);
237
238 layout->levels[lv].x = cur_x;
239 layout->levels[lv].y = cur_y;
240 layout->levels[lv].slice_width = level_w;
241 layout->levels[lv].slice_height = level_h;
242
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800243 if (info->imageType == XGL_IMAGE_3D) {
244 const unsigned num_slices = u_minify(info->extent.depth, lv);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800245 const unsigned num_slices_per_row = 1 << lv;
246 const unsigned num_rows =
247 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
248
249 level_w *= num_slices_per_row;
250 level_h *= num_rows;
251
252 cur_y += level_h;
253 } else {
254 /* MIPLAYOUT_BELOW */
255 if (lv == 1)
256 cur_x += level_w;
257 else
258 cur_y += level_h;
259 }
260
261 if (params->max_x < layout->levels[lv].x + level_w)
262 params->max_x = layout->levels[lv].x + level_w;
263 if (params->max_y < layout->levels[lv].y + level_h)
264 params->max_y = layout->levels[lv].y + level_h;
265 }
266
267 params->h0 = layout->levels[0].slice_height;
268 if (layout->full_layers) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800269 if (info->mipLevels > 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800270 params->h1 = layout->levels[1].slice_height;
271 else
272 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
273 }
274}
275
276static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800277layout_init_alignments(struct intel_layout *layout,
278 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800279{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800280 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800281
282 /*
283 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
284 *
285 * "surface format align_i align_j
286 * YUV 4:2:2 formats 4 *see below
287 * BC1-5 4 4
288 * FXT1 8 4
289 * all other formats 4 *see below"
290 *
291 * "- align_j = 4 for any depth buffer
292 * - align_j = 2 for separate stencil buffer
293 * - align_j = 4 for any render target surface is multisampled (4x)
294 * - align_j = 4 for any render target surface with Surface Vertical
295 * Alignment = VALIGN_4
296 * - align_j = 2 for any render target surface with Surface Vertical
297 * Alignment = VALIGN_2
298 * - align_j = 2 for all other render target surface
299 * - align_j = 2 for any sampling engine surface with Surface Vertical
300 * Alignment = VALIGN_2
301 * - align_j = 4 for any sampling engine surface with Surface Vertical
302 * Alignment = VALIGN_4"
303 *
304 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
305 *
306 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
307 * the Surface Format is 96 bits per element (BPE)."
308 *
309 * They can be rephrased as
310 *
311 * align_i align_j
312 * compressed formats block width block height
313 * PIPE_FORMAT_S8_UINT 4 2
314 * other depth/stencil formats 4 4
315 * 4x multisampled 4 4
316 * bpp 96 4 2
317 * others 4 2 or 4
318 */
319
320 /*
321 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
322 *
323 * "surface defined by surface format align_i align_j
324 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
325 * not D16_UNORM 4 4
326 * 3DSTATE_STENCIL_BUFFER N/A 8 8
327 * SURFACE_STATE BC*, ETC*, EAC* 4 4
328 * FXT1 8 4
329 * all others (set by SURFACE_STATE)"
330 *
331 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
332 *
333 * "- This field (Surface Vertical Aligment) is intended to be set to
334 * VALIGN_4 if the surface was rendered as a depth buffer, for a
335 * multisampled (4x) render target, or for a multisampled (8x)
336 * render target, since these surfaces support only alignment of 4.
337 * - Use of VALIGN_4 for other surfaces is supported, but uses more
338 * memory.
339 * - This field must be set to VALIGN_4 for all tiled Y Render Target
340 * surfaces.
341 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
342 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
343 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
344 * must be set to VALIGN_4."
345 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
346 *
347 * "- This field (Surface Horizontal Aligment) is intended to be set to
348 * HALIGN_8 only if the surface was rendered as a depth buffer with
349 * Z16 format or a stencil buffer, since these surfaces support only
350 * alignment of 8.
351 * - Use of HALIGN_8 for other surfaces is supported, but uses more
352 * memory.
353 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
354 * - This field must be set to HALIGN_8 if the Surface Format is
355 * FXT1."
356 *
357 * They can be rephrased as
358 *
359 * align_i align_j
360 * compressed formats block width block height
361 * PIPE_FORMAT_Z16_UNORM 8 4
362 * PIPE_FORMAT_S8_UINT 8 8
363 * other depth/stencil formats 4 or 8 4
364 * 2x or 4x multisampled 4 or 8 4
365 * tiled Y 4 or 8 4 (if rt)
366 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
367 * others 4 or 8 2 or 4
368 */
369
370 if (params->compressed) {
371 /* this happens to be the case */
372 layout->align_i = layout->block_width;
373 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800374 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
375 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
376 switch (layout->format.channelFormat) {
377 case XGL_CH_FMT_R16:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800378 layout->align_i = 8;
379 layout->align_j = 4;
380 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800381 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800382 layout->align_i = 8;
383 layout->align_j = 8;
384 break;
385 default:
386 layout->align_i = 4;
387 layout->align_j = 4;
388 break;
389 }
390 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800391 switch (layout->format.channelFormat) {
392 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800393 layout->align_i = 4;
394 layout->align_j = 2;
395 break;
396 default:
397 layout->align_i = 4;
398 layout->align_j = 4;
399 break;
400 }
401 }
402 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800403 const bool valign_4 = (info->samples > 1) ||
404 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800405 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800406 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800407
408 if (valign_4)
409 assert(layout->block_size != 12);
410
411 layout->align_i = 4;
412 layout->align_j = (valign_4) ? 4 : 2;
413 }
414
415 /*
416 * the fact that align i and j are multiples of block width and height
417 * respectively is what makes the size of the bo a multiple of the block
418 * size, slices start at block boundaries, and many of the computations
419 * work.
420 */
421 assert(layout->align_i % layout->block_width == 0);
422 assert(layout->align_j % layout->block_height == 0);
423
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800424 /* make sure u_align() works */
425 assert(u_is_pow2(layout->align_i) &&
426 u_is_pow2(layout->align_j));
427 assert(u_is_pow2(layout->block_width) &&
428 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800429}
430
431static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800432layout_get_valid_tilings(const struct intel_layout *layout,
433 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800434{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800435 const XGL_IMAGE_CREATE_INFO *info = params->info;
436 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800437 unsigned valid_tilings = LAYOUT_TILING_ALL;
438
439 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800440 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
441 *
442 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
443 * Depth Buffer is not supported."
444 *
445 * "The Depth Buffer, if tiled, must use Y-Major tiling."
446 *
447 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
448 *
449 * "W-Major Tile Format is used for separate stencil."
450 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800451 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
452 switch (format.channelFormat) {
453 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800454 valid_tilings &= LAYOUT_TILING_W;
455 break;
456 default:
457 valid_tilings &= LAYOUT_TILING_Y;
458 break;
459 }
460 }
461
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800462 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800463 /*
464 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
465 *
466 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
467 * either TileX or Linear."
468 */
469 if (layout->block_size == 16)
470 valid_tilings &= ~LAYOUT_TILING_Y;
471
472 /*
473 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
474 *
475 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
476 * for all tiled Y Render Target surfaces."
477 *
478 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
479 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800480 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800481 valid_tilings &= ~LAYOUT_TILING_Y;
482 }
483
484 /* no conflicting binding flags */
485 assert(valid_tilings);
486
487 return valid_tilings;
488}
489
490static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800491layout_init_tiling(struct intel_layout *layout,
492 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800493{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800494 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800495 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
496
497 layout->valid_tilings = valid_tilings;
498
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800499 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800500 /*
501 * heuristically set a minimum width/height for enabling tiling
502 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800503 if (info->extent.width < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800504 valid_tilings &= ~LAYOUT_TILING_X;
505
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800506 if ((info->extent.width < 32 || info->extent.height < 16) &&
507 (info->extent.width < 16 || info->extent.height < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800508 (valid_tilings & ~LAYOUT_TILING_Y))
509 valid_tilings &= ~LAYOUT_TILING_Y;
510 } else {
511 /* force linear if we are not sure where the texture is bound to */
512 if (valid_tilings & LAYOUT_TILING_NONE)
513 valid_tilings &= LAYOUT_TILING_NONE;
514 }
515
516 /* prefer tiled over linear */
517 if (valid_tilings & LAYOUT_TILING_Y)
518 layout->tiling = INTEL_TILING_Y;
519 else if (valid_tilings & LAYOUT_TILING_X)
520 layout->tiling = INTEL_TILING_X;
521 else /* linear or W-tiled, which has no hardware support */
522 layout->tiling = INTEL_TILING_NONE;
523}
524
525static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800526layout_init_arrangements_gen7(struct intel_layout *layout,
527 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800528{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800529 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800530
531 /*
532 * It is not explicitly states, but render targets are expected to be
533 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
534 * to be IMS (samples interleaved).
535 *
536 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
537 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800538 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800539 layout->interleaved_samples = true;
540
541 /*
542 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
543 *
544 * "note that the depth buffer and stencil buffer have an implied
545 * value of ARYSPC_FULL"
546 */
547 layout->full_layers = true;
548 } else {
549 layout->interleaved_samples = false;
550
551 /*
552 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
553 *
554 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
555 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
556 * Array Spacing) must be set to ARYSPC_LOD0."
557 *
558 * As multisampled resources are not mipmapped, we never use
559 * ARYSPC_FULL for them.
560 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800561 if (info->samples > 1)
562 assert(info->mipLevels == 1);
563 layout->full_layers = (info->mipLevels > 1);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800564 }
565}
566
567static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800568layout_init_arrangements_gen6(struct intel_layout *layout,
569 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800570{
571 /* GEN6 supports only interleaved samples */
572 layout->interleaved_samples = true;
573
574 /*
575 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
576 *
577 * "The separate stencil buffer does not support mip mapping, thus the
578 * storage for LODs other than LOD 0 is not needed. The following
579 * QPitch equation applies only to the separate stencil buffer:
580 *
581 * QPitch = h_0"
582 *
583 * GEN6 does not support compact spacing otherwise.
584 */
Chia-I Wufb240262014-08-16 13:26:06 +0800585 layout->full_layers =
586 !intel_format_is_stencil(params->gpu, layout->format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800587}
588
589static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800590layout_init_arrangements(struct intel_layout *layout,
591 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800592{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800593 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800594 layout_init_arrangements_gen7(layout, params);
595 else
596 layout_init_arrangements_gen6(layout, params);
597
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800598 layout->is_2d = (params->info->imageType != XGL_IMAGE_3D);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800599}
600
601static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800602layout_init_format(struct intel_layout *layout,
603 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800604{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800605 const XGL_IMAGE_CREATE_INFO *info = params->info;
606 XGL_FORMAT format = params->info->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800607 bool require_separate_stencil;
608
609 /*
610 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
611 *
612 * "This field (Separate Stencil Buffer Enable) must be set to the same
613 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
614 *
615 * GEN7+ requires separate stencil buffers.
616 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800617 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
618 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800619 require_separate_stencil = true;
620 else
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800621 require_separate_stencil =(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800622 }
623
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800624 if (format.numericFormat == XGL_NUM_FMT_DS) {
625 switch (format.channelFormat) {
626 case XGL_CH_FMT_R32G8:
627 if (require_separate_stencil) {
628 format.channelFormat = XGL_CH_FMT_R32;
629 layout->separate_stencil = true;
630 }
631 break;
632 default:
633 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800634 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800635 }
636
Chia-I Wu4bc47012014-08-14 13:03:25 +0800637 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800638 layout->block_width = icd_format_get_block_width(format);
639 layout->block_height = layout->block_width;
640 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800641
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800642 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800643}
644
645static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800646layout_want_mcs(struct intel_layout *layout,
647 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800648{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800649 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800650 bool want_mcs = false;
651
652 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800653 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800654 return false;
655
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800656 if (info->imageType != XGL_IMAGE_2D ||
657 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800658 return false;
659
660 /*
661 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
662 *
663 * "For Render Target and Sampling Engine Surfaces:If the surface is
664 * multisampled (Number of Multisamples any value other than
665 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
666 *
667 * "This field must be set to 0 for all SINT MSRTs when all RT channels
668 * are not written"
669 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800670 if (info->samples > 1 && !layout->interleaved_samples &&
671 !(info->format.numericFormat == XGL_NUM_FMT_UINT ||
672 info->format.numericFormat == XGL_NUM_FMT_SINT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800673 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800674 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800675 /*
676 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
677 *
678 * "When MCS is buffer is used for color clear of non-multisampler
679 * render target, the following restrictions apply.
680 * - Support is limited to tiled render targets.
681 * - Support is for non-mip-mapped and non-array surface types
682 * only.
683 * - Clear is supported only on the full RT; i.e., no partial clear
684 * or overlapping clears.
685 * - MCS buffer for non-MSRT is supported only for RT formats
686 * 32bpp, 64bpp and 128bpp.
687 * ..."
688 */
689 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800690 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800691 switch (layout->block_size) {
692 case 4:
693 case 8:
694 case 16:
695 want_mcs = true;
696 break;
697 default:
698 break;
699 }
700 }
701 }
702
703 return want_mcs;
704}
705
706static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800707layout_want_hiz(const struct intel_layout *layout,
708 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800709{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800710 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800711 bool want_hiz = false;
712
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800713 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800714 return false;
715
Chia-I Wufb240262014-08-16 13:26:06 +0800716 if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800717 return false;
718
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800719 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800720 want_hiz = true;
721 } else {
722 /*
723 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
724 *
725 * "The hierarchical depth buffer does not support the LOD field, it
726 * is assumed by hardware to be zero. A separate hierarachical
727 * depth buffer is required for each LOD used, and the
728 * corresponding buffer's state delivered to hardware each time a
729 * new depth buffer state with modified LOD is delivered."
730 *
731 * But we have a stronger requirement. Because of layer offsetting
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800732 * (check out the callers of intel_layout_get_slice_tile_offset()), we
Chia-I Wu4bc47012014-08-14 13:03:25 +0800733 * already have to require the texture to be non-mipmapped and
734 * non-array.
735 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800736 if (info->mipLevels == 1 && info->arraySize == 1 &&
737 info->extent.depth == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800738 want_hiz = true;
739 }
740
741 return want_hiz;
742}
743
744static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800745layout_init_aux(struct intel_layout *layout,
746 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800747{
748 if (layout_want_hiz(layout, params))
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800749 layout->aux_type = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800750 else if (layout_want_mcs(layout, params))
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800751 layout->aux_type = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800752}
753
754static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800755layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800756{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800757 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800758 int align_w = 1, align_h = 1, pad_h = 0;
759
760 /*
761 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
762 *
763 * "To determine the necessary padding on the bottom and right side of
764 * the surface, refer to the table in Section 7.18.3.4 for the i and j
765 * parameters for the surface format in use. The surface must then be
766 * extended to the next multiple of the alignment unit size in each
767 * dimension, and all texels contained in this extended surface must
768 * have valid GTT entries."
769 *
770 * "For cube surfaces, an additional two rows of padding are required
771 * at the bottom of the surface. This must be ensured regardless of
772 * whether the surface is stored tiled or linear. This is due to the
773 * potential rotation of cache line orientation from memory to cache."
774 *
775 * "For compressed textures (BC* and FXT1 surface formats), padding at
776 * the bottom of the surface is to an even compressed row, which is
777 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
778 * purposes, these surfaces behave as if j = 8 only for surface
779 * padding purposes. The value of 4 for j still applies for mip level
780 * alignment and QPitch calculation."
781 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800782 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
783 if (align_w < layout->align_i)
784 align_w = layout->align_i;
785 if (align_h < layout->align_j)
786 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800787
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800788 /* in case it is used as a cube */
789 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800790 pad_h += 2;
791
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800792 if (params->compressed && align_h < layout->align_j * 2)
793 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800794 }
795
796 /*
797 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
798 *
799 * "If the surface contains an odd number of rows of data, a final row
800 * below the surface must be allocated."
801 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800802 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
803 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800804
805 /*
806 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
807 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
808 * To avoid out-of-bound access, we have to pad.
809 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800810 if (layout->aux_type == INTEL_LAYOUT_AUX_HIZ) {
811 if (align_w < 8)
812 align_w = 8;
813 if (align_h < 4)
814 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800815 }
816
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800817 params->max_x = u_align(params->max_x, align_w);
818 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800819}
820
821/* note that this may force the texture to be linear */
822static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800823layout_calculate_bo_size(struct intel_layout *layout,
824 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800825{
826 assert(params->max_x % layout->block_width == 0);
827 assert(params->max_y % layout->block_height == 0);
828 assert(layout->layer_height % layout->block_height == 0);
829
830 layout->bo_stride =
831 (params->max_x / layout->block_width) * layout->block_size;
832 layout->bo_height = params->max_y / layout->block_height;
833
834 while (true) {
835 unsigned w = layout->bo_stride, h = layout->bo_height;
836 unsigned align_w, align_h;
837
838 /*
839 * From the Haswell PRM, volume 5, page 163:
840 *
841 * "For linear surfaces, additional padding of 64 bytes is required
842 * at the bottom of the surface. This is in addition to the padding
843 * required above."
844 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800845 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
846 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800847 layout->tiling == INTEL_TILING_NONE) {
848 layout->bo_height +=
849 (64 + layout->bo_stride - 1) / layout->bo_stride;
850 }
851
852 /*
853 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
854 *
855 * "- For linear render target surfaces, the pitch must be a
856 * multiple of the element size for non-YUV surface formats.
857 * Pitch must be a multiple of 2 * element size for YUV surface
858 * formats.
859 * - For other linear surfaces, the pitch can be any multiple of
860 * bytes.
861 * - For tiled surfaces, the pitch must be a multiple of the tile
862 * width."
863 *
864 * Different requirements may exist when the bo is used in different
865 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800866 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800867 */
868 switch (layout->tiling) {
869 case INTEL_TILING_X:
870 align_w = 512;
871 align_h = 8;
872 break;
873 case INTEL_TILING_Y:
874 align_w = 128;
875 align_h = 32;
876 break;
877 default:
Chia-I Wufb240262014-08-16 13:26:06 +0800878 if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800879 /*
880 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
881 *
882 * "A 4KB tile is subdivided into 8-high by 8-wide array of
883 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
884 * bytes."
885 *
886 * Since we asked for INTEL_TILING_NONE instead of the non-existent
887 * INTEL_TILING_W, we want to align to W tiles here.
888 */
889 align_w = 64;
890 align_h = 64;
891 } else {
892 /* some good enough values */
893 align_w = 64;
894 align_h = 2;
895 }
896 break;
897 }
898
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800899 w = u_align(w, align_w);
900 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800901
902 /* make sure the bo is mappable */
903 if (layout->tiling != INTEL_TILING_NONE) {
904 /*
905 * Usually only the first 256MB of the GTT is mappable.
906 *
907 * See also how intel_context::max_gtt_map_object_size is calculated.
908 */
909 const size_t mappable_gtt_size = 256 * 1024 * 1024;
910
911 /*
912 * Be conservative. We may be able to switch from VALIGN_4 to
913 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
914 */
915 if (mappable_gtt_size / w / 4 < h) {
916 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
917 layout->tiling = INTEL_TILING_NONE;
918 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800919 if (layout->aux_type == INTEL_LAYOUT_AUX_MCS &&
920 params->info->samples <= 1)
921 layout->aux_type = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800922
923 continue;
924 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800925 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800926 }
927 }
928 }
929
930 layout->bo_stride = w;
931 layout->bo_height = h;
932 break;
933 }
934}
935
936static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800937layout_calculate_hiz_size(struct intel_layout *layout,
938 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800939{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800940 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800941 const int hz_align_j = 8;
942 int hz_width, hz_height;
943
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800944 assert(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800945
946 /*
947 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
948 * PRM, volume 2 part 1, page 312-313.
949 *
950 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
951 * memory row.
952 */
953
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800954 hz_width = u_align(layout->levels[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800955
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800956 if (info->imageType == XGL_IMAGE_3D) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800957 unsigned lv;
958
959 hz_height = 0;
960
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800961 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800962 const unsigned h =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800963 u_align(layout->levels[lv].slice_height, hz_align_j);
964 hz_height += h * u_minify(info->extent.depth, lv);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800965 }
966
967 hz_height /= 2;
968 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800969 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800970 unsigned hz_qpitch = h0;
971
972 if (layout->full_layers) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800973 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800974 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800975 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800976
977 hz_qpitch += h1 + htail;
978 }
979
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800980 hz_height = hz_qpitch * info->arraySize / 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800981
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800982 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
983 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800984 }
985
986 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800987 layout->aux_stride = u_align(hz_width, 128);
988 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800989}
990
991static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800992layout_calculate_mcs_size(struct intel_layout *layout,
993 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800994{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800995 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800996 int mcs_width, mcs_height, mcs_cpp;
997 int downscale_x, downscale_y;
998
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800999 assert(layout->aux_type == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001000
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001001 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001002 /*
1003 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1004 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1005 * need of scale down could be that the clear rectangle is used to clear
1006 * the MCS instead of the RT.
1007 *
1008 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1009 * 2x2 factor could come from that the hardware writes 128 bits (an
1010 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1011 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1012 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1013 * pixel block in the RT.
1014 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001015 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001016 case 2:
1017 case 4:
1018 downscale_x = 8;
1019 downscale_y = 2;
1020 mcs_cpp = 1;
1021 break;
1022 case 8:
1023 downscale_x = 2;
1024 downscale_y = 2;
1025 mcs_cpp = 4;
1026 break;
1027 case 16:
1028 downscale_x = 2;
1029 downscale_y = 1;
1030 mcs_cpp = 8;
1031 break;
1032 default:
1033 assert(!"unsupported sample count");
1034 return;
1035 break;
1036 }
1037
1038 /*
1039 * It also appears that the 2x2 subspans generated by the scaled-down
1040 * clear rectangle cannot be masked. The scale-down clear rectangle
1041 * thus must be aligned to 2x2, and we need to pad.
1042 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001043 mcs_width = u_align(info->extent.width, downscale_x * 2);
1044 mcs_height = u_align(info->extent.height, downscale_y * 2);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001045 }
1046 else {
1047 /*
1048 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1049 *
1050 * " Pixels Lines
1051 * TiledY RT CL
1052 * bpp
1053 * 32 8 4
1054 * 64 4 4
1055 * 128 2 4
1056 *
1057 * TiledX RT CL
1058 * bpp
1059 * 32 16 2
1060 * 64 8 2
1061 * 128 4 2"
1062 *
1063 * This table and the two following tables define the RT alignments, the
1064 * clear rectangle alignments, and the clear rectangle scale factors.
1065 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1066 * that the clear rectangle alignments are 16x32 blocks, and the clear
1067 * rectangle scale factors are 8x16 blocks.
1068 *
1069 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1070 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1071 * 8x16 blocks.
1072 *
1073 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1074 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1075 * which says that a Y-tile maps to 128x256 blocks (\see
1076 * intel_get_non_msrt_mcs_alignment). It does not really change
1077 * anything except for the size of the allocated MCS. Let's see if we
1078 * hit out-of-bound access.
1079 */
1080 switch (layout->tiling) {
1081 case INTEL_TILING_X:
1082 downscale_x = 64 / layout->block_size;
1083 downscale_y = 2;
1084 break;
1085 case INTEL_TILING_Y:
1086 downscale_x = 32 / layout->block_size;
1087 downscale_y = 4;
1088 break;
1089 default:
1090 assert(!"unsupported tiling mode");
1091 return;
1092 break;
1093 }
1094
1095 downscale_x *= 8;
1096 downscale_y *= 16;
1097
1098 /*
1099 * From the Haswell PRM, volume 7, page 652:
1100 *
1101 * "Clear rectangle must be aligned to two times the number of
1102 * pixels in the table shown below due to 16X16 hashing across the
1103 * slice."
1104 *
1105 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1106 * 2x2, and we need to pad.
1107 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001108 mcs_width = u_align(info->extent.width, downscale_x * 4) / downscale_x;
1109 mcs_height = u_align(info->extent.height, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001110 mcs_cpp = 16; /* an OWord */
1111 }
1112
1113 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001114 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1115 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001116}
1117
1118/**
1119 * Initialize the layout. Callers should zero-initialize \p layout first.
1120 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001121void intel_layout_init(struct intel_layout *layout,
1122 const struct intel_dev *dev,
1123 const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001124{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001125 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001126
1127 memset(&params, 0, sizeof(params));
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001128 params.gpu = dev->gpu;
1129 params.info = info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001130
1131 /* note that there are dependencies between these functions */
1132 layout_init_aux(layout, &params);
1133 layout_init_format(layout, &params);
1134 layout_init_arrangements(layout, &params);
1135 layout_init_tiling(layout, &params);
1136 layout_init_alignments(layout, &params);
1137 layout_init_levels(layout, &params);
1138 layout_init_layer_height(layout, &params);
1139
1140 layout_align(layout, &params);
1141 layout_calculate_bo_size(layout, &params);
1142
1143 switch (layout->aux_type) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001144 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001145 layout_calculate_hiz_size(layout, &params);
1146 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001147 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001148 layout_calculate_mcs_size(layout, &params);
1149 break;
1150 default:
1151 break;
1152 }
1153}
1154
1155/**
1156 * Update the tiling mode and bo stride (for imported resources).
1157 */
1158bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001159intel_layout_update_for_imported_bo(struct intel_layout *layout,
1160 enum intel_tiling_mode tiling,
1161 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001162{
1163 if (!(layout->valid_tilings & (1 << tiling)))
1164 return false;
1165
1166 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1167 (tiling == INTEL_TILING_Y && bo_stride % 128))
1168 return false;
1169
1170 layout->tiling = tiling;
1171 layout->bo_stride = bo_stride;
1172
1173 return true;
1174}