blob: 71a0de9b9cab7aa3cc2c5662340581f25f449689 [file] [log] [blame]
Chia-I Wu4bc47012014-08-14 13:03:25 +08001/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
29#include "gpu.h"
30#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080031
32enum {
33 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
34 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
35 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
36 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
37
38 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
39 LAYOUT_TILING_X |
40 LAYOUT_TILING_Y |
41 LAYOUT_TILING_W)
42};
43
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080044struct intel_layout_params {
45 const struct intel_gpu *gpu;
46 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080047
48 bool compressed;
49
50 unsigned h0, h1;
51 unsigned max_x, max_y;
52};
53
54static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080055layout_get_slice_size(const struct intel_layout *layout,
56 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080057 unsigned level, unsigned *width, unsigned *height)
58{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080059 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080060 unsigned w, h;
61
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080062 w = u_minify(info->extent.width, level);
63 h = u_minify(info->extent.height, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080064
65 /*
66 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
67 *
68 * "The dimensions of the mip maps are first determined by applying the
69 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
70 * if necessary, they are padded out to compression block boundaries."
71 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080072 w = u_align(w, layout->block_width);
73 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080074
75 /*
76 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
77 *
78 * "If the surface is multisampled (4x), these values must be adjusted
79 * as follows before proceeding:
80 *
81 * W_L = ceiling(W_L / 2) * 4
82 * H_L = ceiling(H_L / 2) * 4"
83 *
84 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
85 *
86 * "If the surface is multisampled and it is a depth or stencil surface
87 * or Multisampled Surface StorageFormat in SURFACE_STATE is
88 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
89 * proceeding:
90 *
91 * #samples W_L = H_L =
92 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
93 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
94 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
95 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
96 *
97 * For interleaved samples (4x), where pixels
98 *
99 * (x, y ) (x+1, y )
100 * (x, y+1) (x+1, y+1)
101 *
102 * would be is occupied by
103 *
104 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
105 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
106 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
107 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
108 *
109 * Thus the need to
110 *
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800111 * w = u_align(w, 2) * 2;
112 * y = u_align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800113 */
114 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800115 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800116 case 0:
117 case 1:
118 break;
119 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800120 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800121 break;
122 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800123 w = u_align(w, 2) * 2;
124 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800125 break;
126 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800127 w = u_align(w, 2) * 4;
128 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800129 break;
130 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800131 w = u_align(w, 2) * 4;
132 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800133 break;
134 default:
135 assert(!"unsupported sample count");
136 break;
137 }
138 }
139
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800140 w = u_align(w, layout->align_i);
141 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800142
143 *width = w;
144 *height = h;
145}
146
147static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800148layout_get_num_layers(const struct intel_layout *layout,
149 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800150{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800151 const XGL_IMAGE_CREATE_INFO *info = params->info;
152 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800153
154 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800155 if (info->samples > 1 && !layout->interleaved_samples)
156 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800157
158 return num_layers;
159}
160
161static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800162layout_init_layer_height(struct intel_layout *layout,
163 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800164{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800165 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800166 unsigned num_layers;
167
168 num_layers = layout_get_num_layers(layout, params);
169 if (num_layers <= 1)
170 return;
171
172 if (!layout->full_layers) {
173 layout->layer_height = params->h0;
174 params->max_y += params->h0 * (num_layers - 1);
175 return;
176 }
177
178 /*
179 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
180 *
181 * "The following equation is used for surface formats other than
182 * compressed textures:
183 *
184 * QPitch = (h0 + h1 + 11j)"
185 *
186 * "The equation for compressed textures (BC* and FXT1 surface formats)
187 * follows:
188 *
189 * QPitch = (h0 + h1 + 11j) / 4"
190 *
191 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
192 * value calculated in the equation above, for every other odd Surface
193 * Height starting from 1 i.e. 1,5,9,13"
194 *
195 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
196 *
197 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
198 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
199 *
200 * QPitch = (h0 + h1 + 12j)
201 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
202 *
203 * (There are many typos or missing words here...)"
204 *
205 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
206 * the base address. The PRM divides QPitch by 4 for compressed formats
207 * because the block height for those formats are 4, and it wants QPitch to
208 * mean the number of memory rows, as opposed to texel rows, between
209 * slices. Since we use texel rows everywhere, we do not need to divide
210 * QPitch by 4.
211 */
212 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800213 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800214
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800215 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
216 info->extent.height % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800217 layout->layer_height += 4;
218
219 params->max_y += layout->layer_height * (num_layers - 1);
220}
221
222static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800223layout_init_levels(struct intel_layout *layout,
224 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800225{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800226 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800227 unsigned cur_x, cur_y;
228 unsigned lv;
229
230 cur_x = 0;
231 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800232 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800233 unsigned level_w, level_h;
234
235 layout_get_slice_size(layout, params, lv, &level_w, &level_h);
236
237 layout->levels[lv].x = cur_x;
238 layout->levels[lv].y = cur_y;
239 layout->levels[lv].slice_width = level_w;
240 layout->levels[lv].slice_height = level_h;
241
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800242 if (info->imageType == XGL_IMAGE_3D) {
243 const unsigned num_slices = u_minify(info->extent.depth, lv);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800244 const unsigned num_slices_per_row = 1 << lv;
245 const unsigned num_rows =
246 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
247
248 level_w *= num_slices_per_row;
249 level_h *= num_rows;
250
251 cur_y += level_h;
252 } else {
253 /* MIPLAYOUT_BELOW */
254 if (lv == 1)
255 cur_x += level_w;
256 else
257 cur_y += level_h;
258 }
259
260 if (params->max_x < layout->levels[lv].x + level_w)
261 params->max_x = layout->levels[lv].x + level_w;
262 if (params->max_y < layout->levels[lv].y + level_h)
263 params->max_y = layout->levels[lv].y + level_h;
264 }
265
266 params->h0 = layout->levels[0].slice_height;
267 if (layout->full_layers) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800268 if (info->mipLevels > 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800269 params->h1 = layout->levels[1].slice_height;
270 else
271 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
272 }
273}
274
275static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800276layout_init_alignments(struct intel_layout *layout,
277 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800278{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800279 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800280
281 /*
282 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
283 *
284 * "surface format align_i align_j
285 * YUV 4:2:2 formats 4 *see below
286 * BC1-5 4 4
287 * FXT1 8 4
288 * all other formats 4 *see below"
289 *
290 * "- align_j = 4 for any depth buffer
291 * - align_j = 2 for separate stencil buffer
292 * - align_j = 4 for any render target surface is multisampled (4x)
293 * - align_j = 4 for any render target surface with Surface Vertical
294 * Alignment = VALIGN_4
295 * - align_j = 2 for any render target surface with Surface Vertical
296 * Alignment = VALIGN_2
297 * - align_j = 2 for all other render target surface
298 * - align_j = 2 for any sampling engine surface with Surface Vertical
299 * Alignment = VALIGN_2
300 * - align_j = 4 for any sampling engine surface with Surface Vertical
301 * Alignment = VALIGN_4"
302 *
303 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
304 *
305 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
306 * the Surface Format is 96 bits per element (BPE)."
307 *
308 * They can be rephrased as
309 *
310 * align_i align_j
311 * compressed formats block width block height
312 * PIPE_FORMAT_S8_UINT 4 2
313 * other depth/stencil formats 4 4
314 * 4x multisampled 4 4
315 * bpp 96 4 2
316 * others 4 2 or 4
317 */
318
319 /*
320 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
321 *
322 * "surface defined by surface format align_i align_j
323 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
324 * not D16_UNORM 4 4
325 * 3DSTATE_STENCIL_BUFFER N/A 8 8
326 * SURFACE_STATE BC*, ETC*, EAC* 4 4
327 * FXT1 8 4
328 * all others (set by SURFACE_STATE)"
329 *
330 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
331 *
332 * "- This field (Surface Vertical Aligment) is intended to be set to
333 * VALIGN_4 if the surface was rendered as a depth buffer, for a
334 * multisampled (4x) render target, or for a multisampled (8x)
335 * render target, since these surfaces support only alignment of 4.
336 * - Use of VALIGN_4 for other surfaces is supported, but uses more
337 * memory.
338 * - This field must be set to VALIGN_4 for all tiled Y Render Target
339 * surfaces.
340 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
341 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
342 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
343 * must be set to VALIGN_4."
344 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
345 *
346 * "- This field (Surface Horizontal Aligment) is intended to be set to
347 * HALIGN_8 only if the surface was rendered as a depth buffer with
348 * Z16 format or a stencil buffer, since these surfaces support only
349 * alignment of 8.
350 * - Use of HALIGN_8 for other surfaces is supported, but uses more
351 * memory.
352 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
353 * - This field must be set to HALIGN_8 if the Surface Format is
354 * FXT1."
355 *
356 * They can be rephrased as
357 *
358 * align_i align_j
359 * compressed formats block width block height
360 * PIPE_FORMAT_Z16_UNORM 8 4
361 * PIPE_FORMAT_S8_UINT 8 8
362 * other depth/stencil formats 4 or 8 4
363 * 2x or 4x multisampled 4 or 8 4
364 * tiled Y 4 or 8 4 (if rt)
365 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
366 * others 4 or 8 2 or 4
367 */
368
369 if (params->compressed) {
370 /* this happens to be the case */
371 layout->align_i = layout->block_width;
372 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800373 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
374 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
375 switch (layout->format.channelFormat) {
376 case XGL_CH_FMT_R16:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800377 layout->align_i = 8;
378 layout->align_j = 4;
379 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800380 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800381 layout->align_i = 8;
382 layout->align_j = 8;
383 break;
384 default:
385 layout->align_i = 4;
386 layout->align_j = 4;
387 break;
388 }
389 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800390 switch (layout->format.channelFormat) {
391 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800392 layout->align_i = 4;
393 layout->align_j = 2;
394 break;
395 default:
396 layout->align_i = 4;
397 layout->align_j = 4;
398 break;
399 }
400 }
401 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800402 const bool valign_4 = (info->samples > 1) ||
403 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800404 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800405 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800406
407 if (valign_4)
408 assert(layout->block_size != 12);
409
410 layout->align_i = 4;
411 layout->align_j = (valign_4) ? 4 : 2;
412 }
413
414 /*
415 * the fact that align i and j are multiples of block width and height
416 * respectively is what makes the size of the bo a multiple of the block
417 * size, slices start at block boundaries, and many of the computations
418 * work.
419 */
420 assert(layout->align_i % layout->block_width == 0);
421 assert(layout->align_j % layout->block_height == 0);
422
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800423 /* make sure u_align() works */
424 assert(u_is_pow2(layout->align_i) &&
425 u_is_pow2(layout->align_j));
426 assert(u_is_pow2(layout->block_width) &&
427 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800428}
429
430static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800431layout_get_valid_tilings(const struct intel_layout *layout,
432 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800433{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800434 const XGL_IMAGE_CREATE_INFO *info = params->info;
435 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800436 unsigned valid_tilings = LAYOUT_TILING_ALL;
437
438 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800439 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
440 *
441 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
442 * Depth Buffer is not supported."
443 *
444 * "The Depth Buffer, if tiled, must use Y-Major tiling."
445 *
446 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
447 *
448 * "W-Major Tile Format is used for separate stencil."
449 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800450 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
451 switch (format.channelFormat) {
452 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800453 valid_tilings &= LAYOUT_TILING_W;
454 break;
455 default:
456 valid_tilings &= LAYOUT_TILING_Y;
457 break;
458 }
459 }
460
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800461 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800462 /*
463 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
464 *
465 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
466 * either TileX or Linear."
467 */
468 if (layout->block_size == 16)
469 valid_tilings &= ~LAYOUT_TILING_Y;
470
471 /*
472 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
473 *
474 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
475 * for all tiled Y Render Target surfaces."
476 *
477 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
478 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800479 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800480 valid_tilings &= ~LAYOUT_TILING_Y;
481 }
482
483 /* no conflicting binding flags */
484 assert(valid_tilings);
485
486 return valid_tilings;
487}
488
489static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800490layout_init_tiling(struct intel_layout *layout,
491 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800492{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800493 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800494 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
495
496 layout->valid_tilings = valid_tilings;
497
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800498 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800499 /*
500 * heuristically set a minimum width/height for enabling tiling
501 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800502 if (info->extent.width < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800503 valid_tilings &= ~LAYOUT_TILING_X;
504
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800505 if ((info->extent.width < 32 || info->extent.height < 16) &&
506 (info->extent.width < 16 || info->extent.height < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800507 (valid_tilings & ~LAYOUT_TILING_Y))
508 valid_tilings &= ~LAYOUT_TILING_Y;
509 } else {
510 /* force linear if we are not sure where the texture is bound to */
511 if (valid_tilings & LAYOUT_TILING_NONE)
512 valid_tilings &= LAYOUT_TILING_NONE;
513 }
514
515 /* prefer tiled over linear */
516 if (valid_tilings & LAYOUT_TILING_Y)
517 layout->tiling = INTEL_TILING_Y;
518 else if (valid_tilings & LAYOUT_TILING_X)
519 layout->tiling = INTEL_TILING_X;
520 else /* linear or W-tiled, which has no hardware support */
521 layout->tiling = INTEL_TILING_NONE;
522}
523
524static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800525layout_init_arrangements_gen7(struct intel_layout *layout,
526 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800527{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800528 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800529
530 /*
531 * It is not explicitly states, but render targets are expected to be
532 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
533 * to be IMS (samples interleaved).
534 *
535 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
536 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800537 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800538 layout->interleaved_samples = true;
539
540 /*
541 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
542 *
543 * "note that the depth buffer and stencil buffer have an implied
544 * value of ARYSPC_FULL"
545 */
546 layout->full_layers = true;
547 } else {
548 layout->interleaved_samples = false;
549
550 /*
551 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
552 *
553 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
554 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
555 * Array Spacing) must be set to ARYSPC_LOD0."
556 *
557 * As multisampled resources are not mipmapped, we never use
558 * ARYSPC_FULL for them.
559 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800560 if (info->samples > 1)
561 assert(info->mipLevels == 1);
562 layout->full_layers = (info->mipLevels > 1);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800563 }
564}
565
566static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800567layout_init_arrangements_gen6(struct intel_layout *layout,
568 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800569{
570 /* GEN6 supports only interleaved samples */
571 layout->interleaved_samples = true;
572
573 /*
574 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
575 *
576 * "The separate stencil buffer does not support mip mapping, thus the
577 * storage for LODs other than LOD 0 is not needed. The following
578 * QPitch equation applies only to the separate stencil buffer:
579 *
580 * QPitch = h_0"
581 *
582 * GEN6 does not support compact spacing otherwise.
583 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800584 layout->full_layers = !(layout->format.channelFormat == XGL_CH_FMT_R8 &&
585 layout->format.numericFormat == XGL_NUM_FMT_DS);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800586}
587
588static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800589layout_init_arrangements(struct intel_layout *layout,
590 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800591{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800592 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800593 layout_init_arrangements_gen7(layout, params);
594 else
595 layout_init_arrangements_gen6(layout, params);
596
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800597 layout->is_2d = (params->info->imageType != XGL_IMAGE_3D);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800598}
599
600static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800601layout_init_format(struct intel_layout *layout,
602 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800603{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800604 const XGL_IMAGE_CREATE_INFO *info = params->info;
605 XGL_FORMAT format = params->info->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800606 bool require_separate_stencil;
607
608 /*
609 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
610 *
611 * "This field (Separate Stencil Buffer Enable) must be set to the same
612 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
613 *
614 * GEN7+ requires separate stencil buffers.
615 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800616 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
617 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800618 require_separate_stencil = true;
619 else
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800620 require_separate_stencil =(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800621 }
622
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800623 if (format.numericFormat == XGL_NUM_FMT_DS) {
624 switch (format.channelFormat) {
625 case XGL_CH_FMT_R32G8:
626 if (require_separate_stencil) {
627 format.channelFormat = XGL_CH_FMT_R32;
628 layout->separate_stencil = true;
629 }
630 break;
631 default:
632 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800633 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800634 }
635
Chia-I Wu4bc47012014-08-14 13:03:25 +0800636 layout->format = format;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800637
638 layout->block_width = 1;
639 layout->block_height = 1;
640 layout->block_size = 1;
641 params->compressed = false;
642
643 switch (format.channelFormat) {
644 case XGL_CH_FMT_UNDEFINED:
645 break;
646 case XGL_CH_FMT_R4G4:
647 layout->block_size = 1;
648 break;
649 case XGL_CH_FMT_R4G4B4A4:
650 layout->block_size = 2;
651 break;
652 case XGL_CH_FMT_R5G6B5:
653 case XGL_CH_FMT_B5G6R5:
654 case XGL_CH_FMT_R5G5B5A1:
655 layout->block_size = 2;
656 break;
657 case XGL_CH_FMT_R8:
658 layout->block_size = 1;
659 break;
660 case XGL_CH_FMT_R8G8:
661 layout->block_size = 2;
662 break;
663 case XGL_CH_FMT_R8G8B8A8:
664 case XGL_CH_FMT_B8G8R8A8:
665 case XGL_CH_FMT_R10G11B11:
666 case XGL_CH_FMT_R11G11B10:
667 case XGL_CH_FMT_R10G10B10A2:
668 layout->block_size = 4;
669 break;
670 case XGL_CH_FMT_R16:
671 layout->block_size = 2;
672 break;
673 case XGL_CH_FMT_R16G16:
674 layout->block_size = 4;
675 break;
676 case XGL_CH_FMT_R16G16B16A16:
677 layout->block_size = 8;
678 break;
679 case XGL_CH_FMT_R32:
680 layout->block_size = 4;
681 break;
682 case XGL_CH_FMT_R32G32:
683 layout->block_size = 8;
684 break;
685 case XGL_CH_FMT_R32G32B32:
686 layout->block_size = 12;
687 break;
688 case XGL_CH_FMT_R32G32B32A32:
689 layout->block_size = 16;
690 break;
691 case XGL_CH_FMT_R16G8:
692 layout->block_size = 3;
693 break;
694 case XGL_CH_FMT_R32G8:
695 layout->block_size = 5;
696 break;
697 case XGL_CH_FMT_R9G9B9E5:
698 layout->block_size = 4;
699 break;
700 case XGL_CH_FMT_BC1:
701 case XGL_CH_FMT_BC2:
702 case XGL_CH_FMT_BC3:
703 case XGL_CH_FMT_BC4:
704 case XGL_CH_FMT_BC5:
705 case XGL_CH_FMT_BC6U:
706 case XGL_CH_FMT_BC6S:
707 case XGL_CH_FMT_BC7:
708 layout->block_width = 4;
709 layout->block_height = 4;
710 layout->block_size =
711 (format.channelFormat == XGL_CH_FMT_BC1 ||
712 format.channelFormat == XGL_CH_FMT_BC4) ? 8 : 16;
713 params->compressed = true;
714 break;
715 default:
716 assert(!"unknown format");
717 break;
718 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800719}
720
721static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800722layout_want_mcs(struct intel_layout *layout,
723 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800724{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800725 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800726 bool want_mcs = false;
727
728 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800729 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800730 return false;
731
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800732 if (info->imageType != XGL_IMAGE_2D ||
733 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800734 return false;
735
736 /*
737 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
738 *
739 * "For Render Target and Sampling Engine Surfaces:If the surface is
740 * multisampled (Number of Multisamples any value other than
741 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
742 *
743 * "This field must be set to 0 for all SINT MSRTs when all RT channels
744 * are not written"
745 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800746 if (info->samples > 1 && !layout->interleaved_samples &&
747 !(info->format.numericFormat == XGL_NUM_FMT_UINT ||
748 info->format.numericFormat == XGL_NUM_FMT_SINT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800749 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800750 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800751 /*
752 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
753 *
754 * "When MCS is buffer is used for color clear of non-multisampler
755 * render target, the following restrictions apply.
756 * - Support is limited to tiled render targets.
757 * - Support is for non-mip-mapped and non-array surface types
758 * only.
759 * - Clear is supported only on the full RT; i.e., no partial clear
760 * or overlapping clears.
761 * - MCS buffer for non-MSRT is supported only for RT formats
762 * 32bpp, 64bpp and 128bpp.
763 * ..."
764 */
765 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800766 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800767 switch (layout->block_size) {
768 case 4:
769 case 8:
770 case 16:
771 want_mcs = true;
772 break;
773 default:
774 break;
775 }
776 }
777 }
778
779 return want_mcs;
780}
781
782static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800783layout_want_hiz(const struct intel_layout *layout,
784 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800785{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800786 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800787 bool want_hiz = false;
788
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800789 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800790 return false;
791
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800792 if (info->format.channelFormat == XGL_CH_FMT_R8)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800793 return false;
794
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800795 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800796 want_hiz = true;
797 } else {
798 /*
799 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
800 *
801 * "The hierarchical depth buffer does not support the LOD field, it
802 * is assumed by hardware to be zero. A separate hierarachical
803 * depth buffer is required for each LOD used, and the
804 * corresponding buffer's state delivered to hardware each time a
805 * new depth buffer state with modified LOD is delivered."
806 *
807 * But we have a stronger requirement. Because of layer offsetting
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800808 * (check out the callers of intel_layout_get_slice_tile_offset()), we
Chia-I Wu4bc47012014-08-14 13:03:25 +0800809 * already have to require the texture to be non-mipmapped and
810 * non-array.
811 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800812 if (info->mipLevels == 1 && info->arraySize == 1 &&
813 info->extent.depth == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800814 want_hiz = true;
815 }
816
817 return want_hiz;
818}
819
820static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800821layout_init_aux(struct intel_layout *layout,
822 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800823{
824 if (layout_want_hiz(layout, params))
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800825 layout->aux_type = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800826 else if (layout_want_mcs(layout, params))
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800827 layout->aux_type = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800828}
829
830static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800831layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800832{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800833 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800834 int align_w = 1, align_h = 1, pad_h = 0;
835
836 /*
837 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
838 *
839 * "To determine the necessary padding on the bottom and right side of
840 * the surface, refer to the table in Section 7.18.3.4 for the i and j
841 * parameters for the surface format in use. The surface must then be
842 * extended to the next multiple of the alignment unit size in each
843 * dimension, and all texels contained in this extended surface must
844 * have valid GTT entries."
845 *
846 * "For cube surfaces, an additional two rows of padding are required
847 * at the bottom of the surface. This must be ensured regardless of
848 * whether the surface is stored tiled or linear. This is due to the
849 * potential rotation of cache line orientation from memory to cache."
850 *
851 * "For compressed textures (BC* and FXT1 surface formats), padding at
852 * the bottom of the surface is to an even compressed row, which is
853 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
854 * purposes, these surfaces behave as if j = 8 only for surface
855 * padding purposes. The value of 4 for j still applies for mip level
856 * alignment and QPitch calculation."
857 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800858 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
859 if (align_w < layout->align_i)
860 align_w = layout->align_i;
861 if (align_h < layout->align_j)
862 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800863
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800864 /* in case it is used as a cube */
865 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800866 pad_h += 2;
867
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800868 if (params->compressed && align_h < layout->align_j * 2)
869 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800870 }
871
872 /*
873 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
874 *
875 * "If the surface contains an odd number of rows of data, a final row
876 * below the surface must be allocated."
877 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800878 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
879 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800880
881 /*
882 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
883 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
884 * To avoid out-of-bound access, we have to pad.
885 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800886 if (layout->aux_type == INTEL_LAYOUT_AUX_HIZ) {
887 if (align_w < 8)
888 align_w = 8;
889 if (align_h < 4)
890 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800891 }
892
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800893 params->max_x = u_align(params->max_x, align_w);
894 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800895}
896
897/* note that this may force the texture to be linear */
898static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800899layout_calculate_bo_size(struct intel_layout *layout,
900 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800901{
902 assert(params->max_x % layout->block_width == 0);
903 assert(params->max_y % layout->block_height == 0);
904 assert(layout->layer_height % layout->block_height == 0);
905
906 layout->bo_stride =
907 (params->max_x / layout->block_width) * layout->block_size;
908 layout->bo_height = params->max_y / layout->block_height;
909
910 while (true) {
911 unsigned w = layout->bo_stride, h = layout->bo_height;
912 unsigned align_w, align_h;
913
914 /*
915 * From the Haswell PRM, volume 5, page 163:
916 *
917 * "For linear surfaces, additional padding of 64 bytes is required
918 * at the bottom of the surface. This is in addition to the padding
919 * required above."
920 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800921 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
922 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800923 layout->tiling == INTEL_TILING_NONE) {
924 layout->bo_height +=
925 (64 + layout->bo_stride - 1) / layout->bo_stride;
926 }
927
928 /*
929 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
930 *
931 * "- For linear render target surfaces, the pitch must be a
932 * multiple of the element size for non-YUV surface formats.
933 * Pitch must be a multiple of 2 * element size for YUV surface
934 * formats.
935 * - For other linear surfaces, the pitch can be any multiple of
936 * bytes.
937 * - For tiled surfaces, the pitch must be a multiple of the tile
938 * width."
939 *
940 * Different requirements may exist when the bo is used in different
941 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800942 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800943 */
944 switch (layout->tiling) {
945 case INTEL_TILING_X:
946 align_w = 512;
947 align_h = 8;
948 break;
949 case INTEL_TILING_Y:
950 align_w = 128;
951 align_h = 32;
952 break;
953 default:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800954 if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
955 layout->format.numericFormat == XGL_NUM_FMT_DS) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800956 /*
957 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
958 *
959 * "A 4KB tile is subdivided into 8-high by 8-wide array of
960 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
961 * bytes."
962 *
963 * Since we asked for INTEL_TILING_NONE instead of the non-existent
964 * INTEL_TILING_W, we want to align to W tiles here.
965 */
966 align_w = 64;
967 align_h = 64;
968 } else {
969 /* some good enough values */
970 align_w = 64;
971 align_h = 2;
972 }
973 break;
974 }
975
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800976 w = u_align(w, align_w);
977 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800978
979 /* make sure the bo is mappable */
980 if (layout->tiling != INTEL_TILING_NONE) {
981 /*
982 * Usually only the first 256MB of the GTT is mappable.
983 *
984 * See also how intel_context::max_gtt_map_object_size is calculated.
985 */
986 const size_t mappable_gtt_size = 256 * 1024 * 1024;
987
988 /*
989 * Be conservative. We may be able to switch from VALIGN_4 to
990 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
991 */
992 if (mappable_gtt_size / w / 4 < h) {
993 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
994 layout->tiling = INTEL_TILING_NONE;
995 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800996 if (layout->aux_type == INTEL_LAYOUT_AUX_MCS &&
997 params->info->samples <= 1)
998 layout->aux_type = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800999
1000 continue;
1001 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001002 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +08001003 }
1004 }
1005 }
1006
1007 layout->bo_stride = w;
1008 layout->bo_height = h;
1009 break;
1010 }
1011}
1012
1013static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001014layout_calculate_hiz_size(struct intel_layout *layout,
1015 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001016{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001017 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001018 const int hz_align_j = 8;
1019 int hz_width, hz_height;
1020
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001021 assert(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001022
1023 /*
1024 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1025 * PRM, volume 2 part 1, page 312-313.
1026 *
1027 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1028 * memory row.
1029 */
1030
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001031 hz_width = u_align(layout->levels[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001032
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001033 if (info->imageType == XGL_IMAGE_3D) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001034 unsigned lv;
1035
1036 hz_height = 0;
1037
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001038 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001039 const unsigned h =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001040 u_align(layout->levels[lv].slice_height, hz_align_j);
1041 hz_height += h * u_minify(info->extent.depth, lv);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001042 }
1043
1044 hz_height /= 2;
1045 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001046 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001047 unsigned hz_qpitch = h0;
1048
1049 if (layout->full_layers) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001050 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001051 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001052 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001053
1054 hz_qpitch += h1 + htail;
1055 }
1056
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001057 hz_height = hz_qpitch * info->arraySize / 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001058
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001059 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1060 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001061 }
1062
1063 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001064 layout->aux_stride = u_align(hz_width, 128);
1065 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001066}
1067
1068static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001069layout_calculate_mcs_size(struct intel_layout *layout,
1070 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001071{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001072 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001073 int mcs_width, mcs_height, mcs_cpp;
1074 int downscale_x, downscale_y;
1075
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001076 assert(layout->aux_type == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001077
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001078 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001079 /*
1080 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1081 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1082 * need of scale down could be that the clear rectangle is used to clear
1083 * the MCS instead of the RT.
1084 *
1085 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1086 * 2x2 factor could come from that the hardware writes 128 bits (an
1087 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1088 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1089 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1090 * pixel block in the RT.
1091 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001092 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001093 case 2:
1094 case 4:
1095 downscale_x = 8;
1096 downscale_y = 2;
1097 mcs_cpp = 1;
1098 break;
1099 case 8:
1100 downscale_x = 2;
1101 downscale_y = 2;
1102 mcs_cpp = 4;
1103 break;
1104 case 16:
1105 downscale_x = 2;
1106 downscale_y = 1;
1107 mcs_cpp = 8;
1108 break;
1109 default:
1110 assert(!"unsupported sample count");
1111 return;
1112 break;
1113 }
1114
1115 /*
1116 * It also appears that the 2x2 subspans generated by the scaled-down
1117 * clear rectangle cannot be masked. The scale-down clear rectangle
1118 * thus must be aligned to 2x2, and we need to pad.
1119 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001120 mcs_width = u_align(info->extent.width, downscale_x * 2);
1121 mcs_height = u_align(info->extent.height, downscale_y * 2);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001122 }
1123 else {
1124 /*
1125 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1126 *
1127 * " Pixels Lines
1128 * TiledY RT CL
1129 * bpp
1130 * 32 8 4
1131 * 64 4 4
1132 * 128 2 4
1133 *
1134 * TiledX RT CL
1135 * bpp
1136 * 32 16 2
1137 * 64 8 2
1138 * 128 4 2"
1139 *
1140 * This table and the two following tables define the RT alignments, the
1141 * clear rectangle alignments, and the clear rectangle scale factors.
1142 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1143 * that the clear rectangle alignments are 16x32 blocks, and the clear
1144 * rectangle scale factors are 8x16 blocks.
1145 *
1146 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1147 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1148 * 8x16 blocks.
1149 *
1150 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1151 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1152 * which says that a Y-tile maps to 128x256 blocks (\see
1153 * intel_get_non_msrt_mcs_alignment). It does not really change
1154 * anything except for the size of the allocated MCS. Let's see if we
1155 * hit out-of-bound access.
1156 */
1157 switch (layout->tiling) {
1158 case INTEL_TILING_X:
1159 downscale_x = 64 / layout->block_size;
1160 downscale_y = 2;
1161 break;
1162 case INTEL_TILING_Y:
1163 downscale_x = 32 / layout->block_size;
1164 downscale_y = 4;
1165 break;
1166 default:
1167 assert(!"unsupported tiling mode");
1168 return;
1169 break;
1170 }
1171
1172 downscale_x *= 8;
1173 downscale_y *= 16;
1174
1175 /*
1176 * From the Haswell PRM, volume 7, page 652:
1177 *
1178 * "Clear rectangle must be aligned to two times the number of
1179 * pixels in the table shown below due to 16X16 hashing across the
1180 * slice."
1181 *
1182 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1183 * 2x2, and we need to pad.
1184 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001185 mcs_width = u_align(info->extent.width, downscale_x * 4) / downscale_x;
1186 mcs_height = u_align(info->extent.height, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001187 mcs_cpp = 16; /* an OWord */
1188 }
1189
1190 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001191 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1192 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001193}
1194
1195/**
1196 * Initialize the layout. Callers should zero-initialize \p layout first.
1197 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001198void intel_layout_init(struct intel_layout *layout,
1199 const struct intel_dev *dev,
1200 const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001201{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001202 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001203
1204 memset(&params, 0, sizeof(params));
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001205 params.gpu = dev->gpu;
1206 params.info = info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001207
1208 /* note that there are dependencies between these functions */
1209 layout_init_aux(layout, &params);
1210 layout_init_format(layout, &params);
1211 layout_init_arrangements(layout, &params);
1212 layout_init_tiling(layout, &params);
1213 layout_init_alignments(layout, &params);
1214 layout_init_levels(layout, &params);
1215 layout_init_layer_height(layout, &params);
1216
1217 layout_align(layout, &params);
1218 layout_calculate_bo_size(layout, &params);
1219
1220 switch (layout->aux_type) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001221 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001222 layout_calculate_hiz_size(layout, &params);
1223 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001224 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001225 layout_calculate_mcs_size(layout, &params);
1226 break;
1227 default:
1228 break;
1229 }
1230}
1231
1232/**
1233 * Update the tiling mode and bo stride (for imported resources).
1234 */
1235bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001236intel_layout_update_for_imported_bo(struct intel_layout *layout,
1237 enum intel_tiling_mode tiling,
1238 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001239{
1240 if (!(layout->valid_tilings & (1 << tiling)))
1241 return false;
1242
1243 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1244 (tiling == INTEL_TILING_Y && bo_stride % 128))
1245 return false;
1246
1247 layout->tiling = tiling;
1248 layout->bo_stride = bo_stride;
1249
1250 return true;
1251}