blob: de1f41ebfbdfa06f12f82830192c4bb536da5a30 [file] [log] [blame]
/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Chia-I Wu <olv@lunarg.com>
 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
34 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
35 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
36 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
37 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
38
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
46 const struct intel_gpu *gpu;
47 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu794d12a2014-09-15 14:55:25 +080048 bool scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +080049
50 bool compressed;
51
52 unsigned h0, h1;
53 unsigned max_x, max_y;
54};
55
56static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080057layout_get_slice_size(const struct intel_layout *layout,
58 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080059 unsigned level, unsigned *width, unsigned *height)
60{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080061 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080062 unsigned w, h;
63
Chia-I Wu457d0a62014-08-18 13:02:26 +080064 w = u_minify(layout->width0, level);
65 h = u_minify(layout->height0, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080066
67 /*
68 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
69 *
70 * "The dimensions of the mip maps are first determined by applying the
71 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
72 * if necessary, they are padded out to compression block boundaries."
73 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080074 w = u_align(w, layout->block_width);
75 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080076
77 /*
78 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
79 *
80 * "If the surface is multisampled (4x), these values must be adjusted
81 * as follows before proceeding:
82 *
83 * W_L = ceiling(W_L / 2) * 4
84 * H_L = ceiling(H_L / 2) * 4"
85 *
86 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
87 *
88 * "If the surface is multisampled and it is a depth or stencil surface
89 * or Multisampled Surface StorageFormat in SURFACE_STATE is
90 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
91 * proceeding:
92 *
93 * #samples W_L = H_L =
94 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
95 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
96 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
97 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
98 *
99 * For interleaved samples (4x), where pixels
100 *
101 * (x, y ) (x+1, y )
102 * (x, y+1) (x+1, y+1)
103 *
104 * would be is occupied by
105 *
106 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
107 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
108 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
109 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
110 *
111 * Thus the need to
112 *
Chia-I Wu457d0a62014-08-18 13:02:26 +0800113 * w = align(w, 2) * 2;
114 * y = align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800115 */
116 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800117 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800118 case 0:
119 case 1:
120 break;
121 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800122 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800123 break;
124 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800125 w = u_align(w, 2) * 2;
126 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800127 break;
128 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800129 w = u_align(w, 2) * 4;
130 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800131 break;
132 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800133 w = u_align(w, 2) * 4;
134 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800135 break;
136 default:
137 assert(!"unsupported sample count");
138 break;
139 }
140 }
141
Chia-I Wu457d0a62014-08-18 13:02:26 +0800142 /*
143 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
144 *
145 * "For separate stencil buffer, the width must be mutiplied by 2 and
146 * height divided by 2..."
147 *
148 * To make things easier (for transfer), we will just double the stencil
149 * stride in 3DSTATE_STENCIL_BUFFER.
150 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800151 w = u_align(w, layout->align_i);
152 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800153
154 *width = w;
155 *height = h;
156}
157
158static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800159layout_get_num_layers(const struct intel_layout *layout,
160 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800161{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800162 const XGL_IMAGE_CREATE_INFO *info = params->info;
163 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800164
165 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800166 if (info->samples > 1 && !layout->interleaved_samples)
167 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800168
169 return num_layers;
170}
171
172static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800173layout_init_layer_height(struct intel_layout *layout,
174 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800175{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800176 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800177 unsigned num_layers;
178
Chia-I Wu457d0a62014-08-18 13:02:26 +0800179 if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
180 return;
181
Chia-I Wu4bc47012014-08-14 13:03:25 +0800182 num_layers = layout_get_num_layers(layout, params);
183 if (num_layers <= 1)
184 return;
185
Chia-I Wu4bc47012014-08-14 13:03:25 +0800186 /*
187 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
188 *
189 * "The following equation is used for surface formats other than
190 * compressed textures:
191 *
192 * QPitch = (h0 + h1 + 11j)"
193 *
194 * "The equation for compressed textures (BC* and FXT1 surface formats)
195 * follows:
196 *
197 * QPitch = (h0 + h1 + 11j) / 4"
198 *
199 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
200 * value calculated in the equation above, for every other odd Surface
201 * Height starting from 1 i.e. 1,5,9,13"
202 *
203 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
204 *
205 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
206 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
207 *
208 * QPitch = (h0 + h1 + 12j)
209 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
210 *
211 * (There are many typos or missing words here...)"
212 *
213 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
214 * the base address. The PRM divides QPitch by 4 for compressed formats
215 * because the block height for those formats are 4, and it wants QPitch to
216 * mean the number of memory rows, as opposed to texel rows, between
217 * slices. Since we use texel rows everywhere, we do not need to divide
218 * QPitch by 4.
219 */
220 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800221 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800222
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800223 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800224 layout->height0 % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800225 layout->layer_height += 4;
226
227 params->max_y += layout->layer_height * (num_layers - 1);
228}
229
230static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800231layout_init_lods(struct intel_layout *layout,
232 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800233{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800234 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800235 unsigned cur_x, cur_y;
236 unsigned lv;
237
238 cur_x = 0;
239 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800240 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu457d0a62014-08-18 13:02:26 +0800241 unsigned lod_w, lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800242
Chia-I Wu457d0a62014-08-18 13:02:26 +0800243 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800244
Chia-I Wu457d0a62014-08-18 13:02:26 +0800245 layout->lods[lv].x = cur_x;
246 layout->lods[lv].y = cur_y;
247 layout->lods[lv].slice_width = lod_w;
248 layout->lods[lv].slice_height = lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800249
Chia-I Wu457d0a62014-08-18 13:02:26 +0800250 switch (layout->walk) {
251 case INTEL_LAYOUT_WALK_LOD:
252 lod_h *= layout_get_num_layers(layout, params);
253 if (lv == 1)
254 cur_x += lod_w;
255 else
256 cur_y += lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800257
Chia-I Wu457d0a62014-08-18 13:02:26 +0800258 /* every LOD begins at tile boundaries */
259 if (info->mipLevels > 1) {
260 intel_format_is_stencil(params->gpu, layout->format);
261 cur_x = u_align(cur_x, 64);
262 cur_y = u_align(cur_y, 64);
263 }
264 break;
265 case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800266 /* MIPLAYOUT_BELOW */
267 if (lv == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800268 cur_x += lod_w;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800269 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800270 cur_y += lod_h;
271 break;
272 case INTEL_LAYOUT_WALK_3D:
273 {
274 const unsigned num_slices = u_minify(info->extent.depth, lv);
275 const unsigned num_slices_per_row = 1 << lv;
276 const unsigned num_rows =
277 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
278
279 lod_w *= num_slices_per_row;
280 lod_h *= num_rows;
281
282 cur_y += lod_h;
283 }
284 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800285 }
286
Chia-I Wu457d0a62014-08-18 13:02:26 +0800287 if (params->max_x < layout->lods[lv].x + lod_w)
288 params->max_x = layout->lods[lv].x + lod_w;
289 if (params->max_y < layout->lods[lv].y + lod_h)
290 params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800291 }
292
Chia-I Wu457d0a62014-08-18 13:02:26 +0800293 if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
294 params->h0 = layout->lods[0].slice_height;
295
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800296 if (info->mipLevels > 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800297 params->h1 = layout->lods[1].slice_height;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800298 else
299 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
300 }
301}
302
303static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800304layout_init_alignments(struct intel_layout *layout,
305 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800306{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800307 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800308
309 /*
310 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
311 *
312 * "surface format align_i align_j
313 * YUV 4:2:2 formats 4 *see below
314 * BC1-5 4 4
315 * FXT1 8 4
316 * all other formats 4 *see below"
317 *
318 * "- align_j = 4 for any depth buffer
319 * - align_j = 2 for separate stencil buffer
320 * - align_j = 4 for any render target surface is multisampled (4x)
321 * - align_j = 4 for any render target surface with Surface Vertical
322 * Alignment = VALIGN_4
323 * - align_j = 2 for any render target surface with Surface Vertical
324 * Alignment = VALIGN_2
325 * - align_j = 2 for all other render target surface
326 * - align_j = 2 for any sampling engine surface with Surface Vertical
327 * Alignment = VALIGN_2
328 * - align_j = 4 for any sampling engine surface with Surface Vertical
329 * Alignment = VALIGN_4"
330 *
331 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
332 *
333 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
334 * the Surface Format is 96 bits per element (BPE)."
335 *
336 * They can be rephrased as
337 *
338 * align_i align_j
339 * compressed formats block width block height
340 * PIPE_FORMAT_S8_UINT 4 2
341 * other depth/stencil formats 4 4
342 * 4x multisampled 4 4
343 * bpp 96 4 2
344 * others 4 2 or 4
345 */
346
347 /*
348 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
349 *
350 * "surface defined by surface format align_i align_j
351 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
352 * not D16_UNORM 4 4
353 * 3DSTATE_STENCIL_BUFFER N/A 8 8
354 * SURFACE_STATE BC*, ETC*, EAC* 4 4
355 * FXT1 8 4
356 * all others (set by SURFACE_STATE)"
357 *
358 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
359 *
360 * "- This field (Surface Vertical Aligment) is intended to be set to
361 * VALIGN_4 if the surface was rendered as a depth buffer, for a
362 * multisampled (4x) render target, or for a multisampled (8x)
363 * render target, since these surfaces support only alignment of 4.
364 * - Use of VALIGN_4 for other surfaces is supported, but uses more
365 * memory.
366 * - This field must be set to VALIGN_4 for all tiled Y Render Target
367 * surfaces.
368 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
369 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
370 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
371 * must be set to VALIGN_4."
372 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
373 *
374 * "- This field (Surface Horizontal Aligment) is intended to be set to
375 * HALIGN_8 only if the surface was rendered as a depth buffer with
376 * Z16 format or a stencil buffer, since these surfaces support only
377 * alignment of 8.
378 * - Use of HALIGN_8 for other surfaces is supported, but uses more
379 * memory.
380 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
381 * - This field must be set to HALIGN_8 if the Surface Format is
382 * FXT1."
383 *
384 * They can be rephrased as
385 *
386 * align_i align_j
387 * compressed formats block width block height
388 * PIPE_FORMAT_Z16_UNORM 8 4
389 * PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu457d0a62014-08-18 13:02:26 +0800390 * other depth/stencil formats 4 4
Chia-I Wu4bc47012014-08-14 13:03:25 +0800391 * 2x or 4x multisampled 4 or 8 4
392 * tiled Y 4 or 8 4 (if rt)
393 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
394 * others 4 or 8 2 or 4
395 */
396
397 if (params->compressed) {
398 /* this happens to be the case */
399 layout->align_i = layout->block_width;
400 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800401 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
402 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
403 switch (layout->format.channelFormat) {
404 case XGL_CH_FMT_R16:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800405 layout->align_i = 8;
406 layout->align_j = 4;
407 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800408 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800409 layout->align_i = 8;
410 layout->align_j = 8;
411 break;
412 default:
413 layout->align_i = 4;
414 layout->align_j = 4;
415 break;
416 }
417 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800418 switch (layout->format.channelFormat) {
419 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800420 layout->align_i = 4;
421 layout->align_j = 2;
422 break;
423 default:
424 layout->align_i = 4;
425 layout->align_j = 4;
426 break;
427 }
428 }
429 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800430 const bool valign_4 = (info->samples > 1) ||
431 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800432 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800433 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800434
435 if (valign_4)
436 assert(layout->block_size != 12);
437
438 layout->align_i = 4;
439 layout->align_j = (valign_4) ? 4 : 2;
440 }
441
442 /*
443 * the fact that align i and j are multiples of block width and height
444 * respectively is what makes the size of the bo a multiple of the block
445 * size, slices start at block boundaries, and many of the computations
446 * work.
447 */
448 assert(layout->align_i % layout->block_width == 0);
449 assert(layout->align_j % layout->block_height == 0);
450
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800451 /* make sure u_align() works */
452 assert(u_is_pow2(layout->align_i) &&
453 u_is_pow2(layout->align_j));
454 assert(u_is_pow2(layout->block_width) &&
455 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800456}
457
458static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800459layout_get_valid_tilings(const struct intel_layout *layout,
460 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800461{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800462 const XGL_IMAGE_CREATE_INFO *info = params->info;
463 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800464 unsigned valid_tilings = LAYOUT_TILING_ALL;
465
Chia-I Wu794d12a2014-09-15 14:55:25 +0800466 /*
467 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
468 *
469 * "Display/Overlay Y-Major not supported.
470 * X-Major required for Async Flips"
471 */
472 if (params->scanout)
473 valid_tilings &= LAYOUT_TILING_X;
474
Chia-I Wu6ac93992014-08-30 18:23:28 +0800475 if (info->tiling == XGL_LINEAR_TILING)
476 valid_tilings &= LAYOUT_TILING_NONE;
477
Chia-I Wu4bc47012014-08-14 13:03:25 +0800478 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800479 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
480 *
481 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
482 * Depth Buffer is not supported."
483 *
484 * "The Depth Buffer, if tiled, must use Y-Major tiling."
485 *
486 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
487 *
488 * "W-Major Tile Format is used for separate stencil."
489 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800490 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
491 switch (format.channelFormat) {
492 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800493 valid_tilings &= LAYOUT_TILING_W;
494 break;
495 default:
496 valid_tilings &= LAYOUT_TILING_Y;
497 break;
498 }
499 }
500
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800501 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800502 /*
503 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
504 *
505 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
506 * either TileX or Linear."
507 */
508 if (layout->block_size == 16)
509 valid_tilings &= ~LAYOUT_TILING_Y;
510
511 /*
512 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
513 *
514 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
515 * for all tiled Y Render Target surfaces."
516 *
517 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
518 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800519 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800520 valid_tilings &= ~LAYOUT_TILING_Y;
521 }
522
523 /* no conflicting binding flags */
524 assert(valid_tilings);
525
526 return valid_tilings;
527}
528
529static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800530layout_init_tiling(struct intel_layout *layout,
531 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800532{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800533 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800534 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
535
Chia-I Wu457d0a62014-08-18 13:02:26 +0800536 /* no hardware support for W-tile */
537 if (valid_tilings & LAYOUT_TILING_W)
538 valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
539
Chia-I Wu4bc47012014-08-14 13:03:25 +0800540 layout->valid_tilings = valid_tilings;
541
Chia-I Wu457d0a62014-08-18 13:02:26 +0800542 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
543 XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800544 /*
545 * heuristically set a minimum width/height for enabling tiling
546 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800547 if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800548 valid_tilings &= ~LAYOUT_TILING_X;
549
Chia-I Wu457d0a62014-08-18 13:02:26 +0800550 if ((layout->width0 < 32 || layout->height0 < 16) &&
551 (layout->width0 < 16 || layout->height0 < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800552 (valid_tilings & ~LAYOUT_TILING_Y))
553 valid_tilings &= ~LAYOUT_TILING_Y;
554 } else {
555 /* force linear if we are not sure where the texture is bound to */
556 if (valid_tilings & LAYOUT_TILING_NONE)
557 valid_tilings &= LAYOUT_TILING_NONE;
558 }
559
560 /* prefer tiled over linear */
561 if (valid_tilings & LAYOUT_TILING_Y)
562 layout->tiling = INTEL_TILING_Y;
563 else if (valid_tilings & LAYOUT_TILING_X)
564 layout->tiling = INTEL_TILING_X;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800565 else
Chia-I Wu4bc47012014-08-14 13:03:25 +0800566 layout->tiling = INTEL_TILING_NONE;
567}
568
569static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800570layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800571 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800572{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800573 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800574
575 /*
576 * It is not explicitly states, but render targets are expected to be
577 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
578 * to be IMS (samples interleaved).
579 *
580 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
581 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800582 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800583 /*
584 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
585 *
586 * "note that the depth buffer and stencil buffer have an implied
587 * value of ARYSPC_FULL"
588 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800589 layout->walk = (info->imageType == XGL_IMAGE_3D) ?
590 INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800591
Chia-I Wu457d0a62014-08-18 13:02:26 +0800592 layout->interleaved_samples = true;
593 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800594 /*
595 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
596 *
597 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
598 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
599 * Array Spacing) must be set to ARYSPC_LOD0."
600 *
601 * As multisampled resources are not mipmapped, we never use
602 * ARYSPC_FULL for them.
603 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800604 if (info->samples > 1)
605 assert(info->mipLevels == 1);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800606
607 layout->walk =
608 (info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
609 (info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
610 INTEL_LAYOUT_WALK_LOD;
611
612 layout->interleaved_samples = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800613 }
614}
615
616static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800617layout_init_walk_gen6(struct intel_layout *layout,
618 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800619{
Chia-I Wu4bc47012014-08-14 13:03:25 +0800620 /*
621 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
622 *
623 * "The separate stencil buffer does not support mip mapping, thus the
624 * storage for LODs other than LOD 0 is not needed. The following
625 * QPitch equation applies only to the separate stencil buffer:
626 *
627 * QPitch = h_0"
628 *
629 * GEN6 does not support compact spacing otherwise.
630 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800631 layout->walk =
632 (params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
633 intel_format_is_stencil(params->gpu, layout->format) ? INTEL_LAYOUT_WALK_LOD :
634 INTEL_LAYOUT_WALK_LAYER;
635
636 /* GEN6 supports only interleaved samples */
637 layout->interleaved_samples = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800638}
639
640static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800641layout_init_walk(struct intel_layout *layout,
642 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800643{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800644 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800645 layout_init_walk_gen7(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800646 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800647 layout_init_walk_gen6(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800648}
649
650static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800651layout_init_size_and_format(struct intel_layout *layout,
652 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800653{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800654 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800655 XGL_FORMAT format = info->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800656 bool require_separate_stencil;
657
Chia-I Wu457d0a62014-08-18 13:02:26 +0800658 layout->width0 = info->extent.width;
659 layout->height0 = info->extent.height;
660
Chia-I Wu4bc47012014-08-14 13:03:25 +0800661 /*
662 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
663 *
664 * "This field (Separate Stencil Buffer Enable) must be set to the same
665 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
666 *
667 * GEN7+ requires separate stencil buffers.
668 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800669 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
670 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800671 require_separate_stencil = true;
672 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800673 require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800674 }
675
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800676 if (format.numericFormat == XGL_NUM_FMT_DS) {
677 switch (format.channelFormat) {
678 case XGL_CH_FMT_R32G8:
679 if (require_separate_stencil) {
680 format.channelFormat = XGL_CH_FMT_R32;
681 layout->separate_stencil = true;
682 }
683 break;
684 default:
685 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800686 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800687 }
688
Chia-I Wu4bc47012014-08-14 13:03:25 +0800689 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800690 layout->block_width = icd_format_get_block_width(format);
691 layout->block_height = layout->block_width;
692 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800693
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800694 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800695}
696
697static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800698layout_want_mcs(struct intel_layout *layout,
699 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800700{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800701 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800702 bool want_mcs = false;
703
704 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800705 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800706 return false;
707
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800708 if (info->imageType != XGL_IMAGE_2D ||
709 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800710 return false;
711
712 /*
713 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
714 *
715 * "For Render Target and Sampling Engine Surfaces:If the surface is
716 * multisampled (Number of Multisamples any value other than
717 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
718 *
719 * "This field must be set to 0 for all SINT MSRTs when all RT channels
720 * are not written"
721 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800722 if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800723 !icd_format_is_int(info->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800724 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800725 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800726 /*
727 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
728 *
729 * "When MCS is buffer is used for color clear of non-multisampler
730 * render target, the following restrictions apply.
731 * - Support is limited to tiled render targets.
732 * - Support is for non-mip-mapped and non-array surface types
733 * only.
734 * - Clear is supported only on the full RT; i.e., no partial clear
735 * or overlapping clears.
736 * - MCS buffer for non-MSRT is supported only for RT formats
737 * 32bpp, 64bpp and 128bpp.
738 * ..."
739 */
740 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800741 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800742 switch (layout->block_size) {
743 case 4:
744 case 8:
745 case 16:
746 want_mcs = true;
747 break;
748 default:
749 break;
750 }
751 }
752 }
753
754 return want_mcs;
755}
756
757static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800758layout_want_hiz(const struct intel_layout *layout,
759 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800760{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800761 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800762
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800763 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800764 return false;
765
Chia-I Wufb240262014-08-16 13:26:06 +0800766 if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800767 return false;
768
Chia-I Wu457d0a62014-08-18 13:02:26 +0800769 /*
770 * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
771 * for every level. This is generally fine except on GEN6, where HiZ and
772 * separate stencil are enabled and disabled at the same time. When the
773 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
774 * can result in incompatible formats.
775 */
776 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
777 info->format.channelFormat == XGL_CH_FMT_R32G8 &&
778 info->mipLevels > 1)
779 return false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800780
Chia-I Wu457d0a62014-08-18 13:02:26 +0800781 return true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800782}
783
784static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800785layout_init_aux(struct intel_layout *layout,
786 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800787{
788 if (layout_want_hiz(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800789 layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800790 else if (layout_want_mcs(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800791 layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800792}
793
794static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800795layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800796{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800797 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800798 int align_w = 1, align_h = 1, pad_h = 0;
799
800 /*
801 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
802 *
803 * "To determine the necessary padding on the bottom and right side of
804 * the surface, refer to the table in Section 7.18.3.4 for the i and j
805 * parameters for the surface format in use. The surface must then be
806 * extended to the next multiple of the alignment unit size in each
807 * dimension, and all texels contained in this extended surface must
808 * have valid GTT entries."
809 *
810 * "For cube surfaces, an additional two rows of padding are required
811 * at the bottom of the surface. This must be ensured regardless of
812 * whether the surface is stored tiled or linear. This is due to the
813 * potential rotation of cache line orientation from memory to cache."
814 *
815 * "For compressed textures (BC* and FXT1 surface formats), padding at
816 * the bottom of the surface is to an even compressed row, which is
817 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
818 * purposes, these surfaces behave as if j = 8 only for surface
819 * padding purposes. The value of 4 for j still applies for mip level
820 * alignment and QPitch calculation."
821 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800822 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
823 if (align_w < layout->align_i)
824 align_w = layout->align_i;
825 if (align_h < layout->align_j)
826 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800827
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800828 /* in case it is used as a cube */
829 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800830 pad_h += 2;
831
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800832 if (params->compressed && align_h < layout->align_j * 2)
833 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800834 }
835
836 /*
837 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
838 *
839 * "If the surface contains an odd number of rows of data, a final row
840 * below the surface must be allocated."
841 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800842 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
843 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800844
845 /*
846 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu457d0a62014-08-18 13:02:26 +0800847 * intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800848 * To avoid out-of-bound access, we have to pad.
849 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800850 if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
851 info->mipLevels == 1 &&
852 info->arraySize == 1 &&
853 info->extent.depth == 1) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800854 if (align_w < 8)
855 align_w = 8;
856 if (align_h < 4)
857 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800858 }
859
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800860 params->max_x = u_align(params->max_x, align_w);
861 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800862}
863
864/* note that this may force the texture to be linear */
865static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800866layout_calculate_bo_size(struct intel_layout *layout,
867 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800868{
869 assert(params->max_x % layout->block_width == 0);
870 assert(params->max_y % layout->block_height == 0);
871 assert(layout->layer_height % layout->block_height == 0);
872
873 layout->bo_stride =
874 (params->max_x / layout->block_width) * layout->block_size;
875 layout->bo_height = params->max_y / layout->block_height;
876
877 while (true) {
878 unsigned w = layout->bo_stride, h = layout->bo_height;
879 unsigned align_w, align_h;
880
881 /*
882 * From the Haswell PRM, volume 5, page 163:
883 *
884 * "For linear surfaces, additional padding of 64 bytes is required
885 * at the bottom of the surface. This is in addition to the padding
886 * required above."
887 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800888 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
889 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800890 layout->tiling == INTEL_TILING_NONE) {
891 layout->bo_height +=
892 (64 + layout->bo_stride - 1) / layout->bo_stride;
893 }
894
895 /*
896 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
897 *
898 * "- For linear render target surfaces, the pitch must be a
899 * multiple of the element size for non-YUV surface formats.
900 * Pitch must be a multiple of 2 * element size for YUV surface
901 * formats.
902 * - For other linear surfaces, the pitch can be any multiple of
903 * bytes.
904 * - For tiled surfaces, the pitch must be a multiple of the tile
905 * width."
906 *
907 * Different requirements may exist when the bo is used in different
908 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800909 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800910 */
911 switch (layout->tiling) {
912 case INTEL_TILING_X:
913 align_w = 512;
914 align_h = 8;
915 break;
916 case INTEL_TILING_Y:
917 align_w = 128;
918 align_h = 32;
919 break;
920 default:
Chia-I Wufb240262014-08-16 13:26:06 +0800921 if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800922 /*
923 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
924 *
925 * "A 4KB tile is subdivided into 8-high by 8-wide array of
926 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
927 * bytes."
928 *
929 * Since we asked for INTEL_TILING_NONE instead of the non-existent
930 * INTEL_TILING_W, we want to align to W tiles here.
931 */
932 align_w = 64;
933 align_h = 64;
934 } else {
935 /* some good enough values */
936 align_w = 64;
937 align_h = 2;
938 }
939 break;
940 }
941
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800942 w = u_align(w, align_w);
943 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800944
945 /* make sure the bo is mappable */
946 if (layout->tiling != INTEL_TILING_NONE) {
947 /*
948 * Usually only the first 256MB of the GTT is mappable.
949 *
950 * See also how intel_context::max_gtt_map_object_size is calculated.
951 */
952 const size_t mappable_gtt_size = 256 * 1024 * 1024;
953
954 /*
955 * Be conservative. We may be able to switch from VALIGN_4 to
956 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
957 */
958 if (mappable_gtt_size / w / 4 < h) {
959 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
960 layout->tiling = INTEL_TILING_NONE;
961 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800962 if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800963 params->info->samples <= 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800964 layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800965
966 continue;
967 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800968 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800969 }
970 }
971 }
972
973 layout->bo_stride = w;
974 layout->bo_height = h;
975 break;
976 }
977}
978
979static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800980layout_calculate_hiz_size(struct intel_layout *layout,
981 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800982{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800983 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800984 const unsigned hz_align_j = 8;
985 enum intel_layout_walk_type hz_walk;
986 unsigned hz_width, hz_height, lv;
987 unsigned hz_clear_w, hz_clear_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800988
Chia-I Wu457d0a62014-08-18 13:02:26 +0800989 assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
990
991 assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
992 layout->walk == INTEL_LAYOUT_WALK_3D);
993
994 /*
995 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
996 *
997 * "The hierarchical depth buffer does not support the LOD field, it is
998 * assumed by hardware to be zero. A separate hierarachical depth
999 * buffer is required for each LOD used, and the corresponding
1000 * buffer's state delivered to hardware each time a new depth buffer
1001 * state with modified LOD is delivered."
1002 *
1003 * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
1004 */
1005 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1006 hz_walk = layout->walk;
1007 else
1008 hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001009
1010 /*
1011 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1012 * PRM, volume 2 part 1, page 312-313.
1013 *
1014 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1015 * memory row.
1016 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001017 switch (hz_walk) {
1018 case INTEL_LAYOUT_WALK_LOD:
1019 {
1020 unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
1021 unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
1022 unsigned cur_tx, cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001023
Chia-I Wu457d0a62014-08-18 13:02:26 +08001024 /* figure out the tile offsets of LODs */
1025 hz_width = 0;
1026 hz_height = 0;
1027 cur_tx = 0;
1028 cur_ty = 0;
1029 for (lv = 0; lv < info->mipLevels; lv++) {
1030 unsigned tw, th;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001031
Chia-I Wu457d0a62014-08-18 13:02:26 +08001032 lod_tx[lv] = cur_tx;
1033 lod_ty[lv] = cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001034
Chia-I Wu457d0a62014-08-18 13:02:26 +08001035 tw = u_align(layout->lods[lv].slice_width, 16);
1036 th = u_align(layout->lods[lv].slice_height, hz_align_j) *
1037 info->arraySize / 2;
1038 /* convert to Y-tiles */
1039 tw = u_align(tw, 128) / 128;
1040 th = u_align(th, 32) / 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001041
Chia-I Wu457d0a62014-08-18 13:02:26 +08001042 if (hz_width < cur_tx + tw)
1043 hz_width = cur_tx + tw;
1044 if (hz_height < cur_ty + th)
1045 hz_height = cur_ty + th;
1046
1047 if (lv == 1)
1048 cur_tx += tw;
1049 else
1050 cur_ty += th;
1051 }
1052
1053 /* convert tile offsets to memory offsets */
1054 for (lv = 0; lv < info->mipLevels; lv++) {
1055 layout->aux_offsets[lv] =
1056 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1057 }
1058 hz_width *= 128;
1059 hz_height *= 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001060 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001061 break;
1062 case INTEL_LAYOUT_WALK_LAYER:
1063 {
1064 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001065 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001066 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001067 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001068 const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001069
Chia-I Wu457d0a62014-08-18 13:02:26 +08001070 hz_width = u_align(layout->lods[0].slice_width, 16);
1071
1072 hz_height = hz_qpitch * info->arraySize / 2;
1073 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1074 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001075 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001076 break;
1077 case INTEL_LAYOUT_WALK_3D:
1078 hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001079
Chia-I Wu457d0a62014-08-18 13:02:26 +08001080 hz_height = 0;
1081 for (lv = 0; lv < info->mipLevels; lv++) {
1082 const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
1083 /* according to the formula, slices are packed together vertically */
1084 hz_height += h * u_minify(info->extent.depth, lv);
1085 }
1086 hz_height /= 2;
1087 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001088 }
1089
Chia-I Wu457d0a62014-08-18 13:02:26 +08001090 /*
1091 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1092 * Experiments on Haswell show that aligning the RECTLIST primitive and
1093 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1094 * aligned.
1095 */
1096 hz_clear_w = 8;
1097 hz_clear_h = 4;
1098 switch (info->samples) {
1099 case 0:
1100 case 1:
1101 default:
1102 break;
1103 case 2:
1104 hz_clear_w /= 2;
1105 break;
1106 case 4:
1107 hz_clear_w /= 2;
1108 hz_clear_h /= 2;
1109 break;
1110 case 8:
1111 hz_clear_w /= 4;
1112 hz_clear_h /= 2;
1113 break;
1114 case 16:
1115 hz_clear_w /= 4;
1116 hz_clear_h /= 4;
1117 break;
1118 }
1119
1120 for (lv = 0; lv < info->mipLevels; lv++) {
1121 if (u_minify(layout->width0, lv) % hz_clear_w ||
1122 u_minify(layout->height0, lv) % hz_clear_h)
1123 break;
1124 layout->aux_enables |= 1 << lv;
1125 }
1126
1127 /* we padded to allow this in layout_align() */
1128 if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
1129 layout->aux_enables |= 0x1;
1130
Chia-I Wu4bc47012014-08-14 13:03:25 +08001131 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001132 layout->aux_stride = u_align(hz_width, 128);
1133 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001134}
1135
1136static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001137layout_calculate_mcs_size(struct intel_layout *layout,
1138 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001139{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001140 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001141 int mcs_width, mcs_height, mcs_cpp;
1142 int downscale_x, downscale_y;
1143
Chia-I Wu457d0a62014-08-18 13:02:26 +08001144 assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001145
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001146 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001147 /*
1148 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1149 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1150 * need of scale down could be that the clear rectangle is used to clear
1151 * the MCS instead of the RT.
1152 *
1153 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1154 * 2x2 factor could come from that the hardware writes 128 bits (an
1155 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1156 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1157 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1158 * pixel block in the RT.
1159 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001160 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001161 case 2:
1162 case 4:
1163 downscale_x = 8;
1164 downscale_y = 2;
1165 mcs_cpp = 1;
1166 break;
1167 case 8:
1168 downscale_x = 2;
1169 downscale_y = 2;
1170 mcs_cpp = 4;
1171 break;
1172 case 16:
1173 downscale_x = 2;
1174 downscale_y = 1;
1175 mcs_cpp = 8;
1176 break;
1177 default:
1178 assert(!"unsupported sample count");
1179 return;
1180 break;
1181 }
1182
1183 /*
1184 * It also appears that the 2x2 subspans generated by the scaled-down
1185 * clear rectangle cannot be masked. The scale-down clear rectangle
1186 * thus must be aligned to 2x2, and we need to pad.
1187 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001188 mcs_width = u_align(layout->width0, downscale_x * 2);
1189 mcs_height = u_align(layout->height0, downscale_y * 2);
1190 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001191 /*
1192 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1193 *
1194 * " Pixels Lines
1195 * TiledY RT CL
1196 * bpp
1197 * 32 8 4
1198 * 64 4 4
1199 * 128 2 4
1200 *
1201 * TiledX RT CL
1202 * bpp
1203 * 32 16 2
1204 * 64 8 2
1205 * 128 4 2"
1206 *
1207 * This table and the two following tables define the RT alignments, the
1208 * clear rectangle alignments, and the clear rectangle scale factors.
1209 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1210 * that the clear rectangle alignments are 16x32 blocks, and the clear
1211 * rectangle scale factors are 8x16 blocks.
1212 *
1213 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1214 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1215 * 8x16 blocks.
1216 *
1217 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1218 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1219 * which says that a Y-tile maps to 128x256 blocks (\see
1220 * intel_get_non_msrt_mcs_alignment). It does not really change
1221 * anything except for the size of the allocated MCS. Let's see if we
1222 * hit out-of-bound access.
1223 */
1224 switch (layout->tiling) {
1225 case INTEL_TILING_X:
1226 downscale_x = 64 / layout->block_size;
1227 downscale_y = 2;
1228 break;
1229 case INTEL_TILING_Y:
1230 downscale_x = 32 / layout->block_size;
1231 downscale_y = 4;
1232 break;
1233 default:
1234 assert(!"unsupported tiling mode");
1235 return;
1236 break;
1237 }
1238
1239 downscale_x *= 8;
1240 downscale_y *= 16;
1241
1242 /*
1243 * From the Haswell PRM, volume 7, page 652:
1244 *
1245 * "Clear rectangle must be aligned to two times the number of
1246 * pixels in the table shown below due to 16X16 hashing across the
1247 * slice."
1248 *
1249 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1250 * 2x2, and we need to pad.
1251 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001252 mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
1253 mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001254 mcs_cpp = 16; /* an OWord */
1255 }
1256
Chia-I Wu457d0a62014-08-18 13:02:26 +08001257 layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001258 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001259 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1260 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001261}
1262
1263/**
1264 * Initialize the layout. Callers should zero-initialize \p layout first.
1265 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001266void intel_layout_init(struct intel_layout *layout,
1267 const struct intel_dev *dev,
Chia-I Wu794d12a2014-09-15 14:55:25 +08001268 const XGL_IMAGE_CREATE_INFO *info,
1269 bool scanout)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001270{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001271 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001272
1273 memset(&params, 0, sizeof(params));
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001274 params.gpu = dev->gpu;
1275 params.info = info;
Chia-I Wu794d12a2014-09-15 14:55:25 +08001276 params.scanout = scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001277
1278 /* note that there are dependencies between these functions */
1279 layout_init_aux(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001280 layout_init_size_and_format(layout, &params);
1281 layout_init_walk(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001282 layout_init_tiling(layout, &params);
1283 layout_init_alignments(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001284 layout_init_lods(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001285 layout_init_layer_height(layout, &params);
1286
1287 layout_align(layout, &params);
1288 layout_calculate_bo_size(layout, &params);
1289
Chia-I Wu457d0a62014-08-18 13:02:26 +08001290 switch (layout->aux) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001291 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001292 layout_calculate_hiz_size(layout, &params);
1293 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001294 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001295 layout_calculate_mcs_size(layout, &params);
1296 break;
1297 default:
1298 break;
1299 }
1300}
1301
1302/**
1303 * Update the tiling mode and bo stride (for imported resources).
1304 */
1305bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001306intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu457d0a62014-08-18 13:02:26 +08001307 enum intel_tiling_mode tiling,
1308 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001309{
1310 if (!(layout->valid_tilings & (1 << tiling)))
1311 return false;
1312
1313 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1314 (tiling == INTEL_TILING_Y && bo_stride % 128))
1315 return false;
1316
1317 layout->tiling = tiling;
1318 layout->bo_stride = bo_stride;
1319
1320 return true;
1321}