/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
34 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
35 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
36 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
37 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
38
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
46 const struct intel_gpu *gpu;
47 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080048
49 bool compressed;
50
51 unsigned h0, h1;
52 unsigned max_x, max_y;
53};
54
55static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080056layout_get_slice_size(const struct intel_layout *layout,
57 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080058 unsigned level, unsigned *width, unsigned *height)
59{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080060 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080061 unsigned w, h;
62
Chia-I Wu457d0a62014-08-18 13:02:26 +080063 w = u_minify(layout->width0, level);
64 h = u_minify(layout->height0, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080065
66 /*
67 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
68 *
69 * "The dimensions of the mip maps are first determined by applying the
70 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
71 * if necessary, they are padded out to compression block boundaries."
72 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080073 w = u_align(w, layout->block_width);
74 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080075
76 /*
77 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
78 *
79 * "If the surface is multisampled (4x), these values must be adjusted
80 * as follows before proceeding:
81 *
82 * W_L = ceiling(W_L / 2) * 4
83 * H_L = ceiling(H_L / 2) * 4"
84 *
85 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
86 *
87 * "If the surface is multisampled and it is a depth or stencil surface
88 * or Multisampled Surface StorageFormat in SURFACE_STATE is
89 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
90 * proceeding:
91 *
92 * #samples W_L = H_L =
93 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
94 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
95 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
96 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
97 *
98 * For interleaved samples (4x), where pixels
99 *
100 * (x, y ) (x+1, y )
101 * (x, y+1) (x+1, y+1)
102 *
103 * would be is occupied by
104 *
105 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
106 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
107 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
108 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
109 *
110 * Thus the need to
111 *
Chia-I Wu457d0a62014-08-18 13:02:26 +0800112 * w = align(w, 2) * 2;
113 * y = align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800114 */
115 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800116 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800117 case 0:
118 case 1:
119 break;
120 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800121 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800122 break;
123 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800124 w = u_align(w, 2) * 2;
125 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800126 break;
127 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800128 w = u_align(w, 2) * 4;
129 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800130 break;
131 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800132 w = u_align(w, 2) * 4;
133 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800134 break;
135 default:
136 assert(!"unsupported sample count");
137 break;
138 }
139 }
140
Chia-I Wu457d0a62014-08-18 13:02:26 +0800141 /*
142 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
143 *
144 * "For separate stencil buffer, the width must be mutiplied by 2 and
145 * height divided by 2..."
146 *
147 * To make things easier (for transfer), we will just double the stencil
148 * stride in 3DSTATE_STENCIL_BUFFER.
149 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800150 w = u_align(w, layout->align_i);
151 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800152
153 *width = w;
154 *height = h;
155}
156
157static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800158layout_get_num_layers(const struct intel_layout *layout,
159 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800160{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800161 const XGL_IMAGE_CREATE_INFO *info = params->info;
162 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800163
164 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800165 if (info->samples > 1 && !layout->interleaved_samples)
166 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800167
168 return num_layers;
169}
170
171static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800172layout_init_layer_height(struct intel_layout *layout,
173 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800174{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800175 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800176 unsigned num_layers;
177
Chia-I Wu457d0a62014-08-18 13:02:26 +0800178 if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
179 return;
180
Chia-I Wu4bc47012014-08-14 13:03:25 +0800181 num_layers = layout_get_num_layers(layout, params);
182 if (num_layers <= 1)
183 return;
184
Chia-I Wu4bc47012014-08-14 13:03:25 +0800185 /*
186 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
187 *
188 * "The following equation is used for surface formats other than
189 * compressed textures:
190 *
191 * QPitch = (h0 + h1 + 11j)"
192 *
193 * "The equation for compressed textures (BC* and FXT1 surface formats)
194 * follows:
195 *
196 * QPitch = (h0 + h1 + 11j) / 4"
197 *
198 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
199 * value calculated in the equation above, for every other odd Surface
200 * Height starting from 1 i.e. 1,5,9,13"
201 *
202 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
203 *
204 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
205 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
206 *
207 * QPitch = (h0 + h1 + 12j)
208 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
209 *
210 * (There are many typos or missing words here...)"
211 *
212 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
213 * the base address. The PRM divides QPitch by 4 for compressed formats
214 * because the block height for those formats are 4, and it wants QPitch to
215 * mean the number of memory rows, as opposed to texel rows, between
216 * slices. Since we use texel rows everywhere, we do not need to divide
217 * QPitch by 4.
218 */
219 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800220 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800221
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800222 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800223 layout->height0 % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800224 layout->layer_height += 4;
225
226 params->max_y += layout->layer_height * (num_layers - 1);
227}
228
229static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800230layout_init_lods(struct intel_layout *layout,
231 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800232{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800233 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800234 unsigned cur_x, cur_y;
235 unsigned lv;
236
237 cur_x = 0;
238 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800239 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu457d0a62014-08-18 13:02:26 +0800240 unsigned lod_w, lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800241
Chia-I Wu457d0a62014-08-18 13:02:26 +0800242 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800243
Chia-I Wu457d0a62014-08-18 13:02:26 +0800244 layout->lods[lv].x = cur_x;
245 layout->lods[lv].y = cur_y;
246 layout->lods[lv].slice_width = lod_w;
247 layout->lods[lv].slice_height = lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800248
Chia-I Wu457d0a62014-08-18 13:02:26 +0800249 switch (layout->walk) {
250 case INTEL_LAYOUT_WALK_LOD:
251 lod_h *= layout_get_num_layers(layout, params);
252 if (lv == 1)
253 cur_x += lod_w;
254 else
255 cur_y += lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800256
Chia-I Wu457d0a62014-08-18 13:02:26 +0800257 /* every LOD begins at tile boundaries */
258 if (info->mipLevels > 1) {
259 intel_format_is_stencil(params->gpu, layout->format);
260 cur_x = u_align(cur_x, 64);
261 cur_y = u_align(cur_y, 64);
262 }
263 break;
264 case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800265 /* MIPLAYOUT_BELOW */
266 if (lv == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800267 cur_x += lod_w;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800268 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800269 cur_y += lod_h;
270 break;
271 case INTEL_LAYOUT_WALK_3D:
272 {
273 const unsigned num_slices = u_minify(info->extent.depth, lv);
274 const unsigned num_slices_per_row = 1 << lv;
275 const unsigned num_rows =
276 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
277
278 lod_w *= num_slices_per_row;
279 lod_h *= num_rows;
280
281 cur_y += lod_h;
282 }
283 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800284 }
285
Chia-I Wu457d0a62014-08-18 13:02:26 +0800286 if (params->max_x < layout->lods[lv].x + lod_w)
287 params->max_x = layout->lods[lv].x + lod_w;
288 if (params->max_y < layout->lods[lv].y + lod_h)
289 params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800290 }
291
Chia-I Wu457d0a62014-08-18 13:02:26 +0800292 if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
293 params->h0 = layout->lods[0].slice_height;
294
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800295 if (info->mipLevels > 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800296 params->h1 = layout->lods[1].slice_height;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800297 else
298 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
299 }
300}
301
302static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800303layout_init_alignments(struct intel_layout *layout,
304 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800305{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800306 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800307
308 /*
309 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
310 *
311 * "surface format align_i align_j
312 * YUV 4:2:2 formats 4 *see below
313 * BC1-5 4 4
314 * FXT1 8 4
315 * all other formats 4 *see below"
316 *
317 * "- align_j = 4 for any depth buffer
318 * - align_j = 2 for separate stencil buffer
319 * - align_j = 4 for any render target surface is multisampled (4x)
320 * - align_j = 4 for any render target surface with Surface Vertical
321 * Alignment = VALIGN_4
322 * - align_j = 2 for any render target surface with Surface Vertical
323 * Alignment = VALIGN_2
324 * - align_j = 2 for all other render target surface
325 * - align_j = 2 for any sampling engine surface with Surface Vertical
326 * Alignment = VALIGN_2
327 * - align_j = 4 for any sampling engine surface with Surface Vertical
328 * Alignment = VALIGN_4"
329 *
330 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
331 *
332 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
333 * the Surface Format is 96 bits per element (BPE)."
334 *
335 * They can be rephrased as
336 *
337 * align_i align_j
338 * compressed formats block width block height
339 * PIPE_FORMAT_S8_UINT 4 2
340 * other depth/stencil formats 4 4
341 * 4x multisampled 4 4
342 * bpp 96 4 2
343 * others 4 2 or 4
344 */
345
346 /*
347 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
348 *
349 * "surface defined by surface format align_i align_j
350 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
351 * not D16_UNORM 4 4
352 * 3DSTATE_STENCIL_BUFFER N/A 8 8
353 * SURFACE_STATE BC*, ETC*, EAC* 4 4
354 * FXT1 8 4
355 * all others (set by SURFACE_STATE)"
356 *
357 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
358 *
359 * "- This field (Surface Vertical Aligment) is intended to be set to
360 * VALIGN_4 if the surface was rendered as a depth buffer, for a
361 * multisampled (4x) render target, or for a multisampled (8x)
362 * render target, since these surfaces support only alignment of 4.
363 * - Use of VALIGN_4 for other surfaces is supported, but uses more
364 * memory.
365 * - This field must be set to VALIGN_4 for all tiled Y Render Target
366 * surfaces.
367 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
368 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
369 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
370 * must be set to VALIGN_4."
371 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
372 *
373 * "- This field (Surface Horizontal Aligment) is intended to be set to
374 * HALIGN_8 only if the surface was rendered as a depth buffer with
375 * Z16 format or a stencil buffer, since these surfaces support only
376 * alignment of 8.
377 * - Use of HALIGN_8 for other surfaces is supported, but uses more
378 * memory.
379 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
380 * - This field must be set to HALIGN_8 if the Surface Format is
381 * FXT1."
382 *
383 * They can be rephrased as
384 *
385 * align_i align_j
386 * compressed formats block width block height
387 * PIPE_FORMAT_Z16_UNORM 8 4
388 * PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu457d0a62014-08-18 13:02:26 +0800389 * other depth/stencil formats 4 4
Chia-I Wu4bc47012014-08-14 13:03:25 +0800390 * 2x or 4x multisampled 4 or 8 4
391 * tiled Y 4 or 8 4 (if rt)
392 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
393 * others 4 or 8 2 or 4
394 */
395
396 if (params->compressed) {
397 /* this happens to be the case */
398 layout->align_i = layout->block_width;
399 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800400 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
401 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
402 switch (layout->format.channelFormat) {
403 case XGL_CH_FMT_R16:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800404 layout->align_i = 8;
405 layout->align_j = 4;
406 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800407 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800408 layout->align_i = 8;
409 layout->align_j = 8;
410 break;
411 default:
412 layout->align_i = 4;
413 layout->align_j = 4;
414 break;
415 }
416 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800417 switch (layout->format.channelFormat) {
418 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800419 layout->align_i = 4;
420 layout->align_j = 2;
421 break;
422 default:
423 layout->align_i = 4;
424 layout->align_j = 4;
425 break;
426 }
427 }
428 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800429 const bool valign_4 = (info->samples > 1) ||
430 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800431 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800432 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800433
434 if (valign_4)
435 assert(layout->block_size != 12);
436
437 layout->align_i = 4;
438 layout->align_j = (valign_4) ? 4 : 2;
439 }
440
441 /*
442 * the fact that align i and j are multiples of block width and height
443 * respectively is what makes the size of the bo a multiple of the block
444 * size, slices start at block boundaries, and many of the computations
445 * work.
446 */
447 assert(layout->align_i % layout->block_width == 0);
448 assert(layout->align_j % layout->block_height == 0);
449
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800450 /* make sure u_align() works */
451 assert(u_is_pow2(layout->align_i) &&
452 u_is_pow2(layout->align_j));
453 assert(u_is_pow2(layout->block_width) &&
454 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800455}
456
457static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800458layout_get_valid_tilings(const struct intel_layout *layout,
459 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800460{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800461 const XGL_IMAGE_CREATE_INFO *info = params->info;
462 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800463 unsigned valid_tilings = LAYOUT_TILING_ALL;
464
Chia-I Wu6ac93992014-08-30 18:23:28 +0800465 if (info->tiling == XGL_LINEAR_TILING)
466 valid_tilings &= LAYOUT_TILING_NONE;
467
Chia-I Wu4bc47012014-08-14 13:03:25 +0800468 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800469 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
470 *
471 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
472 * Depth Buffer is not supported."
473 *
474 * "The Depth Buffer, if tiled, must use Y-Major tiling."
475 *
476 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
477 *
478 * "W-Major Tile Format is used for separate stencil."
479 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800480 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
481 switch (format.channelFormat) {
482 case XGL_CH_FMT_R8:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800483 valid_tilings &= LAYOUT_TILING_W;
484 break;
485 default:
486 valid_tilings &= LAYOUT_TILING_Y;
487 break;
488 }
489 }
490
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800491 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800492 /*
493 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
494 *
495 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
496 * either TileX or Linear."
497 */
498 if (layout->block_size == 16)
499 valid_tilings &= ~LAYOUT_TILING_Y;
500
501 /*
502 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
503 *
504 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
505 * for all tiled Y Render Target surfaces."
506 *
507 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
508 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800509 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800510 valid_tilings &= ~LAYOUT_TILING_Y;
511 }
512
513 /* no conflicting binding flags */
514 assert(valid_tilings);
515
516 return valid_tilings;
517}
518
519static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800520layout_init_tiling(struct intel_layout *layout,
521 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800522{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800523 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800524 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
525
Chia-I Wu457d0a62014-08-18 13:02:26 +0800526 /* no hardware support for W-tile */
527 if (valid_tilings & LAYOUT_TILING_W)
528 valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
529
Chia-I Wu4bc47012014-08-14 13:03:25 +0800530 layout->valid_tilings = valid_tilings;
531
Chia-I Wu457d0a62014-08-18 13:02:26 +0800532 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
533 XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800534 /*
535 * heuristically set a minimum width/height for enabling tiling
536 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800537 if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800538 valid_tilings &= ~LAYOUT_TILING_X;
539
Chia-I Wu457d0a62014-08-18 13:02:26 +0800540 if ((layout->width0 < 32 || layout->height0 < 16) &&
541 (layout->width0 < 16 || layout->height0 < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800542 (valid_tilings & ~LAYOUT_TILING_Y))
543 valid_tilings &= ~LAYOUT_TILING_Y;
544 } else {
545 /* force linear if we are not sure where the texture is bound to */
546 if (valid_tilings & LAYOUT_TILING_NONE)
547 valid_tilings &= LAYOUT_TILING_NONE;
548 }
549
550 /* prefer tiled over linear */
551 if (valid_tilings & LAYOUT_TILING_Y)
552 layout->tiling = INTEL_TILING_Y;
553 else if (valid_tilings & LAYOUT_TILING_X)
554 layout->tiling = INTEL_TILING_X;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800555 else
Chia-I Wu4bc47012014-08-14 13:03:25 +0800556 layout->tiling = INTEL_TILING_NONE;
557}
558
559static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800560layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800561 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800562{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800563 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800564
565 /*
566 * It is not explicitly states, but render targets are expected to be
567 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
568 * to be IMS (samples interleaved).
569 *
570 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
571 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800572 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800573 /*
574 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
575 *
576 * "note that the depth buffer and stencil buffer have an implied
577 * value of ARYSPC_FULL"
578 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800579 layout->walk = (info->imageType == XGL_IMAGE_3D) ?
580 INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800581
Chia-I Wu457d0a62014-08-18 13:02:26 +0800582 layout->interleaved_samples = true;
583 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800584 /*
585 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
586 *
587 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
588 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
589 * Array Spacing) must be set to ARYSPC_LOD0."
590 *
591 * As multisampled resources are not mipmapped, we never use
592 * ARYSPC_FULL for them.
593 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800594 if (info->samples > 1)
595 assert(info->mipLevels == 1);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800596
597 layout->walk =
598 (info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
599 (info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
600 INTEL_LAYOUT_WALK_LOD;
601
602 layout->interleaved_samples = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800603 }
604}
605
606static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800607layout_init_walk_gen6(struct intel_layout *layout,
608 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800609{
Chia-I Wu4bc47012014-08-14 13:03:25 +0800610 /*
611 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
612 *
613 * "The separate stencil buffer does not support mip mapping, thus the
614 * storage for LODs other than LOD 0 is not needed. The following
615 * QPitch equation applies only to the separate stencil buffer:
616 *
617 * QPitch = h_0"
618 *
619 * GEN6 does not support compact spacing otherwise.
620 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800621 layout->walk =
622 (params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
623 intel_format_is_stencil(params->gpu, layout->format) ? INTEL_LAYOUT_WALK_LOD :
624 INTEL_LAYOUT_WALK_LAYER;
625
626 /* GEN6 supports only interleaved samples */
627 layout->interleaved_samples = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800628}
629
630static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800631layout_init_walk(struct intel_layout *layout,
632 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800633{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800634 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800635 layout_init_walk_gen7(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800636 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800637 layout_init_walk_gen6(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800638}
639
640static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800641layout_init_size_and_format(struct intel_layout *layout,
642 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800643{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800644 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800645 XGL_FORMAT format = info->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800646 bool require_separate_stencil;
647
Chia-I Wu457d0a62014-08-18 13:02:26 +0800648 layout->width0 = info->extent.width;
649 layout->height0 = info->extent.height;
650
Chia-I Wu4bc47012014-08-14 13:03:25 +0800651 /*
652 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
653 *
654 * "This field (Separate Stencil Buffer Enable) must be set to the same
655 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
656 *
657 * GEN7+ requires separate stencil buffers.
658 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800659 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
660 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800661 require_separate_stencil = true;
662 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800663 require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800664 }
665
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800666 if (format.numericFormat == XGL_NUM_FMT_DS) {
667 switch (format.channelFormat) {
668 case XGL_CH_FMT_R32G8:
669 if (require_separate_stencil) {
670 format.channelFormat = XGL_CH_FMT_R32;
671 layout->separate_stencil = true;
672 }
673 break;
674 default:
675 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800676 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800677 }
678
Chia-I Wu4bc47012014-08-14 13:03:25 +0800679 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800680 layout->block_width = icd_format_get_block_width(format);
681 layout->block_height = layout->block_width;
682 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800683
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800684 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800685}
686
687static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800688layout_want_mcs(struct intel_layout *layout,
689 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800690{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800691 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800692 bool want_mcs = false;
693
694 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800695 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800696 return false;
697
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800698 if (info->imageType != XGL_IMAGE_2D ||
699 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800700 return false;
701
702 /*
703 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
704 *
705 * "For Render Target and Sampling Engine Surfaces:If the surface is
706 * multisampled (Number of Multisamples any value other than
707 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
708 *
709 * "This field must be set to 0 for all SINT MSRTs when all RT channels
710 * are not written"
711 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800712 if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800713 !icd_format_is_int(info->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800714 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800715 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800716 /*
717 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
718 *
719 * "When MCS is buffer is used for color clear of non-multisampler
720 * render target, the following restrictions apply.
721 * - Support is limited to tiled render targets.
722 * - Support is for non-mip-mapped and non-array surface types
723 * only.
724 * - Clear is supported only on the full RT; i.e., no partial clear
725 * or overlapping clears.
726 * - MCS buffer for non-MSRT is supported only for RT formats
727 * 32bpp, 64bpp and 128bpp.
728 * ..."
729 */
730 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800731 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800732 switch (layout->block_size) {
733 case 4:
734 case 8:
735 case 16:
736 want_mcs = true;
737 break;
738 default:
739 break;
740 }
741 }
742 }
743
744 return want_mcs;
745}
746
747static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800748layout_want_hiz(const struct intel_layout *layout,
749 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800750{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800751 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800752
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800753 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800754 return false;
755
Chia-I Wufb240262014-08-16 13:26:06 +0800756 if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800757 return false;
758
Chia-I Wu457d0a62014-08-18 13:02:26 +0800759 /*
760 * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
761 * for every level. This is generally fine except on GEN6, where HiZ and
762 * separate stencil are enabled and disabled at the same time. When the
763 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
764 * can result in incompatible formats.
765 */
766 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
767 info->format.channelFormat == XGL_CH_FMT_R32G8 &&
768 info->mipLevels > 1)
769 return false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800770
Chia-I Wu457d0a62014-08-18 13:02:26 +0800771 return true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800772}
773
774static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800775layout_init_aux(struct intel_layout *layout,
776 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800777{
778 if (layout_want_hiz(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800779 layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800780 else if (layout_want_mcs(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800781 layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800782}
783
784static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800785layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800786{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800787 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800788 int align_w = 1, align_h = 1, pad_h = 0;
789
790 /*
791 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
792 *
793 * "To determine the necessary padding on the bottom and right side of
794 * the surface, refer to the table in Section 7.18.3.4 for the i and j
795 * parameters for the surface format in use. The surface must then be
796 * extended to the next multiple of the alignment unit size in each
797 * dimension, and all texels contained in this extended surface must
798 * have valid GTT entries."
799 *
800 * "For cube surfaces, an additional two rows of padding are required
801 * at the bottom of the surface. This must be ensured regardless of
802 * whether the surface is stored tiled or linear. This is due to the
803 * potential rotation of cache line orientation from memory to cache."
804 *
805 * "For compressed textures (BC* and FXT1 surface formats), padding at
806 * the bottom of the surface is to an even compressed row, which is
807 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
808 * purposes, these surfaces behave as if j = 8 only for surface
809 * padding purposes. The value of 4 for j still applies for mip level
810 * alignment and QPitch calculation."
811 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800812 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
813 if (align_w < layout->align_i)
814 align_w = layout->align_i;
815 if (align_h < layout->align_j)
816 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800817
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800818 /* in case it is used as a cube */
819 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800820 pad_h += 2;
821
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800822 if (params->compressed && align_h < layout->align_j * 2)
823 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800824 }
825
826 /*
827 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
828 *
829 * "If the surface contains an odd number of rows of data, a final row
830 * below the surface must be allocated."
831 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800832 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
833 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800834
835 /*
836 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu457d0a62014-08-18 13:02:26 +0800837 * intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800838 * To avoid out-of-bound access, we have to pad.
839 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800840 if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
841 info->mipLevels == 1 &&
842 info->arraySize == 1 &&
843 info->extent.depth == 1) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800844 if (align_w < 8)
845 align_w = 8;
846 if (align_h < 4)
847 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800848 }
849
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800850 params->max_x = u_align(params->max_x, align_w);
851 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800852}
853
854/* note that this may force the texture to be linear */
855static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800856layout_calculate_bo_size(struct intel_layout *layout,
857 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800858{
859 assert(params->max_x % layout->block_width == 0);
860 assert(params->max_y % layout->block_height == 0);
861 assert(layout->layer_height % layout->block_height == 0);
862
863 layout->bo_stride =
864 (params->max_x / layout->block_width) * layout->block_size;
865 layout->bo_height = params->max_y / layout->block_height;
866
867 while (true) {
868 unsigned w = layout->bo_stride, h = layout->bo_height;
869 unsigned align_w, align_h;
870
871 /*
872 * From the Haswell PRM, volume 5, page 163:
873 *
874 * "For linear surfaces, additional padding of 64 bytes is required
875 * at the bottom of the surface. This is in addition to the padding
876 * required above."
877 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800878 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
879 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800880 layout->tiling == INTEL_TILING_NONE) {
881 layout->bo_height +=
882 (64 + layout->bo_stride - 1) / layout->bo_stride;
883 }
884
885 /*
886 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
887 *
888 * "- For linear render target surfaces, the pitch must be a
889 * multiple of the element size for non-YUV surface formats.
890 * Pitch must be a multiple of 2 * element size for YUV surface
891 * formats.
892 * - For other linear surfaces, the pitch can be any multiple of
893 * bytes.
894 * - For tiled surfaces, the pitch must be a multiple of the tile
895 * width."
896 *
897 * Different requirements may exist when the bo is used in different
898 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800899 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800900 */
901 switch (layout->tiling) {
902 case INTEL_TILING_X:
903 align_w = 512;
904 align_h = 8;
905 break;
906 case INTEL_TILING_Y:
907 align_w = 128;
908 align_h = 32;
909 break;
910 default:
Chia-I Wufb240262014-08-16 13:26:06 +0800911 if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800912 /*
913 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
914 *
915 * "A 4KB tile is subdivided into 8-high by 8-wide array of
916 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
917 * bytes."
918 *
919 * Since we asked for INTEL_TILING_NONE instead of the non-existent
920 * INTEL_TILING_W, we want to align to W tiles here.
921 */
922 align_w = 64;
923 align_h = 64;
924 } else {
925 /* some good enough values */
926 align_w = 64;
927 align_h = 2;
928 }
929 break;
930 }
931
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800932 w = u_align(w, align_w);
933 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800934
935 /* make sure the bo is mappable */
936 if (layout->tiling != INTEL_TILING_NONE) {
937 /*
938 * Usually only the first 256MB of the GTT is mappable.
939 *
940 * See also how intel_context::max_gtt_map_object_size is calculated.
941 */
942 const size_t mappable_gtt_size = 256 * 1024 * 1024;
943
944 /*
945 * Be conservative. We may be able to switch from VALIGN_4 to
946 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
947 */
948 if (mappable_gtt_size / w / 4 < h) {
949 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
950 layout->tiling = INTEL_TILING_NONE;
951 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800952 if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800953 params->info->samples <= 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800954 layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800955
956 continue;
957 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800958 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800959 }
960 }
961 }
962
963 layout->bo_stride = w;
964 layout->bo_height = h;
965 break;
966 }
967}
968
969static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800970layout_calculate_hiz_size(struct intel_layout *layout,
971 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800972{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800973 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800974 const unsigned hz_align_j = 8;
975 enum intel_layout_walk_type hz_walk;
976 unsigned hz_width, hz_height, lv;
977 unsigned hz_clear_w, hz_clear_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800978
Chia-I Wu457d0a62014-08-18 13:02:26 +0800979 assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
980
981 assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
982 layout->walk == INTEL_LAYOUT_WALK_3D);
983
984 /*
985 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
986 *
987 * "The hierarchical depth buffer does not support the LOD field, it is
988 * assumed by hardware to be zero. A separate hierarachical depth
989 * buffer is required for each LOD used, and the corresponding
990 * buffer's state delivered to hardware each time a new depth buffer
991 * state with modified LOD is delivered."
992 *
993 * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
994 */
995 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
996 hz_walk = layout->walk;
997 else
998 hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800999
1000 /*
1001 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1002 * PRM, volume 2 part 1, page 312-313.
1003 *
1004 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1005 * memory row.
1006 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001007 switch (hz_walk) {
1008 case INTEL_LAYOUT_WALK_LOD:
1009 {
1010 unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
1011 unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
1012 unsigned cur_tx, cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001013
Chia-I Wu457d0a62014-08-18 13:02:26 +08001014 /* figure out the tile offsets of LODs */
1015 hz_width = 0;
1016 hz_height = 0;
1017 cur_tx = 0;
1018 cur_ty = 0;
1019 for (lv = 0; lv < info->mipLevels; lv++) {
1020 unsigned tw, th;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001021
Chia-I Wu457d0a62014-08-18 13:02:26 +08001022 lod_tx[lv] = cur_tx;
1023 lod_ty[lv] = cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001024
Chia-I Wu457d0a62014-08-18 13:02:26 +08001025 tw = u_align(layout->lods[lv].slice_width, 16);
1026 th = u_align(layout->lods[lv].slice_height, hz_align_j) *
1027 info->arraySize / 2;
1028 /* convert to Y-tiles */
1029 tw = u_align(tw, 128) / 128;
1030 th = u_align(th, 32) / 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001031
Chia-I Wu457d0a62014-08-18 13:02:26 +08001032 if (hz_width < cur_tx + tw)
1033 hz_width = cur_tx + tw;
1034 if (hz_height < cur_ty + th)
1035 hz_height = cur_ty + th;
1036
1037 if (lv == 1)
1038 cur_tx += tw;
1039 else
1040 cur_ty += th;
1041 }
1042
1043 /* convert tile offsets to memory offsets */
1044 for (lv = 0; lv < info->mipLevels; lv++) {
1045 layout->aux_offsets[lv] =
1046 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1047 }
1048 hz_width *= 128;
1049 hz_height *= 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001050 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001051 break;
1052 case INTEL_LAYOUT_WALK_LAYER:
1053 {
1054 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001055 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001056 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001057 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001058 const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001059
Chia-I Wu457d0a62014-08-18 13:02:26 +08001060 hz_width = u_align(layout->lods[0].slice_width, 16);
1061
1062 hz_height = hz_qpitch * info->arraySize / 2;
1063 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1064 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001065 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001066 break;
1067 case INTEL_LAYOUT_WALK_3D:
1068 hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001069
Chia-I Wu457d0a62014-08-18 13:02:26 +08001070 hz_height = 0;
1071 for (lv = 0; lv < info->mipLevels; lv++) {
1072 const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
1073 /* according to the formula, slices are packed together vertically */
1074 hz_height += h * u_minify(info->extent.depth, lv);
1075 }
1076 hz_height /= 2;
1077 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001078 }
1079
Chia-I Wu457d0a62014-08-18 13:02:26 +08001080 /*
1081 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1082 * Experiments on Haswell show that aligning the RECTLIST primitive and
1083 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1084 * aligned.
1085 */
1086 hz_clear_w = 8;
1087 hz_clear_h = 4;
1088 switch (info->samples) {
1089 case 0:
1090 case 1:
1091 default:
1092 break;
1093 case 2:
1094 hz_clear_w /= 2;
1095 break;
1096 case 4:
1097 hz_clear_w /= 2;
1098 hz_clear_h /= 2;
1099 break;
1100 case 8:
1101 hz_clear_w /= 4;
1102 hz_clear_h /= 2;
1103 break;
1104 case 16:
1105 hz_clear_w /= 4;
1106 hz_clear_h /= 4;
1107 break;
1108 }
1109
1110 for (lv = 0; lv < info->mipLevels; lv++) {
1111 if (u_minify(layout->width0, lv) % hz_clear_w ||
1112 u_minify(layout->height0, lv) % hz_clear_h)
1113 break;
1114 layout->aux_enables |= 1 << lv;
1115 }
1116
1117 /* we padded to allow this in layout_align() */
1118 if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
1119 layout->aux_enables |= 0x1;
1120
Chia-I Wu4bc47012014-08-14 13:03:25 +08001121 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001122 layout->aux_stride = u_align(hz_width, 128);
1123 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001124}
1125
1126static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001127layout_calculate_mcs_size(struct intel_layout *layout,
1128 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001129{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001130 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001131 int mcs_width, mcs_height, mcs_cpp;
1132 int downscale_x, downscale_y;
1133
Chia-I Wu457d0a62014-08-18 13:02:26 +08001134 assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001135
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001136 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001137 /*
1138 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1139 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1140 * need of scale down could be that the clear rectangle is used to clear
1141 * the MCS instead of the RT.
1142 *
1143 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1144 * 2x2 factor could come from that the hardware writes 128 bits (an
1145 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1146 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1147 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1148 * pixel block in the RT.
1149 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001150 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001151 case 2:
1152 case 4:
1153 downscale_x = 8;
1154 downscale_y = 2;
1155 mcs_cpp = 1;
1156 break;
1157 case 8:
1158 downscale_x = 2;
1159 downscale_y = 2;
1160 mcs_cpp = 4;
1161 break;
1162 case 16:
1163 downscale_x = 2;
1164 downscale_y = 1;
1165 mcs_cpp = 8;
1166 break;
1167 default:
1168 assert(!"unsupported sample count");
1169 return;
1170 break;
1171 }
1172
1173 /*
1174 * It also appears that the 2x2 subspans generated by the scaled-down
1175 * clear rectangle cannot be masked. The scale-down clear rectangle
1176 * thus must be aligned to 2x2, and we need to pad.
1177 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001178 mcs_width = u_align(layout->width0, downscale_x * 2);
1179 mcs_height = u_align(layout->height0, downscale_y * 2);
1180 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001181 /*
1182 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1183 *
1184 * " Pixels Lines
1185 * TiledY RT CL
1186 * bpp
1187 * 32 8 4
1188 * 64 4 4
1189 * 128 2 4
1190 *
1191 * TiledX RT CL
1192 * bpp
1193 * 32 16 2
1194 * 64 8 2
1195 * 128 4 2"
1196 *
1197 * This table and the two following tables define the RT alignments, the
1198 * clear rectangle alignments, and the clear rectangle scale factors.
1199 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1200 * that the clear rectangle alignments are 16x32 blocks, and the clear
1201 * rectangle scale factors are 8x16 blocks.
1202 *
1203 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1204 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1205 * 8x16 blocks.
1206 *
1207 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1208 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1209 * which says that a Y-tile maps to 128x256 blocks (\see
1210 * intel_get_non_msrt_mcs_alignment). It does not really change
1211 * anything except for the size of the allocated MCS. Let's see if we
1212 * hit out-of-bound access.
1213 */
1214 switch (layout->tiling) {
1215 case INTEL_TILING_X:
1216 downscale_x = 64 / layout->block_size;
1217 downscale_y = 2;
1218 break;
1219 case INTEL_TILING_Y:
1220 downscale_x = 32 / layout->block_size;
1221 downscale_y = 4;
1222 break;
1223 default:
1224 assert(!"unsupported tiling mode");
1225 return;
1226 break;
1227 }
1228
1229 downscale_x *= 8;
1230 downscale_y *= 16;
1231
1232 /*
1233 * From the Haswell PRM, volume 7, page 652:
1234 *
1235 * "Clear rectangle must be aligned to two times the number of
1236 * pixels in the table shown below due to 16X16 hashing across the
1237 * slice."
1238 *
1239 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1240 * 2x2, and we need to pad.
1241 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001242 mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
1243 mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001244 mcs_cpp = 16; /* an OWord */
1245 }
1246
Chia-I Wu457d0a62014-08-18 13:02:26 +08001247 layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001248 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001249 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1250 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001251}
1252
1253/**
1254 * Initialize the layout. Callers should zero-initialize \p layout first.
1255 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001256void intel_layout_init(struct intel_layout *layout,
1257 const struct intel_dev *dev,
1258 const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001259{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001260 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001261
1262 memset(&params, 0, sizeof(params));
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001263 params.gpu = dev->gpu;
1264 params.info = info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001265
1266 /* note that there are dependencies between these functions */
1267 layout_init_aux(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001268 layout_init_size_and_format(layout, &params);
1269 layout_init_walk(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001270 layout_init_tiling(layout, &params);
1271 layout_init_alignments(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001272 layout_init_lods(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001273 layout_init_layer_height(layout, &params);
1274
1275 layout_align(layout, &params);
1276 layout_calculate_bo_size(layout, &params);
1277
Chia-I Wu457d0a62014-08-18 13:02:26 +08001278 switch (layout->aux) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001279 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001280 layout_calculate_hiz_size(layout, &params);
1281 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001282 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001283 layout_calculate_mcs_size(layout, &params);
1284 break;
1285 default:
1286 break;
1287 }
1288}
1289
1290/**
1291 * Update the tiling mode and bo stride (for imported resources).
1292 */
1293bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001294intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu457d0a62014-08-18 13:02:26 +08001295 enum intel_tiling_mode tiling,
1296 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001297{
1298 if (!(layout->valid_tilings & (1 << tiling)))
1299 return false;
1300
1301 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1302 (tiling == INTEL_TILING_Y && bo_stride % 128))
1303 return false;
1304
1305 layout->tiling = tiling;
1306 layout->bo_stride = bo_stride;
1307
1308 return true;
1309}