blob: c2f76161447442905112d3d8c4f5440cc6c1033e [file] [log] [blame]
Chia-I Wu4bc47012014-08-14 13:03:25 +08001/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28#include "ilo_layout.h"
29
30enum {
31 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
32 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
33 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
34 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
35
36 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
37 LAYOUT_TILING_X |
38 LAYOUT_TILING_Y |
39 LAYOUT_TILING_W)
40};
41
/*
 * Scratch state shared by the layout_*() helpers while a layout is being
 * computed.
 */
struct ilo_layout_params {
   /* device being targeted; its gen selects between the GEN6/GEN7 rules */
   const struct ilo_dev_info *dev;
   /* resource template the layout is computed for */
   const struct pipe_resource *templ;

   /* whether the (possibly substituted) layout format is compressed */
   bool compressed;

   /* slice height of mip level 0, filled in by layout_init_levels() */
   unsigned h0;
   /* slice height of mip level 1; only filled in for full layers */
   unsigned h1;

   /* running extent of the surface in texels, grown by the helpers */
   unsigned max_x;
   unsigned max_y;
};
51
52static void
53layout_get_slice_size(const struct ilo_layout *layout,
54 const struct ilo_layout_params *params,
55 unsigned level, unsigned *width, unsigned *height)
56{
57 const struct pipe_resource *templ = params->templ;
58 unsigned w, h;
59
60 w = u_minify(templ->width0, level);
61 h = u_minify(templ->height0, level);
62
63 /*
64 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
65 *
66 * "The dimensions of the mip maps are first determined by applying the
67 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
68 * if necessary, they are padded out to compression block boundaries."
69 */
70 w = align(w, layout->block_width);
71 h = align(h, layout->block_height);
72
73 /*
74 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
75 *
76 * "If the surface is multisampled (4x), these values must be adjusted
77 * as follows before proceeding:
78 *
79 * W_L = ceiling(W_L / 2) * 4
80 * H_L = ceiling(H_L / 2) * 4"
81 *
82 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
83 *
84 * "If the surface is multisampled and it is a depth or stencil surface
85 * or Multisampled Surface StorageFormat in SURFACE_STATE is
86 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
87 * proceeding:
88 *
89 * #samples W_L = H_L =
90 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
91 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
92 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
93 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
94 *
95 * For interleaved samples (4x), where pixels
96 *
97 * (x, y ) (x+1, y )
98 * (x, y+1) (x+1, y+1)
99 *
100 * would be is occupied by
101 *
102 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
103 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
104 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
105 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
106 *
107 * Thus the need to
108 *
109 * w = align(w, 2) * 2;
110 * y = align(y, 2) * 2;
111 */
112 if (layout->interleaved_samples) {
113 switch (templ->nr_samples) {
114 case 0:
115 case 1:
116 break;
117 case 2:
118 w = align(w, 2) * 2;
119 break;
120 case 4:
121 w = align(w, 2) * 2;
122 h = align(h, 2) * 2;
123 break;
124 case 8:
125 w = align(w, 2) * 4;
126 h = align(h, 2) * 2;
127 break;
128 case 16:
129 w = align(w, 2) * 4;
130 h = align(h, 2) * 4;
131 break;
132 default:
133 assert(!"unsupported sample count");
134 break;
135 }
136 }
137
138 w = align(w, layout->align_i);
139 h = align(h, layout->align_j);
140
141 *width = w;
142 *height = h;
143}
144
145static unsigned
146layout_get_num_layers(const struct ilo_layout *layout,
147 const struct ilo_layout_params *params)
148{
149 const struct pipe_resource *templ = params->templ;
150 unsigned num_layers = templ->array_size;
151
152 /* samples of the same index are stored in a layer */
153 if (templ->nr_samples > 1 && !layout->interleaved_samples)
154 num_layers *= templ->nr_samples;
155
156 return num_layers;
157}
158
159static void
160layout_init_layer_height(struct ilo_layout *layout,
161 struct ilo_layout_params *params)
162{
163 const struct pipe_resource *templ = params->templ;
164 unsigned num_layers;
165
166 num_layers = layout_get_num_layers(layout, params);
167 if (num_layers <= 1)
168 return;
169
170 if (!layout->full_layers) {
171 layout->layer_height = params->h0;
172 params->max_y += params->h0 * (num_layers - 1);
173 return;
174 }
175
176 /*
177 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
178 *
179 * "The following equation is used for surface formats other than
180 * compressed textures:
181 *
182 * QPitch = (h0 + h1 + 11j)"
183 *
184 * "The equation for compressed textures (BC* and FXT1 surface formats)
185 * follows:
186 *
187 * QPitch = (h0 + h1 + 11j) / 4"
188 *
189 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
190 * value calculated in the equation above, for every other odd Surface
191 * Height starting from 1 i.e. 1,5,9,13"
192 *
193 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
194 *
195 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
196 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
197 *
198 * QPitch = (h0 + h1 + 12j)
199 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
200 *
201 * (There are many typos or missing words here...)"
202 *
203 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
204 * the base address. The PRM divides QPitch by 4 for compressed formats
205 * because the block height for those formats are 4, and it wants QPitch to
206 * mean the number of memory rows, as opposed to texel rows, between
207 * slices. Since we use texel rows everywhere, we do not need to divide
208 * QPitch by 4.
209 */
210 layout->layer_height = params->h0 + params->h1 +
211 ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
212
213 if (params->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
214 templ->height0 % 4 == 1)
215 layout->layer_height += 4;
216
217 params->max_y += layout->layer_height * (num_layers - 1);
218}
219
220static void
221layout_init_levels(struct ilo_layout *layout,
222 struct ilo_layout_params *params)
223{
224 const struct pipe_resource *templ = params->templ;
225 unsigned cur_x, cur_y;
226 unsigned lv;
227
228 cur_x = 0;
229 cur_y = 0;
230 for (lv = 0; lv <= templ->last_level; lv++) {
231 unsigned level_w, level_h;
232
233 layout_get_slice_size(layout, params, lv, &level_w, &level_h);
234
235 layout->levels[lv].x = cur_x;
236 layout->levels[lv].y = cur_y;
237 layout->levels[lv].slice_width = level_w;
238 layout->levels[lv].slice_height = level_h;
239
240 if (templ->target == PIPE_TEXTURE_3D) {
241 const unsigned num_slices = u_minify(templ->depth0, lv);
242 const unsigned num_slices_per_row = 1 << lv;
243 const unsigned num_rows =
244 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
245
246 level_w *= num_slices_per_row;
247 level_h *= num_rows;
248
249 cur_y += level_h;
250 } else {
251 /* MIPLAYOUT_BELOW */
252 if (lv == 1)
253 cur_x += level_w;
254 else
255 cur_y += level_h;
256 }
257
258 if (params->max_x < layout->levels[lv].x + level_w)
259 params->max_x = layout->levels[lv].x + level_w;
260 if (params->max_y < layout->levels[lv].y + level_h)
261 params->max_y = layout->levels[lv].y + level_h;
262 }
263
264 params->h0 = layout->levels[0].slice_height;
265 if (layout->full_layers) {
266 if (templ->last_level > 0)
267 params->h1 = layout->levels[1].slice_height;
268 else
269 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
270 }
271}
272
273static void
274layout_init_alignments(struct ilo_layout *layout,
275 struct ilo_layout_params *params)
276{
277 const struct pipe_resource *templ = params->templ;
278
279 /*
280 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
281 *
282 * "surface format align_i align_j
283 * YUV 4:2:2 formats 4 *see below
284 * BC1-5 4 4
285 * FXT1 8 4
286 * all other formats 4 *see below"
287 *
288 * "- align_j = 4 for any depth buffer
289 * - align_j = 2 for separate stencil buffer
290 * - align_j = 4 for any render target surface is multisampled (4x)
291 * - align_j = 4 for any render target surface with Surface Vertical
292 * Alignment = VALIGN_4
293 * - align_j = 2 for any render target surface with Surface Vertical
294 * Alignment = VALIGN_2
295 * - align_j = 2 for all other render target surface
296 * - align_j = 2 for any sampling engine surface with Surface Vertical
297 * Alignment = VALIGN_2
298 * - align_j = 4 for any sampling engine surface with Surface Vertical
299 * Alignment = VALIGN_4"
300 *
301 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
302 *
303 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
304 * the Surface Format is 96 bits per element (BPE)."
305 *
306 * They can be rephrased as
307 *
308 * align_i align_j
309 * compressed formats block width block height
310 * PIPE_FORMAT_S8_UINT 4 2
311 * other depth/stencil formats 4 4
312 * 4x multisampled 4 4
313 * bpp 96 4 2
314 * others 4 2 or 4
315 */
316
317 /*
318 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
319 *
320 * "surface defined by surface format align_i align_j
321 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
322 * not D16_UNORM 4 4
323 * 3DSTATE_STENCIL_BUFFER N/A 8 8
324 * SURFACE_STATE BC*, ETC*, EAC* 4 4
325 * FXT1 8 4
326 * all others (set by SURFACE_STATE)"
327 *
328 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
329 *
330 * "- This field (Surface Vertical Aligment) is intended to be set to
331 * VALIGN_4 if the surface was rendered as a depth buffer, for a
332 * multisampled (4x) render target, or for a multisampled (8x)
333 * render target, since these surfaces support only alignment of 4.
334 * - Use of VALIGN_4 for other surfaces is supported, but uses more
335 * memory.
336 * - This field must be set to VALIGN_4 for all tiled Y Render Target
337 * surfaces.
338 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
339 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
340 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
341 * must be set to VALIGN_4."
342 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
343 *
344 * "- This field (Surface Horizontal Aligment) is intended to be set to
345 * HALIGN_8 only if the surface was rendered as a depth buffer with
346 * Z16 format or a stencil buffer, since these surfaces support only
347 * alignment of 8.
348 * - Use of HALIGN_8 for other surfaces is supported, but uses more
349 * memory.
350 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
351 * - This field must be set to HALIGN_8 if the Surface Format is
352 * FXT1."
353 *
354 * They can be rephrased as
355 *
356 * align_i align_j
357 * compressed formats block width block height
358 * PIPE_FORMAT_Z16_UNORM 8 4
359 * PIPE_FORMAT_S8_UINT 8 8
360 * other depth/stencil formats 4 or 8 4
361 * 2x or 4x multisampled 4 or 8 4
362 * tiled Y 4 or 8 4 (if rt)
363 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
364 * others 4 or 8 2 or 4
365 */
366
367 if (params->compressed) {
368 /* this happens to be the case */
369 layout->align_i = layout->block_width;
370 layout->align_j = layout->block_height;
371 } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
372 if (params->dev->gen >= ILO_GEN(7)) {
373 switch (layout->format) {
374 case PIPE_FORMAT_Z16_UNORM:
375 layout->align_i = 8;
376 layout->align_j = 4;
377 break;
378 case PIPE_FORMAT_S8_UINT:
379 layout->align_i = 8;
380 layout->align_j = 8;
381 break;
382 default:
383 layout->align_i = 4;
384 layout->align_j = 4;
385 break;
386 }
387 } else {
388 switch (layout->format) {
389 case PIPE_FORMAT_S8_UINT:
390 layout->align_i = 4;
391 layout->align_j = 2;
392 break;
393 default:
394 layout->align_i = 4;
395 layout->align_j = 4;
396 break;
397 }
398 }
399 } else {
400 const bool valign_4 = (templ->nr_samples > 1) ||
401 (params->dev->gen >= ILO_GEN(7) &&
402 layout->tiling == INTEL_TILING_Y &&
403 (templ->bind & PIPE_BIND_RENDER_TARGET));
404
405 if (valign_4)
406 assert(layout->block_size != 12);
407
408 layout->align_i = 4;
409 layout->align_j = (valign_4) ? 4 : 2;
410 }
411
412 /*
413 * the fact that align i and j are multiples of block width and height
414 * respectively is what makes the size of the bo a multiple of the block
415 * size, slices start at block boundaries, and many of the computations
416 * work.
417 */
418 assert(layout->align_i % layout->block_width == 0);
419 assert(layout->align_j % layout->block_height == 0);
420
421 /* make sure align() works */
422 assert(util_is_power_of_two(layout->align_i) &&
423 util_is_power_of_two(layout->align_j));
424 assert(util_is_power_of_two(layout->block_width) &&
425 util_is_power_of_two(layout->block_height));
426}
427
428static unsigned
429layout_get_valid_tilings(const struct ilo_layout *layout,
430 const struct ilo_layout_params *params)
431{
432 const struct pipe_resource *templ = params->templ;
433 const enum pipe_format format = layout->format;
434 unsigned valid_tilings = LAYOUT_TILING_ALL;
435
436 /*
437 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
438 *
439 * "Display/Overlay Y-Major not supported.
440 * X-Major required for Async Flips"
441 */
442 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
443 valid_tilings &= LAYOUT_TILING_X;
444
445 /*
446 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
447 *
448 * "The cursor surface address must be 4K byte aligned. The cursor must
449 * be in linear memory, it cannot be tiled."
450 */
451 if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
452 valid_tilings &= LAYOUT_TILING_NONE;
453
454 /*
455 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
456 *
457 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
458 * Depth Buffer is not supported."
459 *
460 * "The Depth Buffer, if tiled, must use Y-Major tiling."
461 *
462 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
463 *
464 * "W-Major Tile Format is used for separate stencil."
465 */
466 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
467 switch (format) {
468 case PIPE_FORMAT_S8_UINT:
469 valid_tilings &= LAYOUT_TILING_W;
470 break;
471 default:
472 valid_tilings &= LAYOUT_TILING_Y;
473 break;
474 }
475 }
476
477 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
478 /*
479 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
480 *
481 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
482 * either TileX or Linear."
483 */
484 if (layout->block_size == 16)
485 valid_tilings &= ~LAYOUT_TILING_Y;
486
487 /*
488 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
489 *
490 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
491 * for all tiled Y Render Target surfaces."
492 *
493 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
494 */
495 if (params->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
496 valid_tilings &= ~LAYOUT_TILING_Y;
497 }
498
499 /* no conflicting binding flags */
500 assert(valid_tilings);
501
502 return valid_tilings;
503}
504
505static void
506layout_init_tiling(struct ilo_layout *layout,
507 struct ilo_layout_params *params)
508{
509 const struct pipe_resource *templ = params->templ;
510 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
511
512 layout->valid_tilings = valid_tilings;
513
514 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
515 /*
516 * heuristically set a minimum width/height for enabling tiling
517 */
518 if (templ->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
519 valid_tilings &= ~LAYOUT_TILING_X;
520
521 if ((templ->width0 < 32 || templ->height0 < 16) &&
522 (templ->width0 < 16 || templ->height0 < 32) &&
523 (valid_tilings & ~LAYOUT_TILING_Y))
524 valid_tilings &= ~LAYOUT_TILING_Y;
525 } else {
526 /* force linear if we are not sure where the texture is bound to */
527 if (valid_tilings & LAYOUT_TILING_NONE)
528 valid_tilings &= LAYOUT_TILING_NONE;
529 }
530
531 /* prefer tiled over linear */
532 if (valid_tilings & LAYOUT_TILING_Y)
533 layout->tiling = INTEL_TILING_Y;
534 else if (valid_tilings & LAYOUT_TILING_X)
535 layout->tiling = INTEL_TILING_X;
536 else /* linear or W-tiled, which has no hardware support */
537 layout->tiling = INTEL_TILING_NONE;
538}
539
540static void
541layout_init_arrangements_gen7(struct ilo_layout *layout,
542 struct ilo_layout_params *params)
543{
544 const struct pipe_resource *templ = params->templ;
545
546 /*
547 * It is not explicitly states, but render targets are expected to be
548 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
549 * to be IMS (samples interleaved).
550 *
551 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
552 */
553 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
554 layout->interleaved_samples = true;
555
556 /*
557 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
558 *
559 * "note that the depth buffer and stencil buffer have an implied
560 * value of ARYSPC_FULL"
561 */
562 layout->full_layers = true;
563 } else {
564 layout->interleaved_samples = false;
565
566 /*
567 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
568 *
569 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
570 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
571 * Array Spacing) must be set to ARYSPC_LOD0."
572 *
573 * As multisampled resources are not mipmapped, we never use
574 * ARYSPC_FULL for them.
575 */
576 if (templ->nr_samples > 1)
577 assert(templ->last_level == 0);
578 layout->full_layers = (templ->last_level > 0);
579 }
580}
581
582static void
583layout_init_arrangements_gen6(struct ilo_layout *layout,
584 struct ilo_layout_params *params)
585{
586 /* GEN6 supports only interleaved samples */
587 layout->interleaved_samples = true;
588
589 /*
590 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
591 *
592 * "The separate stencil buffer does not support mip mapping, thus the
593 * storage for LODs other than LOD 0 is not needed. The following
594 * QPitch equation applies only to the separate stencil buffer:
595 *
596 * QPitch = h_0"
597 *
598 * GEN6 does not support compact spacing otherwise.
599 */
600 layout->full_layers = (layout->format != PIPE_FORMAT_S8_UINT);
601}
602
603static void
604layout_init_arrangements(struct ilo_layout *layout,
605 struct ilo_layout_params *params)
606{
607 if (params->dev->gen >= ILO_GEN(7))
608 layout_init_arrangements_gen7(layout, params);
609 else
610 layout_init_arrangements_gen6(layout, params);
611
612 layout->is_2d = (params->templ->target != PIPE_TEXTURE_3D);
613}
614
615static void
616layout_init_format(struct ilo_layout *layout,
617 struct ilo_layout_params *params)
618{
619 const struct pipe_resource *templ = params->templ;
620 enum pipe_format format = templ->format;
621 bool require_separate_stencil;
622
623 /*
624 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
625 *
626 * "This field (Separate Stencil Buffer Enable) must be set to the same
627 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
628 *
629 * GEN7+ requires separate stencil buffers.
630 */
631 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
632 if (params->dev->gen >= ILO_GEN(7))
633 require_separate_stencil = true;
634 else
635 require_separate_stencil = (layout->aux_type == ILO_LAYOUT_AUX_HIZ);
636 }
637
638 switch (format) {
639 case PIPE_FORMAT_ETC1_RGB8:
640 format = PIPE_FORMAT_R8G8B8X8_UNORM;
641 break;
642 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
643 if (require_separate_stencil) {
644 format = PIPE_FORMAT_Z24X8_UNORM;
645 layout->separate_stencil = true;
646 }
647 break;
648 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
649 if (require_separate_stencil) {
650 format = PIPE_FORMAT_Z32_FLOAT;
651 layout->separate_stencil = true;
652 }
653 break;
654 default:
655 break;
656 }
657
658 params->compressed = util_format_is_compressed(format);
659
660 layout->format = format;
661 layout->block_width = util_format_get_blockwidth(format);
662 layout->block_height = util_format_get_blockheight(format);
663 layout->block_size = util_format_get_blocksize(format);
664}
665
666static bool
667layout_want_mcs(struct ilo_layout *layout,
668 struct ilo_layout_params *params)
669{
670 const struct pipe_resource *templ = params->templ;
671 bool want_mcs = false;
672
673 /* MCS is for RT on GEN7+ */
674 if (params->dev->gen < ILO_GEN(7))
675 return false;
676
677 if (templ->target != PIPE_TEXTURE_2D ||
678 !(templ->bind & PIPE_BIND_RENDER_TARGET))
679 return false;
680
681 /*
682 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
683 *
684 * "For Render Target and Sampling Engine Surfaces:If the surface is
685 * multisampled (Number of Multisamples any value other than
686 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
687 *
688 * "This field must be set to 0 for all SINT MSRTs when all RT channels
689 * are not written"
690 */
691 if (templ->nr_samples > 1 && !layout->interleaved_samples &&
692 !util_format_is_pure_sint(templ->format)) {
693 want_mcs = true;
694 } else if (templ->nr_samples <= 1) {
695 /*
696 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
697 *
698 * "When MCS is buffer is used for color clear of non-multisampler
699 * render target, the following restrictions apply.
700 * - Support is limited to tiled render targets.
701 * - Support is for non-mip-mapped and non-array surface types
702 * only.
703 * - Clear is supported only on the full RT; i.e., no partial clear
704 * or overlapping clears.
705 * - MCS buffer for non-MSRT is supported only for RT formats
706 * 32bpp, 64bpp and 128bpp.
707 * ..."
708 */
709 if (layout->tiling != INTEL_TILING_NONE &&
710 templ->last_level == 0 && templ->array_size == 1) {
711 switch (layout->block_size) {
712 case 4:
713 case 8:
714 case 16:
715 want_mcs = true;
716 break;
717 default:
718 break;
719 }
720 }
721 }
722
723 return want_mcs;
724}
725
726static bool
727layout_want_hiz(const struct ilo_layout *layout,
728 const struct ilo_layout_params *params)
729{
730 const struct pipe_resource *templ = params->templ;
731 const struct util_format_description *desc =
732 util_format_description(templ->format);
733 bool want_hiz = false;
734
735 if (ilo_debug & ILO_DEBUG_NOHIZ)
736 return false;
737
738 if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
739 return false;
740
741 if (!util_format_has_depth(desc))
742 return false;
743
744 /* no point in having HiZ */
745 if (templ->usage == PIPE_USAGE_STAGING)
746 return false;
747
748 if (params->dev->gen >= ILO_GEN(7)) {
749 want_hiz = true;
750 } else {
751 /*
752 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
753 *
754 * "The hierarchical depth buffer does not support the LOD field, it
755 * is assumed by hardware to be zero. A separate hierarachical
756 * depth buffer is required for each LOD used, and the
757 * corresponding buffer's state delivered to hardware each time a
758 * new depth buffer state with modified LOD is delivered."
759 *
760 * But we have a stronger requirement. Because of layer offsetting
761 * (check out the callers of ilo_layout_get_slice_tile_offset()), we
762 * already have to require the texture to be non-mipmapped and
763 * non-array.
764 */
765 if (templ->last_level == 0 && templ->array_size == 1 &&
766 templ->depth0 == 1)
767 want_hiz = true;
768 }
769
770 return want_hiz;
771}
772
773static void
774layout_init_aux(struct ilo_layout *layout,
775 struct ilo_layout_params *params)
776{
777 if (layout_want_hiz(layout, params))
778 layout->aux_type = ILO_LAYOUT_AUX_HIZ;
779 else if (layout_want_mcs(layout, params))
780 layout->aux_type = ILO_LAYOUT_AUX_MCS;
781}
782
783static void
784layout_align(struct ilo_layout *layout, struct ilo_layout_params *params)
785{
786 const struct pipe_resource *templ = params->templ;
787 int align_w = 1, align_h = 1, pad_h = 0;
788
789 /*
790 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
791 *
792 * "To determine the necessary padding on the bottom and right side of
793 * the surface, refer to the table in Section 7.18.3.4 for the i and j
794 * parameters for the surface format in use. The surface must then be
795 * extended to the next multiple of the alignment unit size in each
796 * dimension, and all texels contained in this extended surface must
797 * have valid GTT entries."
798 *
799 * "For cube surfaces, an additional two rows of padding are required
800 * at the bottom of the surface. This must be ensured regardless of
801 * whether the surface is stored tiled or linear. This is due to the
802 * potential rotation of cache line orientation from memory to cache."
803 *
804 * "For compressed textures (BC* and FXT1 surface formats), padding at
805 * the bottom of the surface is to an even compressed row, which is
806 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
807 * purposes, these surfaces behave as if j = 8 only for surface
808 * padding purposes. The value of 4 for j still applies for mip level
809 * alignment and QPitch calculation."
810 */
811 if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
812 align_w = MAX2(align_w, layout->align_i);
813 align_h = MAX2(align_h, layout->align_j);
814
815 if (templ->target == PIPE_TEXTURE_CUBE)
816 pad_h += 2;
817
818 if (params->compressed)
819 align_h = MAX2(align_h, layout->align_j * 2);
820 }
821
822 /*
823 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
824 *
825 * "If the surface contains an odd number of rows of data, a final row
826 * below the surface must be allocated."
827 */
828 if (templ->bind & PIPE_BIND_RENDER_TARGET)
829 align_h = MAX2(align_h, 2);
830
831 /*
832 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
833 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
834 * To avoid out-of-bound access, we have to pad.
835 */
836 if (layout->aux_type == ILO_LAYOUT_AUX_HIZ) {
837 align_w = MAX2(align_w, 8);
838 align_h = MAX2(align_h, 4);
839 }
840
841 params->max_x = align(params->max_x, align_w);
842 params->max_y = align(params->max_y + pad_h, align_h);
843}
844
845/* note that this may force the texture to be linear */
846static void
847layout_calculate_bo_size(struct ilo_layout *layout,
848 struct ilo_layout_params *params)
849{
850 assert(params->max_x % layout->block_width == 0);
851 assert(params->max_y % layout->block_height == 0);
852 assert(layout->layer_height % layout->block_height == 0);
853
854 layout->bo_stride =
855 (params->max_x / layout->block_width) * layout->block_size;
856 layout->bo_height = params->max_y / layout->block_height;
857
858 while (true) {
859 unsigned w = layout->bo_stride, h = layout->bo_height;
860 unsigned align_w, align_h;
861
862 /*
863 * From the Haswell PRM, volume 5, page 163:
864 *
865 * "For linear surfaces, additional padding of 64 bytes is required
866 * at the bottom of the surface. This is in addition to the padding
867 * required above."
868 */
869 if (params->dev->gen >= ILO_GEN(7.5) &&
870 (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
871 layout->tiling == INTEL_TILING_NONE) {
872 layout->bo_height +=
873 (64 + layout->bo_stride - 1) / layout->bo_stride;
874 }
875
876 /*
877 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
878 *
879 * "- For linear render target surfaces, the pitch must be a
880 * multiple of the element size for non-YUV surface formats.
881 * Pitch must be a multiple of 2 * element size for YUV surface
882 * formats.
883 * - For other linear surfaces, the pitch can be any multiple of
884 * bytes.
885 * - For tiled surfaces, the pitch must be a multiple of the tile
886 * width."
887 *
888 * Different requirements may exist when the bo is used in different
889 * places, but our alignments here should be good enough that we do not
890 * need to check layout->templ->bind.
891 */
892 switch (layout->tiling) {
893 case INTEL_TILING_X:
894 align_w = 512;
895 align_h = 8;
896 break;
897 case INTEL_TILING_Y:
898 align_w = 128;
899 align_h = 32;
900 break;
901 default:
902 if (layout->format == PIPE_FORMAT_S8_UINT) {
903 /*
904 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
905 *
906 * "A 4KB tile is subdivided into 8-high by 8-wide array of
907 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
908 * bytes."
909 *
910 * Since we asked for INTEL_TILING_NONE instead of the non-existent
911 * INTEL_TILING_W, we want to align to W tiles here.
912 */
913 align_w = 64;
914 align_h = 64;
915 } else {
916 /* some good enough values */
917 align_w = 64;
918 align_h = 2;
919 }
920 break;
921 }
922
923 w = align(w, align_w);
924 h = align(h, align_h);
925
926 /* make sure the bo is mappable */
927 if (layout->tiling != INTEL_TILING_NONE) {
928 /*
929 * Usually only the first 256MB of the GTT is mappable.
930 *
931 * See also how intel_context::max_gtt_map_object_size is calculated.
932 */
933 const size_t mappable_gtt_size = 256 * 1024 * 1024;
934
935 /*
936 * Be conservative. We may be able to switch from VALIGN_4 to
937 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
938 */
939 if (mappable_gtt_size / w / 4 < h) {
940 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
941 layout->tiling = INTEL_TILING_NONE;
942 /* MCS support for non-MSRTs is limited to tiled RTs */
943 if (layout->aux_type == ILO_LAYOUT_AUX_MCS &&
944 params->templ->nr_samples <= 1)
945 layout->aux_type = ILO_LAYOUT_AUX_NONE;
946
947 continue;
948 } else {
949 ilo_warn("cannot force texture to be linear\n");
950 }
951 }
952 }
953
954 layout->bo_stride = w;
955 layout->bo_height = h;
956 break;
957 }
958}
959
960static void
961layout_calculate_hiz_size(struct ilo_layout *layout,
962 struct ilo_layout_params *params)
963{
964 const struct pipe_resource *templ = params->templ;
965 const int hz_align_j = 8;
966 int hz_width, hz_height;
967
968 assert(layout->aux_type == ILO_LAYOUT_AUX_HIZ);
969
970 /*
971 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
972 * PRM, volume 2 part 1, page 312-313.
973 *
974 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
975 * memory row.
976 */
977
978 hz_width = align(layout->levels[0].slice_width, 16);
979
980 if (templ->target == PIPE_TEXTURE_3D) {
981 unsigned lv;
982
983 hz_height = 0;
984
985 for (lv = 0; lv <= templ->last_level; lv++) {
986 const unsigned h =
987 align(layout->levels[lv].slice_height, hz_align_j);
988 hz_height += h * u_minify(templ->depth0, lv);
989 }
990
991 hz_height /= 2;
992 } else {
993 const unsigned h0 = align(params->h0, hz_align_j);
994 unsigned hz_qpitch = h0;
995
996 if (layout->full_layers) {
997 const unsigned h1 = align(params->h1, hz_align_j);
998 const unsigned htail =
999 ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1000
1001 hz_qpitch += h1 + htail;
1002 }
1003
1004 hz_height = hz_qpitch * templ->array_size / 2;
1005
1006 if (params->dev->gen >= ILO_GEN(7))
1007 hz_height = align(hz_height, 8);
1008 }
1009
1010 /* align to Y-tile */
1011 layout->aux_stride = align(hz_width, 128);
1012 layout->aux_height = align(hz_height, 32);
1013}
1014
1015static void
1016layout_calculate_mcs_size(struct ilo_layout *layout,
1017 struct ilo_layout_params *params)
1018{
1019 const struct pipe_resource *templ = params->templ;
1020 int mcs_width, mcs_height, mcs_cpp;
1021 int downscale_x, downscale_y;
1022
1023 assert(layout->aux_type == ILO_LAYOUT_AUX_MCS);
1024
1025 if (templ->nr_samples > 1) {
1026 /*
1027 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1028 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1029 * need of scale down could be that the clear rectangle is used to clear
1030 * the MCS instead of the RT.
1031 *
1032 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1033 * 2x2 factor could come from that the hardware writes 128 bits (an
1034 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1035 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1036 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1037 * pixel block in the RT.
1038 */
1039 switch (templ->nr_samples) {
1040 case 2:
1041 case 4:
1042 downscale_x = 8;
1043 downscale_y = 2;
1044 mcs_cpp = 1;
1045 break;
1046 case 8:
1047 downscale_x = 2;
1048 downscale_y = 2;
1049 mcs_cpp = 4;
1050 break;
1051 case 16:
1052 downscale_x = 2;
1053 downscale_y = 1;
1054 mcs_cpp = 8;
1055 break;
1056 default:
1057 assert(!"unsupported sample count");
1058 return;
1059 break;
1060 }
1061
1062 /*
1063 * It also appears that the 2x2 subspans generated by the scaled-down
1064 * clear rectangle cannot be masked. The scale-down clear rectangle
1065 * thus must be aligned to 2x2, and we need to pad.
1066 */
1067 mcs_width = align(templ->width0, downscale_x * 2);
1068 mcs_height = align(templ->height0, downscale_y * 2);
1069 }
1070 else {
1071 /*
1072 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1073 *
1074 * " Pixels Lines
1075 * TiledY RT CL
1076 * bpp
1077 * 32 8 4
1078 * 64 4 4
1079 * 128 2 4
1080 *
1081 * TiledX RT CL
1082 * bpp
1083 * 32 16 2
1084 * 64 8 2
1085 * 128 4 2"
1086 *
1087 * This table and the two following tables define the RT alignments, the
1088 * clear rectangle alignments, and the clear rectangle scale factors.
1089 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1090 * that the clear rectangle alignments are 16x32 blocks, and the clear
1091 * rectangle scale factors are 8x16 blocks.
1092 *
1093 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1094 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1095 * 8x16 blocks.
1096 *
1097 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1098 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1099 * which says that a Y-tile maps to 128x256 blocks (\see
1100 * intel_get_non_msrt_mcs_alignment). It does not really change
1101 * anything except for the size of the allocated MCS. Let's see if we
1102 * hit out-of-bound access.
1103 */
1104 switch (layout->tiling) {
1105 case INTEL_TILING_X:
1106 downscale_x = 64 / layout->block_size;
1107 downscale_y = 2;
1108 break;
1109 case INTEL_TILING_Y:
1110 downscale_x = 32 / layout->block_size;
1111 downscale_y = 4;
1112 break;
1113 default:
1114 assert(!"unsupported tiling mode");
1115 return;
1116 break;
1117 }
1118
1119 downscale_x *= 8;
1120 downscale_y *= 16;
1121
1122 /*
1123 * From the Haswell PRM, volume 7, page 652:
1124 *
1125 * "Clear rectangle must be aligned to two times the number of
1126 * pixels in the table shown below due to 16X16 hashing across the
1127 * slice."
1128 *
1129 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1130 * 2x2, and we need to pad.
1131 */
1132 mcs_width = align(templ->width0, downscale_x * 4) / downscale_x;
1133 mcs_height = align(templ->height0, downscale_y * 4) / downscale_y;
1134 mcs_cpp = 16; /* an OWord */
1135 }
1136
1137 /* align to Y-tile */
1138 layout->aux_stride = align(mcs_width * mcs_cpp, 128);
1139 layout->aux_height = align(mcs_height, 32);
1140}
1141
1142/**
1143 * The texutre is for transfer only. We can define our own layout to save
1144 * space.
1145 */
1146static void
1147layout_init_for_transfer(struct ilo_layout *layout,
1148 const struct ilo_dev_info *dev,
1149 const struct pipe_resource *templ)
1150{
1151 const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
1152 templ->depth0 : templ->array_size;
1153 unsigned layer_width, layer_height;
1154
1155 assert(templ->last_level == 0);
1156 assert(templ->nr_samples <= 1);
1157
1158 layout->block_width = util_format_get_blockwidth(templ->format);
1159 layout->block_height = util_format_get_blockheight(templ->format);
1160 layout->block_size = util_format_get_blocksize(templ->format);
1161
1162 layout->valid_tilings = LAYOUT_TILING_NONE;
1163 layout->tiling = INTEL_TILING_NONE;
1164
1165 layout->align_i = layout->block_width;
1166 layout->align_j = layout->block_height;
1167
1168 assert(util_is_power_of_two(layout->block_width) &&
1169 util_is_power_of_two(layout->block_height));
1170
1171 /* use packed layout */
1172 layer_width = align(templ->width0, layout->align_i);
1173 layer_height = align(templ->height0, layout->align_j);
1174
1175 layout->levels[0].slice_width = layer_width;
1176 layout->levels[0].slice_height = layer_height;
1177 layout->layer_height = layer_height;
1178
1179 layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
1180 layout->bo_stride = align(layout->bo_stride, 64);
1181
1182 layout->bo_height = (layer_height / layout->block_height) * num_layers;
1183}
1184
1185/**
1186 * Initialize the layout. Callers should zero-initialize \p layout first.
1187 */
1188void ilo_layout_init(struct ilo_layout *layout,
1189 const struct ilo_dev_info *dev,
1190 const struct pipe_resource *templ)
1191{
1192 struct ilo_layout_params params;
1193 bool transfer_only;
1194
1195 /* use transfer layout when the texture is never bound to GPU */
1196 transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1197 PIPE_BIND_TRANSFER_READ));
1198 if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
1199 layout_init_for_transfer(layout, dev, templ);
1200 return;
1201 }
1202
1203 memset(&params, 0, sizeof(params));
1204 params.dev = dev;
1205 params.templ = templ;
1206
1207 /* note that there are dependencies between these functions */
1208 layout_init_aux(layout, &params);
1209 layout_init_format(layout, &params);
1210 layout_init_arrangements(layout, &params);
1211 layout_init_tiling(layout, &params);
1212 layout_init_alignments(layout, &params);
1213 layout_init_levels(layout, &params);
1214 layout_init_layer_height(layout, &params);
1215
1216 layout_align(layout, &params);
1217 layout_calculate_bo_size(layout, &params);
1218
1219 switch (layout->aux_type) {
1220 case ILO_LAYOUT_AUX_HIZ:
1221 layout_calculate_hiz_size(layout, &params);
1222 break;
1223 case ILO_LAYOUT_AUX_MCS:
1224 layout_calculate_mcs_size(layout, &params);
1225 break;
1226 default:
1227 break;
1228 }
1229}
1230
1231/**
1232 * Update the tiling mode and bo stride (for imported resources).
1233 */
1234bool
1235ilo_layout_update_for_imported_bo(struct ilo_layout *layout,
1236 enum intel_tiling_mode tiling,
1237 unsigned bo_stride)
1238{
1239 if (!(layout->valid_tilings & (1 << tiling)))
1240 return false;
1241
1242 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1243 (tiling == INTEL_TILING_Y && bo_stride % 128))
1244 return false;
1245
1246 layout->tiling = tiling;
1247 layout->bo_stride = bo_stride;
1248
1249 return true;
1250}
1251
1252/**
1253 * Return the offset (in bytes) to a slice within the bo.
1254 *
1255 * The returned offset is aligned to tile size. Since slices are not
1256 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1257 * from the tile origin to the slice are also returned. X offset is always a
1258 * multiple of 4 and Y offset is always a multiple of 2.
1259 */
1260unsigned
1261ilo_layout_get_slice_tile_offset(const struct ilo_layout *layout,
1262 unsigned level, unsigned slice,
1263 unsigned *x_offset, unsigned *y_offset)
1264{
1265 unsigned tile_w, tile_h, tile_size, row_size;
1266 unsigned tile_offset, x, y;
1267
1268 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1269
1270 switch (layout->tiling) {
1271 case INTEL_TILING_NONE:
1272 /* W-tiled */
1273 if (layout->format == PIPE_FORMAT_S8_UINT) {
1274 tile_w = 64;
1275 tile_h = 64;
1276 }
1277 else {
1278 tile_w = 1;
1279 tile_h = 1;
1280 }
1281 break;
1282 case INTEL_TILING_X:
1283 tile_w = 512;
1284 tile_h = 8;
1285 break;
1286 case INTEL_TILING_Y:
1287 tile_w = 128;
1288 tile_h = 32;
1289 break;
1290 default:
1291 assert(!"unknown tiling");
1292 tile_w = 1;
1293 tile_h = 1;
1294 break;
1295 }
1296
1297 tile_size = tile_w * tile_h;
1298 row_size = layout->bo_stride * tile_h;
1299
1300 ilo_layout_get_slice_pos(layout, level, slice, &x, &y);
1301 /* in bytes */
1302 ilo_layout_pos_to_mem(layout, x, y, &x, &y);
1303 tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1304
1305 /*
1306 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1307 * aligned at this point.
1308 */
1309 assert(tile_offset % tile_size == 0);
1310
1311 /*
1312 * because of the possible values of align_i and align_j in
1313 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1314 * 4 and y_offset is guaranteed to be a multiple of 2.
1315 */
1316 if (x_offset) {
1317 /* in pixels */
1318 x = (x % tile_w) / layout->block_size * layout->block_width;
1319 assert(x % 4 == 0);
1320
1321 *x_offset = x;
1322 }
1323
1324 if (y_offset) {
1325 /* in pixels */
1326 y = (y % tile_h) * layout->block_height;
1327 assert(y % 2 == 0);
1328
1329 *y_offset = y;
1330 }
1331
1332 return tile_offset;
1333}