blob: ff0d439ba24f2e96f202441d18c85796374cd170 [file] [log] [blame]
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001/*
2 * XGL
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "ilo_screen.h"
26#include "ilo_resource.h"
27
/*
 * PIPE_BIND_CUSTOM is repurposed as the bind flag that marks a resource as
 * an MCS.
 */
#define ILO_BIND_MCS (PIPE_BIND_CUSTOM)
30
31struct tex_layout {
32 const struct ilo_dev_info *dev;
33 const struct pipe_resource *templ;
34
35 bool has_depth, has_stencil;
36 bool hiz, separate_stencil;
37
38 enum pipe_format format;
39 unsigned block_width, block_height, block_size;
40 bool compressed;
41
42 enum intel_tiling_mode tiling;
43 unsigned valid_tilings; /* bitmask of valid tiling modes */
44
45 bool array_spacing_full;
46 bool interleaved;
47
48 struct {
49 int w, h, d;
50 struct ilo_texture_slice *slices;
51 } levels[PIPE_MAX_TEXTURE_LEVELS];
52
53 int align_i, align_j;
54 int qpitch;
55
56 int width, height;
57
58 int bo_stride, bo_height;
59 int hiz_stride, hiz_height;
60};
61
/*
 * Upper bound, in bytes, on the size of a resource.
 *
 * From the Ivy Bridge PRM, volume 1 part 1, page 105:
 *
 *     "In addition to restrictions on maximum height, width, and depth,
 *      surfaces are also restricted to a maximum size in bytes. This
 *      maximum is 2 GB for all products and all surface types."
 */
static const size_t max_resource_size = (size_t) 1 << 31;
70
71static const char *
72resource_get_bo_name(const struct pipe_resource *templ)
73{
74 static const char *target_names[PIPE_MAX_TEXTURE_TYPES] = {
75 [PIPE_BUFFER] = "buf",
76 [PIPE_TEXTURE_1D] = "tex-1d",
77 [PIPE_TEXTURE_2D] = "tex-2d",
78 [PIPE_TEXTURE_3D] = "tex-3d",
79 [PIPE_TEXTURE_CUBE] = "tex-cube",
80 [PIPE_TEXTURE_RECT] = "tex-rect",
81 [PIPE_TEXTURE_1D_ARRAY] = "tex-1d-array",
82 [PIPE_TEXTURE_2D_ARRAY] = "tex-2d-array",
83 [PIPE_TEXTURE_CUBE_ARRAY] = "tex-cube-array",
84 };
85 const char *name = target_names[templ->target];
86
87 if (templ->target == PIPE_BUFFER) {
88 switch (templ->bind) {
89 case PIPE_BIND_VERTEX_BUFFER:
90 name = "buf-vb";
91 break;
92 case PIPE_BIND_INDEX_BUFFER:
93 name = "buf-ib";
94 break;
95 case PIPE_BIND_CONSTANT_BUFFER:
96 name = "buf-cb";
97 break;
98 case PIPE_BIND_STREAM_OUTPUT:
99 name = "buf-so";
100 break;
101 default:
102 break;
103 }
104 }
105
106 return name;
107}
108
109static enum intel_domain_flag
110resource_get_bo_initial_domain(const struct pipe_resource *templ)
111{
112 return (templ->bind & (PIPE_BIND_DEPTH_STENCIL |
113 PIPE_BIND_RENDER_TARGET |
114 PIPE_BIND_STREAM_OUTPUT)) ?
115 INTEL_DOMAIN_RENDER : 0;
116}
117
118static void
119tex_layout_init_qpitch(struct tex_layout *layout)
120{
121 const struct pipe_resource *templ = layout->templ;
122 int h0, h1;
123
124 if (templ->array_size <= 1)
125 return;
126
127 h0 = align(layout->levels[0].h, layout->align_j);
128
129 if (!layout->array_spacing_full) {
130 layout->qpitch = h0;
131 return;
132 }
133
134 h1 = align(layout->levels[1].h, layout->align_j);
135
136 /*
137 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
138 *
139 * "The following equation is used for surface formats other than
140 * compressed textures:
141 *
142 * QPitch = (h0 + h1 + 11j)"
143 *
144 * "The equation for compressed textures (BC* and FXT1 surface formats)
145 * follows:
146 *
147 * QPitch = (h0 + h1 + 11j) / 4"
148 *
149 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
150 * value calculated in the equation above, for every other odd Surface
151 * Height starting from 1 i.e. 1,5,9,13"
152 *
153 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
154 *
155 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
156 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
157 *
158 * QPitch = (h0 + h1 + 12j)
159 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
160 *
161 * (There are many typos or missing words here...)"
162 *
163 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
164 * the base address. The PRM divides QPitch by 4 for compressed formats
165 * because the block height for those formats are 4, and it wants QPitch to
166 * mean the number of memory rows, as opposed to texel rows, between
167 * slices. Since we use texel rows in tex->slice_offsets, we do not need
168 * to divide QPitch by 4.
169 */
170 layout->qpitch = h0 + h1 +
171 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
172
173 if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
174 templ->height0 % 4 == 1)
175 layout->qpitch += 4;
176}
177
178static void
179tex_layout_init_alignments(struct tex_layout *layout)
180{
181 const struct pipe_resource *templ = layout->templ;
182
183 /*
184 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
185 *
186 * "surface format align_i align_j
187 * YUV 4:2:2 formats 4 *see below
188 * BC1-5 4 4
189 * FXT1 8 4
190 * all other formats 4 *see below"
191 *
192 * "- align_j = 4 for any depth buffer
193 * - align_j = 2 for separate stencil buffer
194 * - align_j = 4 for any render target surface is multisampled (4x)
195 * - align_j = 4 for any render target surface with Surface Vertical
196 * Alignment = VALIGN_4
197 * - align_j = 2 for any render target surface with Surface Vertical
198 * Alignment = VALIGN_2
199 * - align_j = 2 for all other render target surface
200 * - align_j = 2 for any sampling engine surface with Surface Vertical
201 * Alignment = VALIGN_2
202 * - align_j = 4 for any sampling engine surface with Surface Vertical
203 * Alignment = VALIGN_4"
204 *
205 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
206 *
207 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
208 * the Surface Format is 96 bits per element (BPE)."
209 *
210 * They can be rephrased as
211 *
212 * align_i align_j
213 * compressed formats block width block height
214 * PIPE_FORMAT_S8_UINT 4 2
215 * other depth/stencil formats 4 4
216 * 4x multisampled 4 4
217 * bpp 96 4 2
218 * others 4 2 or 4
219 */
220
221 /*
222 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
223 *
224 * "surface defined by surface format align_i align_j
225 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
226 * not D16_UNORM 4 4
227 * 3DSTATE_STENCIL_BUFFER N/A 8 8
228 * SURFACE_STATE BC*, ETC*, EAC* 4 4
229 * FXT1 8 4
230 * all others (set by SURFACE_STATE)"
231 *
232 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
233 *
234 * "- This field (Surface Vertical Aligment) is intended to be set to
235 * VALIGN_4 if the surface was rendered as a depth buffer, for a
236 * multisampled (4x) render target, or for a multisampled (8x)
237 * render target, since these surfaces support only alignment of 4.
238 * - Use of VALIGN_4 for other surfaces is supported, but uses more
239 * memory.
240 * - This field must be set to VALIGN_4 for all tiled Y Render Target
241 * surfaces.
242 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
243 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
244 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
245 * must be set to VALIGN_4."
246 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
247 *
248 * "- This field (Surface Horizontal Aligment) is intended to be set to
249 * HALIGN_8 only if the surface was rendered as a depth buffer with
250 * Z16 format or a stencil buffer, since these surfaces support only
251 * alignment of 8.
252 * - Use of HALIGN_8 for other surfaces is supported, but uses more
253 * memory.
254 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
255 * - This field must be set to HALIGN_8 if the Surface Format is
256 * FXT1."
257 *
258 * They can be rephrased as
259 *
260 * align_i align_j
261 * compressed formats block width block height
262 * PIPE_FORMAT_Z16_UNORM 8 4
263 * PIPE_FORMAT_S8_UINT 8 8
264 * other depth/stencil formats 4 or 8 4
265 * 2x or 4x multisampled 4 or 8 4
266 * tiled Y 4 or 8 4 (if rt)
267 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
268 * others 4 or 8 2 or 4
269 */
270
271 if (layout->compressed) {
272 /* this happens to be the case */
273 layout->align_i = layout->block_width;
274 layout->align_j = layout->block_height;
275 }
276 else if (layout->has_depth || layout->has_stencil) {
277 if (layout->dev->gen >= ILO_GEN(7)) {
278 switch (layout->format) {
279 case PIPE_FORMAT_Z16_UNORM:
280 layout->align_i = 8;
281 layout->align_j = 4;
282 break;
283 case PIPE_FORMAT_S8_UINT:
284 layout->align_i = 8;
285 layout->align_j = 8;
286 break;
287 default:
288 layout->align_i = 4;
289 layout->align_j = 4;
290 break;
291 }
292 }
293 else {
294 switch (layout->format) {
295 case PIPE_FORMAT_S8_UINT:
296 layout->align_i = 4;
297 layout->align_j = 2;
298 break;
299 default:
300 layout->align_i = 4;
301 layout->align_j = 4;
302 break;
303 }
304 }
305 }
306 else {
307 const bool valign_4 = (templ->nr_samples > 1) ||
308 (layout->dev->gen >= ILO_GEN(7) &&
309 layout->tiling == INTEL_TILING_Y &&
310 (templ->bind & PIPE_BIND_RENDER_TARGET));
311
312 if (valign_4)
313 assert(layout->block_size != 12);
314
315 layout->align_i = 4;
316 layout->align_j = (valign_4) ? 4 : 2;
317 }
318
319 /*
320 * the fact that align i and j are multiples of block width and height
321 * respectively is what makes the size of the bo a multiple of the block
322 * size, slices start at block boundaries, and many of the computations
323 * work.
324 */
325 assert(layout->align_i % layout->block_width == 0);
326 assert(layout->align_j % layout->block_height == 0);
327
328 /* make sure align() works */
329 assert(util_is_power_of_two(layout->align_i) &&
330 util_is_power_of_two(layout->align_j));
331 assert(util_is_power_of_two(layout->block_width) &&
332 util_is_power_of_two(layout->block_height));
333}
334
335static void
336tex_layout_init_levels(struct tex_layout *layout)
337{
338 const struct pipe_resource *templ = layout->templ;
339 int last_level, lv;
340
341 last_level = templ->last_level;
342
343 /* need at least 2 levels to compute full qpitch */
344 if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
345 last_level++;
346
347 /* compute mip level sizes */
348 for (lv = 0; lv <= last_level; lv++) {
349 int w, h, d;
350
351 w = u_minify(templ->width0, lv);
352 h = u_minify(templ->height0, lv);
353 d = u_minify(templ->depth0, lv);
354
355 /*
356 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
357 *
358 * "The dimensions of the mip maps are first determined by applying
359 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
360 * above. Then, if necessary, they are padded out to compression
361 * block boundaries."
362 */
363 w = align(w, layout->block_width);
364 h = align(h, layout->block_height);
365
366 /*
367 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
368 *
369 * "If the surface is multisampled (4x), these values must be
370 * adjusted as follows before proceeding:
371 *
372 * W_L = ceiling(W_L / 2) * 4
373 * H_L = ceiling(H_L / 2) * 4"
374 *
375 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
376 *
377 * "If the surface is multisampled and it is a depth or stencil
378 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
379 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
380 * before proceeding:
381 *
382 * #samples W_L = H_L =
383 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
384 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
385 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
386 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
387 *
388 * For interleaved samples (4x), where pixels
389 *
390 * (x, y ) (x+1, y )
391 * (x, y+1) (x+1, y+1)
392 *
393 * would be is occupied by
394 *
395 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
396 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
397 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
398 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
399 *
400 * Thus the need to
401 *
402 * w = align(w, 2) * 2;
403 * y = align(y, 2) * 2;
404 */
405 if (layout->interleaved) {
406 switch (templ->nr_samples) {
407 case 0:
408 case 1:
409 break;
410 case 2:
411 w = align(w, 2) * 2;
412 break;
413 case 4:
414 w = align(w, 2) * 2;
415 h = align(h, 2) * 2;
416 break;
417 case 8:
418 w = align(w, 2) * 4;
419 h = align(h, 2) * 2;
420 break;
421 case 16:
422 w = align(w, 2) * 4;
423 h = align(h, 2) * 4;
424 break;
425 default:
426 assert(!"unsupported sample count");
427 break;
428 }
429 }
430
431 layout->levels[lv].w = w;
432 layout->levels[lv].h = h;
433 layout->levels[lv].d = d;
434 }
435}
436
437static void
438tex_layout_init_spacing(struct tex_layout *layout)
439{
440 const struct pipe_resource *templ = layout->templ;
441
442 if (layout->dev->gen >= ILO_GEN(7)) {
443 /*
444 * It is not explicitly states, but render targets are expected to be
445 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
446 * expected to be IMS (samples interleaved).
447 *
448 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
449 */
450 if (layout->has_depth || layout->has_stencil) {
451 layout->interleaved = true;
452
453 /*
454 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
455 *
456 * "note that the depth buffer and stencil buffer have an implied
457 * value of ARYSPC_FULL"
458 */
459 layout->array_spacing_full = true;
460 }
461 else {
462 layout->interleaved = false;
463
464 /*
465 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
466 *
467 * "If Multisampled Surface Storage Format is MSFMT_MSS and
468 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
469 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
470 *
471 * As multisampled resources are not mipmapped, we never use
472 * ARYSPC_FULL for them.
473 */
474 if (templ->nr_samples > 1)
475 assert(templ->last_level == 0);
476 layout->array_spacing_full = (templ->last_level > 0);
477 }
478 }
479 else {
480 /* GEN6 supports only interleaved samples */
481 layout->interleaved = true;
482
483 /*
484 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
485 *
486 * "The separate stencil buffer does not support mip mapping, thus
487 * the storage for LODs other than LOD 0 is not needed. The
488 * following QPitch equation applies only to the separate stencil
489 * buffer:
490 *
491 * QPitch = h_0"
492 *
493 * GEN6 does not support compact spacing otherwise.
494 */
495 layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
496 }
497}
498
499static void
500tex_layout_init_tiling(struct tex_layout *layout)
501{
502 const struct pipe_resource *templ = layout->templ;
503 const enum pipe_format format = layout->format;
504 const unsigned tile_none = 1 << INTEL_TILING_NONE;
505 const unsigned tile_x = 1 << INTEL_TILING_X;
506 const unsigned tile_y = 1 << INTEL_TILING_Y;
507 unsigned valid_tilings = tile_none | tile_x | tile_y;
508
509 /*
510 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
511 *
512 * "Display/Overlay Y-Major not supported.
513 * X-Major required for Async Flips"
514 */
515 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
516 valid_tilings &= tile_x;
517
518 /*
519 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
520 *
521 * "The cursor surface address must be 4K byte aligned. The cursor must
522 * be in linear memory, it cannot be tiled."
523 */
524 if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
525 valid_tilings &= tile_none;
526
527 /*
528 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
529 *
530 * "The MCS surface must be stored as Tile Y."
531 */
532 if (templ->bind & ILO_BIND_MCS)
533 valid_tilings &= tile_y;
534
535 /*
536 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
537 *
538 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
539 * Depth Buffer is not supported."
540 *
541 * "The Depth Buffer, if tiled, must use Y-Major tiling."
542 *
543 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
544 *
545 * "W-Major Tile Format is used for separate stencil."
546 *
547 * Since the HW does not support W-tiled fencing, we have to do it in the
548 * driver.
549 */
550 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
551 switch (format) {
552 case PIPE_FORMAT_S8_UINT:
553 valid_tilings &= tile_none;
554 break;
555 default:
556 valid_tilings &= tile_y;
557 break;
558 }
559 }
560
561 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
562 /*
563 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
564 *
565 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
566 * either TileX or Linear."
567 */
568 if (layout->block_size == 16)
569 valid_tilings &= ~tile_y;
570
571 /*
572 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
573 *
574 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
575 * for all tiled Y Render Target surfaces."
576 *
577 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
578 */
579 if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
580 valid_tilings &= ~tile_y;
581 }
582
583 /* no conflicting binding flags */
584 assert(valid_tilings);
585
586 layout->valid_tilings = valid_tilings;
587
588 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
589 /*
590 * heuristically set a minimum width/height for enabling tiling
591 */
592 if (templ->width0 < 64 && (valid_tilings & ~tile_x))
593 valid_tilings &= ~tile_x;
594
595 if ((templ->width0 < 32 || templ->height0 < 16) &&
596 (templ->width0 < 16 || templ->height0 < 32) &&
597 (valid_tilings & ~tile_y))
598 valid_tilings &= ~tile_y;
599 }
600 else {
601 /* force linear if we are not sure where the texture is bound to */
602 if (valid_tilings & tile_none)
603 valid_tilings &= tile_none;
604 }
605
606 /* prefer tiled over linear */
607 if (valid_tilings & tile_y)
608 layout->tiling = INTEL_TILING_Y;
609 else if (valid_tilings & tile_x)
610 layout->tiling = INTEL_TILING_X;
611 else
612 layout->tiling = INTEL_TILING_NONE;
613}
614
615static void
616tex_layout_init_format(struct tex_layout *layout)
617{
618 const struct pipe_resource *templ = layout->templ;
619 enum pipe_format format;
620
621 switch (templ->format) {
622 case PIPE_FORMAT_ETC1_RGB8:
623 format = PIPE_FORMAT_R8G8B8X8_UNORM;
624 break;
625 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
626 if (layout->separate_stencil)
627 format = PIPE_FORMAT_Z24X8_UNORM;
628 else
629 format = templ->format;
630 break;
631 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
632 if (layout->separate_stencil)
633 format = PIPE_FORMAT_Z32_FLOAT;
634 else
635 format = templ->format;
636 break;
637 default:
638 format = templ->format;
639 break;
640 }
641
642 layout->format = format;
643
644 layout->block_width = util_format_get_blockwidth(format);
645 layout->block_height = util_format_get_blockheight(format);
646 layout->block_size = util_format_get_blocksize(format);
647 layout->compressed = util_format_is_compressed(format);
648}
649
650static void
651tex_layout_init_hiz(struct tex_layout *layout)
652{
653 const struct pipe_resource *templ = layout->templ;
654 const struct util_format_description *desc;
655
656 desc = util_format_description(templ->format);
657 layout->has_depth = util_format_has_depth(desc);
658 layout->has_stencil = util_format_has_stencil(desc);
659
660 if (!layout->has_depth)
661 return;
662
663 layout->hiz = true;
664
665 /* no point in having HiZ */
666 if (templ->usage == PIPE_USAGE_STAGING)
667 layout->hiz = false;
668
669 if (layout->dev->gen == ILO_GEN(6)) {
670 /*
671 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
672 *
673 * "The hierarchical depth buffer does not support the LOD field, it
674 * is assumed by hardware to be zero. A separate hierarachical
675 * depth buffer is required for each LOD used, and the
676 * corresponding buffer's state delivered to hardware each time a
677 * new depth buffer state with modified LOD is delivered."
678 *
679 * But we have a stronger requirement. Because of layer offsetting
680 * (check out the callers of ilo_texture_get_slice_offset()), we already
681 * have to require the texture to be non-mipmapped and non-array.
682 */
683 if (templ->last_level > 0 || templ->array_size > 1 || templ->depth0 > 1)
684 layout->hiz = false;
685 }
686
687 if (ilo_debug & ILO_DEBUG_NOHIZ)
688 layout->hiz = false;
689
690 if (layout->has_stencil) {
691 /*
692 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
693 *
694 * "This field (Separate Stencil Buffer Enable) must be set to the
695 * same value (enabled or disabled) as Hierarchical Depth Buffer
696 * Enable."
697 *
698 * GEN7+ requires separate stencil buffers.
699 */
700 if (layout->dev->gen >= ILO_GEN(7))
701 layout->separate_stencil = true;
702 else
703 layout->separate_stencil = layout->hiz;
704
705 if (layout->separate_stencil)
706 layout->has_stencil = false;
707 }
708}
709
710static bool
711tex_layout_init(struct tex_layout *layout,
712 struct pipe_screen *screen,
713 const struct pipe_resource *templ,
714 struct ilo_texture_slice **slices)
715{
716 struct ilo_screen *is = ilo_screen(screen);
717
718 memset(layout, 0, sizeof(*layout));
719
720 layout->dev = &is->dev;
721 layout->templ = templ;
722
723 /* note that there are dependencies between these functions */
724 tex_layout_init_hiz(layout);
725 tex_layout_init_format(layout);
726 tex_layout_init_tiling(layout);
727 tex_layout_init_spacing(layout);
728 tex_layout_init_levels(layout);
729 tex_layout_init_alignments(layout);
730 tex_layout_init_qpitch(layout);
731
732 if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
733 /* require on-the-fly tiling/untiling or format conversion */
734 if (layout->separate_stencil ||
735 layout->format == PIPE_FORMAT_S8_UINT ||
736 layout->format != templ->format)
737 return false;
738 }
739
740 if (slices) {
741 int lv;
742
743 for (lv = 0; lv <= templ->last_level; lv++)
744 layout->levels[lv].slices = slices[lv];
745 }
746
747 return true;
748}
749
750static void
751tex_layout_align(struct tex_layout *layout)
752{
753 int align_w = 1, align_h = 1, pad_h = 0;
754
755 /*
756 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
757 *
758 * "To determine the necessary padding on the bottom and right side of
759 * the surface, refer to the table in Section 7.18.3.4 for the i and j
760 * parameters for the surface format in use. The surface must then be
761 * extended to the next multiple of the alignment unit size in each
762 * dimension, and all texels contained in this extended surface must
763 * have valid GTT entries."
764 *
765 * "For cube surfaces, an additional two rows of padding are required
766 * at the bottom of the surface. This must be ensured regardless of
767 * whether the surface is stored tiled or linear. This is due to the
768 * potential rotation of cache line orientation from memory to cache."
769 *
770 * "For compressed textures (BC* and FXT1 surface formats), padding at
771 * the bottom of the surface is to an even compressed row, which is
772 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
773 * purposes, these surfaces behave as if j = 8 only for surface
774 * padding purposes. The value of 4 for j still applies for mip level
775 * alignment and QPitch calculation."
776 */
777 if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
778 align_w = MAX2(align_w, layout->align_i);
779 align_h = MAX2(align_h, layout->align_j);
780
781 if (layout->templ->target == PIPE_TEXTURE_CUBE)
782 pad_h += 2;
783
784 if (layout->compressed)
785 align_h = MAX2(align_h, layout->align_j * 2);
786 }
787
788 /*
789 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
790 *
791 * "If the surface contains an odd number of rows of data, a final row
792 * below the surface must be allocated."
793 */
794 if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
795 align_h = MAX2(align_h, 2);
796
797 /*
798 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
799 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
800 * To avoid out-of-bound access, we have to pad.
801 */
802 if (layout->hiz) {
803 align_w = MAX2(align_w, 8);
804 align_h = MAX2(align_h, 4);
805 }
806
807 layout->width = align(layout->width, align_w);
808 layout->height = align(layout->height + pad_h, align_h);
809}
810
811/**
812 * Layout a 2D texture.
813 */
814static void
815tex_layout_2d(struct tex_layout *layout)
816{
817 const struct pipe_resource *templ = layout->templ;
818 unsigned int level_x, level_y, num_slices;
819 int lv;
820
821 level_x = 0;
822 level_y = 0;
823 for (lv = 0; lv <= templ->last_level; lv++) {
824 const unsigned int level_w = layout->levels[lv].w;
825 const unsigned int level_h = layout->levels[lv].h;
826 int slice;
827
828 /* set slice offsets */
829 if (layout->levels[lv].slices) {
830 for (slice = 0; slice < templ->array_size; slice++) {
831 layout->levels[lv].slices[slice].x = level_x;
832 /* slices are qpitch apart in Y-direction */
833 layout->levels[lv].slices[slice].y =
834 level_y + layout->qpitch * slice;
835 }
836 }
837
838 /* extend the size of the monolithic bo to cover this mip level */
839 if (layout->width < level_x + level_w)
840 layout->width = level_x + level_w;
841 if (layout->height < level_y + level_h)
842 layout->height = level_y + level_h;
843
844 /* MIPLAYOUT_BELOW */
845 if (lv == 1)
846 level_x += align(level_w, layout->align_i);
847 else
848 level_y += align(level_h, layout->align_j);
849 }
850
851 num_slices = templ->array_size;
852 /* samples of the same index are stored in a slice */
853 if (templ->nr_samples > 1 && !layout->interleaved)
854 num_slices *= templ->nr_samples;
855
856 /* we did not take slices into consideration in the computation above */
857 layout->height += layout->qpitch * (num_slices - 1);
858
859 tex_layout_align(layout);
860}
861
862/**
863 * Layout a 3D texture.
864 */
865static void
866tex_layout_3d(struct tex_layout *layout)
867{
868 const struct pipe_resource *templ = layout->templ;
869 unsigned int level_y;
870 int lv;
871
872 level_y = 0;
873 for (lv = 0; lv <= templ->last_level; lv++) {
874 const unsigned int level_w = layout->levels[lv].w;
875 const unsigned int level_h = layout->levels[lv].h;
876 const unsigned int level_d = layout->levels[lv].d;
877 const unsigned int slice_pitch = align(level_w, layout->align_i);
878 const unsigned int slice_qpitch = align(level_h, layout->align_j);
879 const unsigned int num_slices_per_row = 1 << lv;
880 int slice;
881
882 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
883 int i;
884
885 /* set slice offsets */
886 if (layout->levels[lv].slices) {
887 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
888 layout->levels[lv].slices[slice + i].x = slice_pitch * i;
889 layout->levels[lv].slices[slice + i].y = level_y;
890 }
891 }
892
893 /* move on to the next slice row */
894 level_y += slice_qpitch;
895 }
896
897 /* rightmost slice */
898 slice = MIN2(num_slices_per_row, level_d) - 1;
899
900 /* extend the size of the monolithic bo to cover this slice */
901 if (layout->width < slice_pitch * slice + level_w)
902 layout->width = slice_pitch * slice + level_w;
903 if (lv == templ->last_level)
904 layout->height = (level_y - slice_qpitch) + level_h;
905 }
906
907 tex_layout_align(layout);
908}
909
910/* note that this may force the texture to be linear */
911static bool
912tex_layout_calculate_bo_size(struct tex_layout *layout)
913{
914 assert(layout->width % layout->block_width == 0);
915 assert(layout->height % layout->block_height == 0);
916 assert(layout->qpitch % layout->block_height == 0);
917
918 layout->bo_stride =
919 (layout->width / layout->block_width) * layout->block_size;
920 layout->bo_height = layout->height / layout->block_height;
921
922 while (true) {
923 int w = layout->bo_stride, h = layout->bo_height;
924 int align_w, align_h;
925
926 /*
927 * From the Haswell PRM, volume 5, page 163:
928 *
929 * "For linear surfaces, additional padding of 64 bytes is required
930 * at the bottom of the surface. This is in addition to the padding
931 * required above."
932 */
933 if (layout->dev->gen >= ILO_GEN(7.5) &&
934 (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
935 layout->tiling == INTEL_TILING_NONE) {
936 layout->bo_height +=
937 (64 + layout->bo_stride - 1) / layout->bo_stride;
938 }
939
940 /*
941 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
942 *
943 * "- For linear render target surfaces, the pitch must be a
944 * multiple of the element size for non-YUV surface formats.
945 * Pitch must be a multiple of 2 * element size for YUV surface
946 * formats.
947 * - For other linear surfaces, the pitch can be any multiple of
948 * bytes.
949 * - For tiled surfaces, the pitch must be a multiple of the tile
950 * width."
951 *
952 * Different requirements may exist when the bo is used in different
953 * places, but our alignments here should be good enough that we do not
954 * need to check layout->templ->bind.
955 */
956 switch (layout->tiling) {
957 case INTEL_TILING_X:
958 align_w = 512;
959 align_h = 8;
960 break;
961 case INTEL_TILING_Y:
962 align_w = 128;
963 align_h = 32;
964 break;
965 default:
966 if (layout->format == PIPE_FORMAT_S8_UINT) {
967 /*
968 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
969 *
970 * "A 4KB tile is subdivided into 8-high by 8-wide array of
971 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
972 * bytes."
973 *
974 * Since we asked for INTEL_TILING_NONE instead of the non-existent
975 * INTEL_TILING_W, we want to align to W tiles here.
976 */
977 align_w = 64;
978 align_h = 64;
979 }
980 else {
981 /* some good enough values */
982 align_w = 64;
983 align_h = 2;
984 }
985 break;
986 }
987
988 w = align(w, align_w);
989 h = align(h, align_h);
990
991 /* make sure the bo is mappable */
992 if (layout->tiling != INTEL_TILING_NONE) {
993 /*
994 * Usually only the first 256MB of the GTT is mappable.
995 *
996 * See also how intel_context::max_gtt_map_object_size is calculated.
997 */
998 const size_t mappable_gtt_size = 256 * 1024 * 1024;
999
1000 /*
1001 * Be conservative. We may be able to switch from VALIGN_4 to
1002 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
1003 */
1004 if (mappable_gtt_size / w / 4 < h) {
1005 if (layout->valid_tilings & (1 << INTEL_TILING_NONE)) {
1006 layout->tiling = INTEL_TILING_NONE;
1007 continue;
1008 }
1009 else {
1010 ilo_warn("cannot force texture to be linear\n");
1011 }
1012 }
1013 }
1014
1015 layout->bo_stride = w;
1016 layout->bo_height = h;
1017 break;
1018 }
1019
1020 return (layout->bo_height <= max_resource_size / layout->bo_stride);
1021}
1022
1023static void
1024tex_layout_calculate_hiz_size(struct tex_layout *layout)
1025{
1026 const struct pipe_resource *templ = layout->templ;
1027 const int hz_align_j = 8;
1028 int hz_width, hz_height;
1029
1030 if (!layout->hiz)
1031 return;
1032
1033 /*
1034 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1035 * PRM, volume 2 part 1, page 312-313.
1036 *
1037 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1038 * memory row.
1039 */
1040
1041 hz_width = align(layout->levels[0].w, 16);
1042
1043 if (templ->target == PIPE_TEXTURE_3D) {
1044 unsigned lv;
1045
1046 hz_height = 0;
1047
1048 for (lv = 0; lv <= templ->last_level; lv++) {
1049 const unsigned h = align(layout->levels[lv].h, hz_align_j);
1050 hz_height += h * layout->levels[lv].d;
1051 }
1052
1053 hz_height /= 2;
1054 }
1055 else {
1056 const unsigned h0 = align(layout->levels[0].h, hz_align_j);
1057 unsigned hz_qpitch = h0;
1058
1059 if (layout->array_spacing_full) {
1060 const unsigned h1 = align(layout->levels[1].h, hz_align_j);
1061 const unsigned htail =
1062 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1063
1064 hz_qpitch += h1 + htail;
1065 }
1066
1067 hz_height = hz_qpitch * templ->array_size / 2;
1068
1069 if (layout->dev->gen >= ILO_GEN(7))
1070 hz_height = align(hz_height, 8);
1071 }
1072
1073 /* align to Y-tile */
1074 layout->hiz_stride = align(hz_width, 128);
1075 layout->hiz_height = align(hz_height, 32);
1076}