blob: e68b07952eac8517265a03fe174bb71e20085678 [file] [log] [blame]
Chia-I Wu4bc47012014-08-14 13:03:25 +08001/*
Chia-I Wu44e42362014-09-02 08:32:09 +08002 * XGL
Chia-I Wu4bc47012014-08-14 13:03:25 +08003 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
Chia-I Wu44e42362014-09-02 08:32:09 +080025 * Chia-I Wu <olv@lunarg.com>
Chia-I Wu4bc47012014-08-14 13:03:25 +080026 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
34 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
35 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
36 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
37 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
38
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
Chia-I Wuc94f3e52014-10-07 14:45:05 +080046 struct intel_dev *dev;
47
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080048 const struct intel_gpu *gpu;
49 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu794d12a2014-09-15 14:55:25 +080050 bool scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +080051
52 bool compressed;
53
54 unsigned h0, h1;
55 unsigned max_x, max_y;
56};
57
58static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080059layout_get_slice_size(const struct intel_layout *layout,
60 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080061 unsigned level, unsigned *width, unsigned *height)
62{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080063 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080064 unsigned w, h;
65
Chia-I Wu457d0a62014-08-18 13:02:26 +080066 w = u_minify(layout->width0, level);
67 h = u_minify(layout->height0, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080068
69 /*
70 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
71 *
72 * "The dimensions of the mip maps are first determined by applying the
73 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
74 * if necessary, they are padded out to compression block boundaries."
75 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080076 w = u_align(w, layout->block_width);
77 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080078
79 /*
80 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
81 *
82 * "If the surface is multisampled (4x), these values must be adjusted
83 * as follows before proceeding:
84 *
85 * W_L = ceiling(W_L / 2) * 4
86 * H_L = ceiling(H_L / 2) * 4"
87 *
88 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
89 *
90 * "If the surface is multisampled and it is a depth or stencil surface
91 * or Multisampled Surface StorageFormat in SURFACE_STATE is
92 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
93 * proceeding:
94 *
95 * #samples W_L = H_L =
96 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
97 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
98 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
99 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
100 *
101 * For interleaved samples (4x), where pixels
102 *
103 * (x, y ) (x+1, y )
104 * (x, y+1) (x+1, y+1)
105 *
106 * would be is occupied by
107 *
108 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
109 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
110 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
111 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
112 *
113 * Thus the need to
114 *
Chia-I Wu457d0a62014-08-18 13:02:26 +0800115 * w = align(w, 2) * 2;
116 * y = align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800117 */
118 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800119 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800120 case 0:
121 case 1:
122 break;
123 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800124 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800125 break;
126 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800127 w = u_align(w, 2) * 2;
128 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800129 break;
130 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800131 w = u_align(w, 2) * 4;
132 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800133 break;
134 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800135 w = u_align(w, 2) * 4;
136 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800137 break;
138 default:
139 assert(!"unsupported sample count");
140 break;
141 }
142 }
143
Chia-I Wu457d0a62014-08-18 13:02:26 +0800144 /*
145 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
146 *
147 * "For separate stencil buffer, the width must be mutiplied by 2 and
148 * height divided by 2..."
149 *
150 * To make things easier (for transfer), we will just double the stencil
151 * stride in 3DSTATE_STENCIL_BUFFER.
152 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800153 w = u_align(w, layout->align_i);
154 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800155
156 *width = w;
157 *height = h;
158}
159
160static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800161layout_get_num_layers(const struct intel_layout *layout,
162 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800163{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800164 const XGL_IMAGE_CREATE_INFO *info = params->info;
165 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800166
167 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800168 if (info->samples > 1 && !layout->interleaved_samples)
169 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800170
171 return num_layers;
172}
173
174static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800175layout_init_layer_height(struct intel_layout *layout,
176 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800177{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800178 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800179 unsigned num_layers;
180
Chia-I Wu457d0a62014-08-18 13:02:26 +0800181 if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
182 return;
183
Chia-I Wu4bc47012014-08-14 13:03:25 +0800184 num_layers = layout_get_num_layers(layout, params);
185 if (num_layers <= 1)
186 return;
187
Chia-I Wu4bc47012014-08-14 13:03:25 +0800188 /*
189 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
190 *
191 * "The following equation is used for surface formats other than
192 * compressed textures:
193 *
194 * QPitch = (h0 + h1 + 11j)"
195 *
196 * "The equation for compressed textures (BC* and FXT1 surface formats)
197 * follows:
198 *
199 * QPitch = (h0 + h1 + 11j) / 4"
200 *
201 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
202 * value calculated in the equation above, for every other odd Surface
203 * Height starting from 1 i.e. 1,5,9,13"
204 *
205 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
206 *
207 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
208 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
209 *
210 * QPitch = (h0 + h1 + 12j)
211 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
212 *
213 * (There are many typos or missing words here...)"
214 *
215 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
216 * the base address. The PRM divides QPitch by 4 for compressed formats
217 * because the block height for those formats are 4, and it wants QPitch to
218 * mean the number of memory rows, as opposed to texel rows, between
219 * slices. Since we use texel rows everywhere, we do not need to divide
220 * QPitch by 4.
221 */
222 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800223 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800224
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800225 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800226 layout->height0 % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800227 layout->layer_height += 4;
228
229 params->max_y += layout->layer_height * (num_layers - 1);
230}
231
232static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800233layout_init_lods(struct intel_layout *layout,
234 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800235{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800236 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800237 unsigned cur_x, cur_y;
238 unsigned lv;
239
240 cur_x = 0;
241 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800242 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu457d0a62014-08-18 13:02:26 +0800243 unsigned lod_w, lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800244
Chia-I Wu457d0a62014-08-18 13:02:26 +0800245 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800246
Chia-I Wu457d0a62014-08-18 13:02:26 +0800247 layout->lods[lv].x = cur_x;
248 layout->lods[lv].y = cur_y;
249 layout->lods[lv].slice_width = lod_w;
250 layout->lods[lv].slice_height = lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800251
Chia-I Wu457d0a62014-08-18 13:02:26 +0800252 switch (layout->walk) {
253 case INTEL_LAYOUT_WALK_LOD:
254 lod_h *= layout_get_num_layers(layout, params);
255 if (lv == 1)
256 cur_x += lod_w;
257 else
258 cur_y += lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800259
Chia-I Wu457d0a62014-08-18 13:02:26 +0800260 /* every LOD begins at tile boundaries */
261 if (info->mipLevels > 1) {
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700262 assert(layout->format == XGL_FMT_S8_UINT);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800263 cur_x = u_align(cur_x, 64);
264 cur_y = u_align(cur_y, 64);
265 }
266 break;
267 case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800268 /* MIPLAYOUT_BELOW */
269 if (lv == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800270 cur_x += lod_w;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800271 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800272 cur_y += lod_h;
273 break;
274 case INTEL_LAYOUT_WALK_3D:
275 {
276 const unsigned num_slices = u_minify(info->extent.depth, lv);
277 const unsigned num_slices_per_row = 1 << lv;
278 const unsigned num_rows =
279 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
280
281 lod_w *= num_slices_per_row;
282 lod_h *= num_rows;
283
284 cur_y += lod_h;
285 }
286 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800287 }
288
Chia-I Wu457d0a62014-08-18 13:02:26 +0800289 if (params->max_x < layout->lods[lv].x + lod_w)
290 params->max_x = layout->lods[lv].x + lod_w;
291 if (params->max_y < layout->lods[lv].y + lod_h)
292 params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800293 }
294
Chia-I Wu457d0a62014-08-18 13:02:26 +0800295 if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
296 params->h0 = layout->lods[0].slice_height;
297
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800298 if (info->mipLevels > 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800299 params->h1 = layout->lods[1].slice_height;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800300 else
301 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
302 }
303}
304
305static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800306layout_init_alignments(struct intel_layout *layout,
307 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800308{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800309 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800310
311 /*
312 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
313 *
314 * "surface format align_i align_j
315 * YUV 4:2:2 formats 4 *see below
316 * BC1-5 4 4
317 * FXT1 8 4
318 * all other formats 4 *see below"
319 *
320 * "- align_j = 4 for any depth buffer
321 * - align_j = 2 for separate stencil buffer
322 * - align_j = 4 for any render target surface is multisampled (4x)
323 * - align_j = 4 for any render target surface with Surface Vertical
324 * Alignment = VALIGN_4
325 * - align_j = 2 for any render target surface with Surface Vertical
326 * Alignment = VALIGN_2
327 * - align_j = 2 for all other render target surface
328 * - align_j = 2 for any sampling engine surface with Surface Vertical
329 * Alignment = VALIGN_2
330 * - align_j = 4 for any sampling engine surface with Surface Vertical
331 * Alignment = VALIGN_4"
332 *
333 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
334 *
335 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
336 * the Surface Format is 96 bits per element (BPE)."
337 *
338 * They can be rephrased as
339 *
340 * align_i align_j
341 * compressed formats block width block height
342 * PIPE_FORMAT_S8_UINT 4 2
343 * other depth/stencil formats 4 4
344 * 4x multisampled 4 4
345 * bpp 96 4 2
346 * others 4 2 or 4
347 */
348
349 /*
350 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
351 *
352 * "surface defined by surface format align_i align_j
353 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
354 * not D16_UNORM 4 4
355 * 3DSTATE_STENCIL_BUFFER N/A 8 8
356 * SURFACE_STATE BC*, ETC*, EAC* 4 4
357 * FXT1 8 4
358 * all others (set by SURFACE_STATE)"
359 *
360 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
361 *
362 * "- This field (Surface Vertical Aligment) is intended to be set to
363 * VALIGN_4 if the surface was rendered as a depth buffer, for a
364 * multisampled (4x) render target, or for a multisampled (8x)
365 * render target, since these surfaces support only alignment of 4.
366 * - Use of VALIGN_4 for other surfaces is supported, but uses more
367 * memory.
368 * - This field must be set to VALIGN_4 for all tiled Y Render Target
369 * surfaces.
370 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
371 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
372 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
373 * must be set to VALIGN_4."
374 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
375 *
376 * "- This field (Surface Horizontal Aligment) is intended to be set to
377 * HALIGN_8 only if the surface was rendered as a depth buffer with
378 * Z16 format or a stencil buffer, since these surfaces support only
379 * alignment of 8.
380 * - Use of HALIGN_8 for other surfaces is supported, but uses more
381 * memory.
382 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
383 * - This field must be set to HALIGN_8 if the Surface Format is
384 * FXT1."
385 *
386 * They can be rephrased as
387 *
388 * align_i align_j
389 * compressed formats block width block height
390 * PIPE_FORMAT_Z16_UNORM 8 4
391 * PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu457d0a62014-08-18 13:02:26 +0800392 * other depth/stencil formats 4 4
Chia-I Wu4bc47012014-08-14 13:03:25 +0800393 * 2x or 4x multisampled 4 or 8 4
394 * tiled Y 4 or 8 4 (if rt)
395 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
396 * others 4 or 8 2 or 4
397 */
398
399 if (params->compressed) {
400 /* this happens to be the case */
401 layout->align_i = layout->block_width;
402 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800403 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
404 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700405 switch (layout->format) {
406 case XGL_FMT_D16_UNORM:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800407 layout->align_i = 8;
408 layout->align_j = 4;
409 break;
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700410 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800411 layout->align_i = 8;
412 layout->align_j = 8;
413 break;
414 default:
415 layout->align_i = 4;
416 layout->align_j = 4;
417 break;
418 }
419 } else {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700420 switch (layout->format) {
421 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800422 layout->align_i = 4;
423 layout->align_j = 2;
424 break;
425 default:
426 layout->align_i = 4;
427 layout->align_j = 4;
428 break;
429 }
430 }
431 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800432 const bool valign_4 = (info->samples > 1) ||
433 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800434 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800435 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800436
437 if (valign_4)
438 assert(layout->block_size != 12);
439
440 layout->align_i = 4;
441 layout->align_j = (valign_4) ? 4 : 2;
442 }
443
444 /*
445 * the fact that align i and j are multiples of block width and height
446 * respectively is what makes the size of the bo a multiple of the block
447 * size, slices start at block boundaries, and many of the computations
448 * work.
449 */
450 assert(layout->align_i % layout->block_width == 0);
451 assert(layout->align_j % layout->block_height == 0);
452
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800453 /* make sure u_align() works */
454 assert(u_is_pow2(layout->align_i) &&
455 u_is_pow2(layout->align_j));
456 assert(u_is_pow2(layout->block_width) &&
457 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800458}
459
460static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800461layout_get_valid_tilings(const struct intel_layout *layout,
462 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800463{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800464 const XGL_IMAGE_CREATE_INFO *info = params->info;
465 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800466 unsigned valid_tilings = LAYOUT_TILING_ALL;
467
Chia-I Wu794d12a2014-09-15 14:55:25 +0800468 /*
469 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
470 *
471 * "Display/Overlay Y-Major not supported.
472 * X-Major required for Async Flips"
473 */
474 if (params->scanout)
475 valid_tilings &= LAYOUT_TILING_X;
476
Chia-I Wu6ac93992014-08-30 18:23:28 +0800477 if (info->tiling == XGL_LINEAR_TILING)
478 valid_tilings &= LAYOUT_TILING_NONE;
479
Chia-I Wu4bc47012014-08-14 13:03:25 +0800480 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800481 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
482 *
483 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
484 * Depth Buffer is not supported."
485 *
486 * "The Depth Buffer, if tiled, must use Y-Major tiling."
487 *
488 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
489 *
490 * "W-Major Tile Format is used for separate stencil."
491 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800492 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700493 switch (format) {
494 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800495 valid_tilings &= LAYOUT_TILING_W;
496 break;
497 default:
498 valid_tilings &= LAYOUT_TILING_Y;
499 break;
500 }
501 }
502
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800503 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800504 /*
505 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
506 *
507 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
508 * either TileX or Linear."
509 */
510 if (layout->block_size == 16)
511 valid_tilings &= ~LAYOUT_TILING_Y;
512
513 /*
514 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
515 *
516 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
517 * for all tiled Y Render Target surfaces."
518 *
519 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
520 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800521 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800522 valid_tilings &= ~LAYOUT_TILING_Y;
523 }
524
525 /* no conflicting binding flags */
526 assert(valid_tilings);
527
528 return valid_tilings;
529}
530
531static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800532layout_init_tiling(struct intel_layout *layout,
533 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800534{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800535 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800536 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
537
Chia-I Wu457d0a62014-08-18 13:02:26 +0800538 /* no hardware support for W-tile */
539 if (valid_tilings & LAYOUT_TILING_W)
540 valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
541
Chia-I Wu4bc47012014-08-14 13:03:25 +0800542 layout->valid_tilings = valid_tilings;
543
Chia-I Wu457d0a62014-08-18 13:02:26 +0800544 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
545 XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800546 /*
547 * heuristically set a minimum width/height for enabling tiling
548 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800549 if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800550 valid_tilings &= ~LAYOUT_TILING_X;
551
Chia-I Wu457d0a62014-08-18 13:02:26 +0800552 if ((layout->width0 < 32 || layout->height0 < 16) &&
553 (layout->width0 < 16 || layout->height0 < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800554 (valid_tilings & ~LAYOUT_TILING_Y))
555 valid_tilings &= ~LAYOUT_TILING_Y;
556 } else {
557 /* force linear if we are not sure where the texture is bound to */
558 if (valid_tilings & LAYOUT_TILING_NONE)
559 valid_tilings &= LAYOUT_TILING_NONE;
560 }
561
562 /* prefer tiled over linear */
563 if (valid_tilings & LAYOUT_TILING_Y)
564 layout->tiling = INTEL_TILING_Y;
565 else if (valid_tilings & LAYOUT_TILING_X)
566 layout->tiling = INTEL_TILING_X;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800567 else
Chia-I Wu4bc47012014-08-14 13:03:25 +0800568 layout->tiling = INTEL_TILING_NONE;
569}
570
571static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800572layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800573 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800574{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800575 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800576
577 /*
578 * It is not explicitly states, but render targets are expected to be
579 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
580 * to be IMS (samples interleaved).
581 *
582 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
583 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800584 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800585 /*
586 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
587 *
588 * "note that the depth buffer and stencil buffer have an implied
589 * value of ARYSPC_FULL"
590 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800591 layout->walk = (info->imageType == XGL_IMAGE_3D) ?
592 INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800593
Chia-I Wu457d0a62014-08-18 13:02:26 +0800594 layout->interleaved_samples = true;
595 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800596 /*
597 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
598 *
599 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
600 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
601 * Array Spacing) must be set to ARYSPC_LOD0."
602 *
603 * As multisampled resources are not mipmapped, we never use
604 * ARYSPC_FULL for them.
605 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800606 if (info->samples > 1)
607 assert(info->mipLevels == 1);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800608
609 layout->walk =
610 (info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
611 (info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
612 INTEL_LAYOUT_WALK_LOD;
613
614 layout->interleaved_samples = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800615 }
616}
617
618static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800619layout_init_walk_gen6(struct intel_layout *layout,
620 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800621{
Chia-I Wu4bc47012014-08-14 13:03:25 +0800622 /*
623 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
624 *
625 * "The separate stencil buffer does not support mip mapping, thus the
626 * storage for LODs other than LOD 0 is not needed. The following
627 * QPitch equation applies only to the separate stencil buffer:
628 *
629 * QPitch = h_0"
630 *
631 * GEN6 does not support compact spacing otherwise.
632 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800633 layout->walk =
634 (params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700635 (layout->format == XGL_FMT_S8_UINT) ? INTEL_LAYOUT_WALK_LOD :
Chia-I Wu457d0a62014-08-18 13:02:26 +0800636 INTEL_LAYOUT_WALK_LAYER;
637
638 /* GEN6 supports only interleaved samples */
639 layout->interleaved_samples = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800640}
641
642static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800643layout_init_walk(struct intel_layout *layout,
644 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800645{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800646 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800647 layout_init_walk_gen7(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800648 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800649 layout_init_walk_gen6(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800650}
651
652static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800653layout_init_size_and_format(struct intel_layout *layout,
654 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800655{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800656 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800657 XGL_FORMAT format = info->format;
Chia-I Wu9a056dd2015-02-11 13:19:39 -0700658 bool require_separate_stencil = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800659
Chia-I Wu457d0a62014-08-18 13:02:26 +0800660 layout->width0 = info->extent.width;
661 layout->height0 = info->extent.height;
662
Chia-I Wu4bc47012014-08-14 13:03:25 +0800663 /*
664 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
665 *
666 * "This field (Separate Stencil Buffer Enable) must be set to the same
667 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
668 *
669 * GEN7+ requires separate stencil buffers.
670 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800671 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
672 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800673 require_separate_stencil = true;
674 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800675 require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800676 }
677
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700678 if (icd_format_is_ds(format)) {
679 switch (format) {
680 case XGL_FMT_D32_SFLOAT_S8_UINT:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800681 if (require_separate_stencil) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700682 format = XGL_FMT_D32_SFLOAT;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800683 layout->separate_stencil = true;
684 }
685 break;
686 default:
687 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800688 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800689 }
690
Chia-I Wu4bc47012014-08-14 13:03:25 +0800691 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800692 layout->block_width = icd_format_get_block_width(format);
693 layout->block_height = layout->block_width;
694 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800695
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800696 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800697}
698
699static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800700layout_want_mcs(struct intel_layout *layout,
701 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800702{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800703 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800704 bool want_mcs = false;
705
706 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800707 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800708 return false;
709
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800710 if (info->imageType != XGL_IMAGE_2D ||
711 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800712 return false;
713
714 /*
715 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
716 *
717 * "For Render Target and Sampling Engine Surfaces:If the surface is
718 * multisampled (Number of Multisamples any value other than
719 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
720 *
721 * "This field must be set to 0 for all SINT MSRTs when all RT channels
722 * are not written"
723 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800724 if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800725 !icd_format_is_int(info->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800726 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800727 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800728 /*
729 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
730 *
731 * "When MCS is buffer is used for color clear of non-multisampler
732 * render target, the following restrictions apply.
733 * - Support is limited to tiled render targets.
734 * - Support is for non-mip-mapped and non-array surface types
735 * only.
736 * - Clear is supported only on the full RT; i.e., no partial clear
737 * or overlapping clears.
738 * - MCS buffer for non-MSRT is supported only for RT formats
739 * 32bpp, 64bpp and 128bpp.
740 * ..."
741 */
742 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800743 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800744 switch (layout->block_size) {
745 case 4:
746 case 8:
747 case 16:
748 want_mcs = true;
749 break;
750 default:
751 break;
752 }
753 }
754 }
755
756 return want_mcs;
757}
758
759static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800760layout_want_hiz(const struct intel_layout *layout,
761 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800762{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800763 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800764
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800765 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800766 return false;
767
Chia-I Wu9ec7e702015-02-19 13:18:42 -0700768 if (!intel_format_has_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800769 return false;
770
Chia-I Wu457d0a62014-08-18 13:02:26 +0800771 /*
772 * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
773 * for every level. This is generally fine except on GEN6, where HiZ and
774 * separate stencil are enabled and disabled at the same time. When the
775 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
776 * can result in incompatible formats.
777 */
778 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700779 info->format == XGL_FMT_D32_SFLOAT_S8_UINT &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800780 info->mipLevels > 1)
781 return false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800782
Chia-I Wuc94f3e52014-10-07 14:45:05 +0800783 if (true) {
784 intel_dev_log(params->dev, XGL_DBG_MSG_PERF_WARNING,
785 XGL_VALIDATION_LEVEL_0, XGL_NULL_HANDLE, 0, 0,
786 "HiZ disabled");
787 return false;
788 }
789
Chia-I Wu457d0a62014-08-18 13:02:26 +0800790 return true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800791}
792
793static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800794layout_init_aux(struct intel_layout *layout,
795 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800796{
797 if (layout_want_hiz(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800798 layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800799 else if (layout_want_mcs(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800800 layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800801}
802
803static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800804layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800805{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800806 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800807 int align_w = 1, align_h = 1, pad_h = 0;
808
809 /*
810 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
811 *
812 * "To determine the necessary padding on the bottom and right side of
813 * the surface, refer to the table in Section 7.18.3.4 for the i and j
814 * parameters for the surface format in use. The surface must then be
815 * extended to the next multiple of the alignment unit size in each
816 * dimension, and all texels contained in this extended surface must
817 * have valid GTT entries."
818 *
819 * "For cube surfaces, an additional two rows of padding are required
820 * at the bottom of the surface. This must be ensured regardless of
821 * whether the surface is stored tiled or linear. This is due to the
822 * potential rotation of cache line orientation from memory to cache."
823 *
824 * "For compressed textures (BC* and FXT1 surface formats), padding at
825 * the bottom of the surface is to an even compressed row, which is
826 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
827 * purposes, these surfaces behave as if j = 8 only for surface
828 * padding purposes. The value of 4 for j still applies for mip level
829 * alignment and QPitch calculation."
830 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800831 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
832 if (align_w < layout->align_i)
833 align_w = layout->align_i;
834 if (align_h < layout->align_j)
835 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800836
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800837 /* in case it is used as a cube */
838 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800839 pad_h += 2;
840
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800841 if (params->compressed && align_h < layout->align_j * 2)
842 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800843 }
844
845 /*
846 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
847 *
848 * "If the surface contains an odd number of rows of data, a final row
849 * below the surface must be allocated."
850 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800851 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
852 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800853
854 /*
855 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu457d0a62014-08-18 13:02:26 +0800856 * intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800857 * To avoid out-of-bound access, we have to pad.
858 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800859 if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
860 info->mipLevels == 1 &&
861 info->arraySize == 1 &&
862 info->extent.depth == 1) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800863 if (align_w < 8)
864 align_w = 8;
865 if (align_h < 4)
866 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800867 }
868
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800869 params->max_x = u_align(params->max_x, align_w);
870 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800871}
872
873/* note that this may force the texture to be linear */
874static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800875layout_calculate_bo_size(struct intel_layout *layout,
876 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800877{
878 assert(params->max_x % layout->block_width == 0);
879 assert(params->max_y % layout->block_height == 0);
880 assert(layout->layer_height % layout->block_height == 0);
881
882 layout->bo_stride =
883 (params->max_x / layout->block_width) * layout->block_size;
884 layout->bo_height = params->max_y / layout->block_height;
885
886 while (true) {
887 unsigned w = layout->bo_stride, h = layout->bo_height;
888 unsigned align_w, align_h;
889
890 /*
891 * From the Haswell PRM, volume 5, page 163:
892 *
893 * "For linear surfaces, additional padding of 64 bytes is required
894 * at the bottom of the surface. This is in addition to the padding
895 * required above."
896 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800897 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
898 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800899 layout->tiling == INTEL_TILING_NONE) {
900 layout->bo_height +=
901 (64 + layout->bo_stride - 1) / layout->bo_stride;
902 }
903
904 /*
905 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
906 *
907 * "- For linear render target surfaces, the pitch must be a
908 * multiple of the element size for non-YUV surface formats.
909 * Pitch must be a multiple of 2 * element size for YUV surface
910 * formats.
911 * - For other linear surfaces, the pitch can be any multiple of
912 * bytes.
913 * - For tiled surfaces, the pitch must be a multiple of the tile
914 * width."
915 *
916 * Different requirements may exist when the bo is used in different
917 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800918 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800919 */
920 switch (layout->tiling) {
921 case INTEL_TILING_X:
922 align_w = 512;
923 align_h = 8;
924 break;
925 case INTEL_TILING_Y:
926 align_w = 128;
927 align_h = 32;
928 break;
929 default:
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700930 if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800931 /*
932 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
933 *
934 * "A 4KB tile is subdivided into 8-high by 8-wide array of
935 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
936 * bytes."
937 *
938 * Since we asked for INTEL_TILING_NONE instead of the non-existent
939 * INTEL_TILING_W, we want to align to W tiles here.
940 */
941 align_w = 64;
942 align_h = 64;
943 } else {
944 /* some good enough values */
945 align_w = 64;
946 align_h = 2;
947 }
948 break;
949 }
950
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800951 w = u_align(w, align_w);
952 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800953
954 /* make sure the bo is mappable */
955 if (layout->tiling != INTEL_TILING_NONE) {
956 /*
957 * Usually only the first 256MB of the GTT is mappable.
958 *
959 * See also how intel_context::max_gtt_map_object_size is calculated.
960 */
961 const size_t mappable_gtt_size = 256 * 1024 * 1024;
962
963 /*
964 * Be conservative. We may be able to switch from VALIGN_4 to
965 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
966 */
967 if (mappable_gtt_size / w / 4 < h) {
968 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
969 layout->tiling = INTEL_TILING_NONE;
970 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800971 if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800972 params->info->samples <= 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800973 layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800974
975 continue;
976 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800977 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800978 }
979 }
980 }
981
982 layout->bo_stride = w;
983 layout->bo_height = h;
984 break;
985 }
986}
987
988static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800989layout_calculate_hiz_size(struct intel_layout *layout,
990 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800991{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800992 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800993 const unsigned hz_align_j = 8;
994 enum intel_layout_walk_type hz_walk;
995 unsigned hz_width, hz_height, lv;
996 unsigned hz_clear_w, hz_clear_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800997
Chia-I Wu457d0a62014-08-18 13:02:26 +0800998 assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
999
1000 assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
1001 layout->walk == INTEL_LAYOUT_WALK_3D);
1002
1003 /*
1004 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1005 *
1006 * "The hierarchical depth buffer does not support the LOD field, it is
1007 * assumed by hardware to be zero. A separate hierarachical depth
1008 * buffer is required for each LOD used, and the corresponding
1009 * buffer's state delivered to hardware each time a new depth buffer
1010 * state with modified LOD is delivered."
1011 *
1012 * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
1013 */
1014 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1015 hz_walk = layout->walk;
1016 else
1017 hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001018
1019 /*
1020 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1021 * PRM, volume 2 part 1, page 312-313.
1022 *
1023 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1024 * memory row.
1025 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001026 switch (hz_walk) {
1027 case INTEL_LAYOUT_WALK_LOD:
1028 {
1029 unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
1030 unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
1031 unsigned cur_tx, cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001032
Chia-I Wu457d0a62014-08-18 13:02:26 +08001033 /* figure out the tile offsets of LODs */
1034 hz_width = 0;
1035 hz_height = 0;
1036 cur_tx = 0;
1037 cur_ty = 0;
1038 for (lv = 0; lv < info->mipLevels; lv++) {
1039 unsigned tw, th;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001040
Chia-I Wu457d0a62014-08-18 13:02:26 +08001041 lod_tx[lv] = cur_tx;
1042 lod_ty[lv] = cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001043
Chia-I Wu457d0a62014-08-18 13:02:26 +08001044 tw = u_align(layout->lods[lv].slice_width, 16);
1045 th = u_align(layout->lods[lv].slice_height, hz_align_j) *
1046 info->arraySize / 2;
1047 /* convert to Y-tiles */
1048 tw = u_align(tw, 128) / 128;
1049 th = u_align(th, 32) / 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001050
Chia-I Wu457d0a62014-08-18 13:02:26 +08001051 if (hz_width < cur_tx + tw)
1052 hz_width = cur_tx + tw;
1053 if (hz_height < cur_ty + th)
1054 hz_height = cur_ty + th;
1055
1056 if (lv == 1)
1057 cur_tx += tw;
1058 else
1059 cur_ty += th;
1060 }
1061
1062 /* convert tile offsets to memory offsets */
1063 for (lv = 0; lv < info->mipLevels; lv++) {
1064 layout->aux_offsets[lv] =
1065 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1066 }
1067 hz_width *= 128;
1068 hz_height *= 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001069 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001070 break;
1071 case INTEL_LAYOUT_WALK_LAYER:
1072 {
1073 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001074 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001075 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001076 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001077 const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001078
Chia-I Wu457d0a62014-08-18 13:02:26 +08001079 hz_width = u_align(layout->lods[0].slice_width, 16);
1080
1081 hz_height = hz_qpitch * info->arraySize / 2;
1082 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1083 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001084 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001085 break;
1086 case INTEL_LAYOUT_WALK_3D:
1087 hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001088
Chia-I Wu457d0a62014-08-18 13:02:26 +08001089 hz_height = 0;
1090 for (lv = 0; lv < info->mipLevels; lv++) {
1091 const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
1092 /* according to the formula, slices are packed together vertically */
1093 hz_height += h * u_minify(info->extent.depth, lv);
1094 }
1095 hz_height /= 2;
1096 break;
Chia-I Wu9a056dd2015-02-11 13:19:39 -07001097 default:
1098 assert(!"unknown layout walk");
1099 hz_width = 0;
1100 hz_height = 0;
1101 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001102 }
1103
Chia-I Wu457d0a62014-08-18 13:02:26 +08001104 /*
1105 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1106 * Experiments on Haswell show that aligning the RECTLIST primitive and
1107 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1108 * aligned.
1109 */
1110 hz_clear_w = 8;
1111 hz_clear_h = 4;
1112 switch (info->samples) {
1113 case 0:
1114 case 1:
1115 default:
1116 break;
1117 case 2:
1118 hz_clear_w /= 2;
1119 break;
1120 case 4:
1121 hz_clear_w /= 2;
1122 hz_clear_h /= 2;
1123 break;
1124 case 8:
1125 hz_clear_w /= 4;
1126 hz_clear_h /= 2;
1127 break;
1128 case 16:
1129 hz_clear_w /= 4;
1130 hz_clear_h /= 4;
1131 break;
1132 }
1133
1134 for (lv = 0; lv < info->mipLevels; lv++) {
1135 if (u_minify(layout->width0, lv) % hz_clear_w ||
1136 u_minify(layout->height0, lv) % hz_clear_h)
1137 break;
1138 layout->aux_enables |= 1 << lv;
1139 }
1140
1141 /* we padded to allow this in layout_align() */
1142 if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
1143 layout->aux_enables |= 0x1;
1144
Chia-I Wu4bc47012014-08-14 13:03:25 +08001145 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001146 layout->aux_stride = u_align(hz_width, 128);
1147 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001148}
1149
1150static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001151layout_calculate_mcs_size(struct intel_layout *layout,
1152 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001153{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001154 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001155 int mcs_width, mcs_height, mcs_cpp;
1156 int downscale_x, downscale_y;
1157
Chia-I Wu457d0a62014-08-18 13:02:26 +08001158 assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001159
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001160 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001161 /*
1162 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1163 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1164 * need of scale down could be that the clear rectangle is used to clear
1165 * the MCS instead of the RT.
1166 *
1167 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1168 * 2x2 factor could come from that the hardware writes 128 bits (an
1169 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1170 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1171 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1172 * pixel block in the RT.
1173 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001174 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001175 case 2:
1176 case 4:
1177 downscale_x = 8;
1178 downscale_y = 2;
1179 mcs_cpp = 1;
1180 break;
1181 case 8:
1182 downscale_x = 2;
1183 downscale_y = 2;
1184 mcs_cpp = 4;
1185 break;
1186 case 16:
1187 downscale_x = 2;
1188 downscale_y = 1;
1189 mcs_cpp = 8;
1190 break;
1191 default:
1192 assert(!"unsupported sample count");
1193 return;
1194 break;
1195 }
1196
1197 /*
1198 * It also appears that the 2x2 subspans generated by the scaled-down
1199 * clear rectangle cannot be masked. The scale-down clear rectangle
1200 * thus must be aligned to 2x2, and we need to pad.
1201 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001202 mcs_width = u_align(layout->width0, downscale_x * 2);
1203 mcs_height = u_align(layout->height0, downscale_y * 2);
1204 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001205 /*
1206 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1207 *
1208 * " Pixels Lines
1209 * TiledY RT CL
1210 * bpp
1211 * 32 8 4
1212 * 64 4 4
1213 * 128 2 4
1214 *
1215 * TiledX RT CL
1216 * bpp
1217 * 32 16 2
1218 * 64 8 2
1219 * 128 4 2"
1220 *
1221 * This table and the two following tables define the RT alignments, the
1222 * clear rectangle alignments, and the clear rectangle scale factors.
1223 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1224 * that the clear rectangle alignments are 16x32 blocks, and the clear
1225 * rectangle scale factors are 8x16 blocks.
1226 *
1227 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1228 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1229 * 8x16 blocks.
1230 *
1231 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1232 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1233 * which says that a Y-tile maps to 128x256 blocks (\see
1234 * intel_get_non_msrt_mcs_alignment). It does not really change
1235 * anything except for the size of the allocated MCS. Let's see if we
1236 * hit out-of-bound access.
1237 */
1238 switch (layout->tiling) {
1239 case INTEL_TILING_X:
1240 downscale_x = 64 / layout->block_size;
1241 downscale_y = 2;
1242 break;
1243 case INTEL_TILING_Y:
1244 downscale_x = 32 / layout->block_size;
1245 downscale_y = 4;
1246 break;
1247 default:
1248 assert(!"unsupported tiling mode");
1249 return;
1250 break;
1251 }
1252
1253 downscale_x *= 8;
1254 downscale_y *= 16;
1255
1256 /*
1257 * From the Haswell PRM, volume 7, page 652:
1258 *
1259 * "Clear rectangle must be aligned to two times the number of
1260 * pixels in the table shown below due to 16X16 hashing across the
1261 * slice."
1262 *
1263 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1264 * 2x2, and we need to pad.
1265 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001266 mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
1267 mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001268 mcs_cpp = 16; /* an OWord */
1269 }
1270
Chia-I Wu457d0a62014-08-18 13:02:26 +08001271 layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001272 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001273 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1274 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001275}
1276
1277/**
1278 * Initialize the layout. Callers should zero-initialize \p layout first.
1279 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001280void intel_layout_init(struct intel_layout *layout,
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001281 struct intel_dev *dev,
Chia-I Wu794d12a2014-09-15 14:55:25 +08001282 const XGL_IMAGE_CREATE_INFO *info,
1283 bool scanout)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001284{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001285 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001286
1287 memset(&params, 0, sizeof(params));
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001288 params.dev = dev;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001289 params.gpu = dev->gpu;
1290 params.info = info;
Chia-I Wu794d12a2014-09-15 14:55:25 +08001291 params.scanout = scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001292
1293 /* note that there are dependencies between these functions */
1294 layout_init_aux(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001295 layout_init_size_and_format(layout, &params);
1296 layout_init_walk(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001297 layout_init_tiling(layout, &params);
1298 layout_init_alignments(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001299 layout_init_lods(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001300 layout_init_layer_height(layout, &params);
1301
1302 layout_align(layout, &params);
1303 layout_calculate_bo_size(layout, &params);
1304
Chia-I Wu457d0a62014-08-18 13:02:26 +08001305 switch (layout->aux) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001306 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001307 layout_calculate_hiz_size(layout, &params);
1308 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001309 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001310 layout_calculate_mcs_size(layout, &params);
1311 break;
1312 default:
1313 break;
1314 }
1315}
1316
1317/**
1318 * Update the tiling mode and bo stride (for imported resources).
1319 */
1320bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001321intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu457d0a62014-08-18 13:02:26 +08001322 enum intel_tiling_mode tiling,
1323 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001324{
1325 if (!(layout->valid_tilings & (1 << tiling)))
1326 return false;
1327
1328 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1329 (tiling == INTEL_TILING_Y && bo_stride % 128))
1330 return false;
1331
1332 layout->tiling = tiling;
1333 layout->bo_stride = bo_stride;
1334
1335 return true;
1336}
Chia-I Wu63a53972014-12-04 12:51:54 +08001337
1338/**
1339 * Return the offset (in bytes) to a slice within the bo.
1340 *
1341 * The returned offset is aligned to tile size. Since slices are not
1342 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1343 * from the tile origin to the slice are also returned. X offset is always a
1344 * multiple of 4 and Y offset is always a multiple of 2.
1345 */
1346unsigned
1347intel_layout_get_slice_tile_offset(const struct intel_layout *layout,
1348 unsigned level, unsigned slice,
1349 unsigned *x_offset, unsigned *y_offset)
1350{
1351 unsigned tile_w, tile_h, tile_size, row_size;
1352 unsigned tile_offset, x, y;
1353
1354 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1355
1356 switch (layout->tiling) {
1357 case INTEL_TILING_NONE:
1358 /* W-tiled */
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -07001359 if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu63a53972014-12-04 12:51:54 +08001360 tile_w = 64;
1361 tile_h = 64;
1362 }
1363 else {
1364 tile_w = 1;
1365 tile_h = 1;
1366 }
1367 break;
1368 case INTEL_TILING_X:
1369 tile_w = 512;
1370 tile_h = 8;
1371 break;
1372 case INTEL_TILING_Y:
1373 tile_w = 128;
1374 tile_h = 32;
1375 break;
1376 default:
1377 assert(!"unknown tiling");
1378 tile_w = 1;
1379 tile_h = 1;
1380 break;
1381 }
1382
1383 tile_size = tile_w * tile_h;
1384 row_size = layout->bo_stride * tile_h;
1385
1386 intel_layout_get_slice_pos(layout, level, slice, &x, &y);
1387 /* in bytes */
1388 intel_layout_pos_to_mem(layout, x, y, &x, &y);
1389 tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1390
1391 /*
1392 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1393 * aligned at this point.
1394 */
1395 assert(tile_offset % tile_size == 0);
1396
1397 /*
1398 * because of the possible values of align_i and align_j in
1399 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1400 * 4 and y_offset is guaranteed to be a multiple of 2.
1401 */
1402 if (x_offset) {
1403 /* in pixels */
1404 x = (x % tile_w) / layout->block_size * layout->block_width;
1405 assert(x % 4 == 0);
1406
1407 *x_offset = x;
1408 }
1409
1410 if (y_offset) {
1411 /* in pixels */
1412 y = (y % tile_h) * layout->block_height;
1413 assert(y % 2 == 0);
1414
1415 *y_offset = y;
1416 }
1417
1418 return tile_offset;
1419}