blob: 8cec97c5662b342bf4239da64ad91baf500cedc1 [file] [log] [blame]
/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Chia-I Wu <olv@lunarg.com>
 */
27
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080028#include "dev.h"
Chia-I Wu1bf06df2014-08-16 12:33:13 +080029#include "format.h"
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080030#include "gpu.h"
31#include "layout.h"
Chia-I Wu4bc47012014-08-14 13:03:25 +080032
33enum {
34 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
35 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
36 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
37 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
38
39 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
40 LAYOUT_TILING_X |
41 LAYOUT_TILING_Y |
42 LAYOUT_TILING_W)
43};
44
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080045struct intel_layout_params {
Chia-I Wuc94f3e52014-10-07 14:45:05 +080046 struct intel_dev *dev;
47
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080048 const struct intel_gpu *gpu;
49 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu794d12a2014-09-15 14:55:25 +080050 bool scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +080051
52 bool compressed;
53
54 unsigned h0, h1;
55 unsigned max_x, max_y;
56};
57
58static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080059layout_get_slice_size(const struct intel_layout *layout,
60 const struct intel_layout_params *params,
Chia-I Wu4bc47012014-08-14 13:03:25 +080061 unsigned level, unsigned *width, unsigned *height)
62{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080063 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +080064 unsigned w, h;
65
Chia-I Wu457d0a62014-08-18 13:02:26 +080066 w = u_minify(layout->width0, level);
67 h = u_minify(layout->height0, level);
Chia-I Wu4bc47012014-08-14 13:03:25 +080068
69 /*
70 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
71 *
72 * "The dimensions of the mip maps are first determined by applying the
73 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
74 * if necessary, they are padded out to compression block boundaries."
75 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +080076 w = u_align(w, layout->block_width);
77 h = u_align(h, layout->block_height);
Chia-I Wu4bc47012014-08-14 13:03:25 +080078
79 /*
80 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
81 *
82 * "If the surface is multisampled (4x), these values must be adjusted
83 * as follows before proceeding:
84 *
85 * W_L = ceiling(W_L / 2) * 4
86 * H_L = ceiling(H_L / 2) * 4"
87 *
88 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
89 *
90 * "If the surface is multisampled and it is a depth or stencil surface
91 * or Multisampled Surface StorageFormat in SURFACE_STATE is
92 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
93 * proceeding:
94 *
95 * #samples W_L = H_L =
96 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
97 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
98 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
99 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
100 *
101 * For interleaved samples (4x), where pixels
102 *
103 * (x, y ) (x+1, y )
104 * (x, y+1) (x+1, y+1)
105 *
106 * would be is occupied by
107 *
108 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
109 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
110 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
111 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
112 *
113 * Thus the need to
114 *
Chia-I Wu457d0a62014-08-18 13:02:26 +0800115 * w = align(w, 2) * 2;
116 * y = align(y, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800117 */
118 if (layout->interleaved_samples) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800119 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800120 case 0:
121 case 1:
122 break;
123 case 2:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800124 w = u_align(w, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800125 break;
126 case 4:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800127 w = u_align(w, 2) * 2;
128 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800129 break;
130 case 8:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800131 w = u_align(w, 2) * 4;
132 h = u_align(h, 2) * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800133 break;
134 case 16:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800135 w = u_align(w, 2) * 4;
136 h = u_align(h, 2) * 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800137 break;
138 default:
139 assert(!"unsupported sample count");
140 break;
141 }
142 }
143
Chia-I Wu457d0a62014-08-18 13:02:26 +0800144 /*
145 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
146 *
147 * "For separate stencil buffer, the width must be mutiplied by 2 and
148 * height divided by 2..."
149 *
150 * To make things easier (for transfer), we will just double the stencil
151 * stride in 3DSTATE_STENCIL_BUFFER.
152 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800153 w = u_align(w, layout->align_i);
154 h = u_align(h, layout->align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800155
156 *width = w;
157 *height = h;
158}
159
160static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800161layout_get_num_layers(const struct intel_layout *layout,
162 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800163{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800164 const XGL_IMAGE_CREATE_INFO *info = params->info;
165 unsigned num_layers = info->arraySize;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800166
167 /* samples of the same index are stored in a layer */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800168 if (info->samples > 1 && !layout->interleaved_samples)
169 num_layers *= info->samples;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800170
171 return num_layers;
172}
173
174static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800175layout_init_layer_height(struct intel_layout *layout,
176 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800177{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800178 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800179 unsigned num_layers;
180
Chia-I Wu457d0a62014-08-18 13:02:26 +0800181 if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
182 return;
183
Chia-I Wu4bc47012014-08-14 13:03:25 +0800184 num_layers = layout_get_num_layers(layout, params);
185 if (num_layers <= 1)
186 return;
187
Chia-I Wu4bc47012014-08-14 13:03:25 +0800188 /*
189 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
190 *
191 * "The following equation is used for surface formats other than
192 * compressed textures:
193 *
194 * QPitch = (h0 + h1 + 11j)"
195 *
196 * "The equation for compressed textures (BC* and FXT1 surface formats)
197 * follows:
198 *
199 * QPitch = (h0 + h1 + 11j) / 4"
200 *
201 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
202 * value calculated in the equation above, for every other odd Surface
203 * Height starting from 1 i.e. 1,5,9,13"
204 *
205 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
206 *
207 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
208 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
209 *
210 * QPitch = (h0 + h1 + 12j)
211 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
212 *
213 * (There are many typos or missing words here...)"
214 *
215 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
216 * the base address. The PRM divides QPitch by 4 for compressed formats
217 * because the block height for those formats are 4, and it wants QPitch to
218 * mean the number of memory rows, as opposed to texel rows, between
219 * slices. Since we use texel rows everywhere, we do not need to divide
220 * QPitch by 4.
221 */
222 layout->layer_height = params->h0 + params->h1 +
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800223 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800224
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800225 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800226 layout->height0 % 4 == 1)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800227 layout->layer_height += 4;
228
229 params->max_y += layout->layer_height * (num_layers - 1);
230}
231
232static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800233layout_init_lods(struct intel_layout *layout,
234 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800235{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800236 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800237 unsigned cur_x, cur_y;
238 unsigned lv;
239
240 cur_x = 0;
241 cur_y = 0;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800242 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu457d0a62014-08-18 13:02:26 +0800243 unsigned lod_w, lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800244
Chia-I Wu457d0a62014-08-18 13:02:26 +0800245 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800246
Chia-I Wu457d0a62014-08-18 13:02:26 +0800247 layout->lods[lv].x = cur_x;
248 layout->lods[lv].y = cur_y;
249 layout->lods[lv].slice_width = lod_w;
250 layout->lods[lv].slice_height = lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800251
Chia-I Wu457d0a62014-08-18 13:02:26 +0800252 switch (layout->walk) {
253 case INTEL_LAYOUT_WALK_LOD:
254 lod_h *= layout_get_num_layers(layout, params);
255 if (lv == 1)
256 cur_x += lod_w;
257 else
258 cur_y += lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800259
Chia-I Wu457d0a62014-08-18 13:02:26 +0800260 /* every LOD begins at tile boundaries */
261 if (info->mipLevels > 1) {
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700262 assert(layout->format == XGL_FMT_S8_UINT);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800263 cur_x = u_align(cur_x, 64);
264 cur_y = u_align(cur_y, 64);
265 }
266 break;
267 case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800268 /* MIPLAYOUT_BELOW */
269 if (lv == 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800270 cur_x += lod_w;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800271 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800272 cur_y += lod_h;
273 break;
274 case INTEL_LAYOUT_WALK_3D:
275 {
276 const unsigned num_slices = u_minify(info->extent.depth, lv);
277 const unsigned num_slices_per_row = 1 << lv;
278 const unsigned num_rows =
279 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
280
281 lod_w *= num_slices_per_row;
282 lod_h *= num_rows;
283
284 cur_y += lod_h;
285 }
286 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800287 }
288
Chia-I Wu457d0a62014-08-18 13:02:26 +0800289 if (params->max_x < layout->lods[lv].x + lod_w)
290 params->max_x = layout->lods[lv].x + lod_w;
291 if (params->max_y < layout->lods[lv].y + lod_h)
292 params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800293 }
294
Chia-I Wu457d0a62014-08-18 13:02:26 +0800295 if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
296 params->h0 = layout->lods[0].slice_height;
297
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800298 if (info->mipLevels > 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800299 params->h1 = layout->lods[1].slice_height;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800300 else
301 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
302 }
303}
304
305static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800306layout_init_alignments(struct intel_layout *layout,
307 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800308{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800309 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800310
311 /*
312 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
313 *
314 * "surface format align_i align_j
315 * YUV 4:2:2 formats 4 *see below
316 * BC1-5 4 4
317 * FXT1 8 4
318 * all other formats 4 *see below"
319 *
320 * "- align_j = 4 for any depth buffer
321 * - align_j = 2 for separate stencil buffer
322 * - align_j = 4 for any render target surface is multisampled (4x)
323 * - align_j = 4 for any render target surface with Surface Vertical
324 * Alignment = VALIGN_4
325 * - align_j = 2 for any render target surface with Surface Vertical
326 * Alignment = VALIGN_2
327 * - align_j = 2 for all other render target surface
328 * - align_j = 2 for any sampling engine surface with Surface Vertical
329 * Alignment = VALIGN_2
330 * - align_j = 4 for any sampling engine surface with Surface Vertical
331 * Alignment = VALIGN_4"
332 *
333 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
334 *
335 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
336 * the Surface Format is 96 bits per element (BPE)."
337 *
338 * They can be rephrased as
339 *
340 * align_i align_j
341 * compressed formats block width block height
342 * PIPE_FORMAT_S8_UINT 4 2
343 * other depth/stencil formats 4 4
344 * 4x multisampled 4 4
345 * bpp 96 4 2
346 * others 4 2 or 4
347 */
348
349 /*
350 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
351 *
352 * "surface defined by surface format align_i align_j
353 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
354 * not D16_UNORM 4 4
355 * 3DSTATE_STENCIL_BUFFER N/A 8 8
356 * SURFACE_STATE BC*, ETC*, EAC* 4 4
357 * FXT1 8 4
358 * all others (set by SURFACE_STATE)"
359 *
360 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
361 *
362 * "- This field (Surface Vertical Aligment) is intended to be set to
363 * VALIGN_4 if the surface was rendered as a depth buffer, for a
364 * multisampled (4x) render target, or for a multisampled (8x)
365 * render target, since these surfaces support only alignment of 4.
366 * - Use of VALIGN_4 for other surfaces is supported, but uses more
367 * memory.
368 * - This field must be set to VALIGN_4 for all tiled Y Render Target
369 * surfaces.
370 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
371 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
372 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
373 * must be set to VALIGN_4."
374 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
375 *
376 * "- This field (Surface Horizontal Aligment) is intended to be set to
377 * HALIGN_8 only if the surface was rendered as a depth buffer with
378 * Z16 format or a stencil buffer, since these surfaces support only
379 * alignment of 8.
380 * - Use of HALIGN_8 for other surfaces is supported, but uses more
381 * memory.
382 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
383 * - This field must be set to HALIGN_8 if the Surface Format is
384 * FXT1."
385 *
386 * They can be rephrased as
387 *
388 * align_i align_j
389 * compressed formats block width block height
390 * PIPE_FORMAT_Z16_UNORM 8 4
391 * PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu457d0a62014-08-18 13:02:26 +0800392 * other depth/stencil formats 4 4
Chia-I Wu4bc47012014-08-14 13:03:25 +0800393 * 2x or 4x multisampled 4 or 8 4
394 * tiled Y 4 or 8 4 (if rt)
395 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
396 * others 4 or 8 2 or 4
397 */
398
399 if (params->compressed) {
400 /* this happens to be the case */
401 layout->align_i = layout->block_width;
402 layout->align_j = layout->block_height;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800403 } else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
404 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700405 switch (layout->format) {
406 case XGL_FMT_D16_UNORM:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800407 layout->align_i = 8;
408 layout->align_j = 4;
409 break;
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700410 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800411 layout->align_i = 8;
412 layout->align_j = 8;
413 break;
414 default:
415 layout->align_i = 4;
416 layout->align_j = 4;
417 break;
418 }
419 } else {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700420 switch (layout->format) {
421 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800422 layout->align_i = 4;
423 layout->align_j = 2;
424 break;
425 default:
426 layout->align_i = 4;
427 layout->align_j = 4;
428 break;
429 }
430 }
431 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800432 const bool valign_4 = (info->samples > 1) ||
433 (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800434 layout->tiling == INTEL_TILING_Y &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800435 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800436
437 if (valign_4)
438 assert(layout->block_size != 12);
439
440 layout->align_i = 4;
441 layout->align_j = (valign_4) ? 4 : 2;
442 }
443
444 /*
445 * the fact that align i and j are multiples of block width and height
446 * respectively is what makes the size of the bo a multiple of the block
447 * size, slices start at block boundaries, and many of the computations
448 * work.
449 */
450 assert(layout->align_i % layout->block_width == 0);
451 assert(layout->align_j % layout->block_height == 0);
452
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800453 /* make sure u_align() works */
454 assert(u_is_pow2(layout->align_i) &&
455 u_is_pow2(layout->align_j));
456 assert(u_is_pow2(layout->block_width) &&
457 u_is_pow2(layout->block_height));
Chia-I Wu4bc47012014-08-14 13:03:25 +0800458}
459
460static unsigned
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800461layout_get_valid_tilings(const struct intel_layout *layout,
462 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800463{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800464 const XGL_IMAGE_CREATE_INFO *info = params->info;
465 const XGL_FORMAT format = layout->format;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800466 unsigned valid_tilings = LAYOUT_TILING_ALL;
467
Chia-I Wu794d12a2014-09-15 14:55:25 +0800468 /*
469 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
470 *
471 * "Display/Overlay Y-Major not supported.
472 * X-Major required for Async Flips"
473 */
474 if (params->scanout)
475 valid_tilings &= LAYOUT_TILING_X;
476
Chia-I Wu6ac93992014-08-30 18:23:28 +0800477 if (info->tiling == XGL_LINEAR_TILING)
478 valid_tilings &= LAYOUT_TILING_NONE;
479
Chia-I Wu4bc47012014-08-14 13:03:25 +0800480 /*
Chia-I Wu4bc47012014-08-14 13:03:25 +0800481 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
482 *
483 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
484 * Depth Buffer is not supported."
485 *
486 * "The Depth Buffer, if tiled, must use Y-Major tiling."
487 *
488 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
489 *
490 * "W-Major Tile Format is used for separate stencil."
491 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800492 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700493 switch (format) {
494 case XGL_FMT_S8_UINT:
Chia-I Wu4bc47012014-08-14 13:03:25 +0800495 valid_tilings &= LAYOUT_TILING_W;
496 break;
497 default:
498 valid_tilings &= LAYOUT_TILING_Y;
499 break;
500 }
501 }
502
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800503 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800504 /*
505 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
506 *
507 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
508 * either TileX or Linear."
509 */
510 if (layout->block_size == 16)
511 valid_tilings &= ~LAYOUT_TILING_Y;
512
513 /*
514 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
515 *
516 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
517 * for all tiled Y Render Target surfaces."
518 *
519 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
520 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800521 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800522 valid_tilings &= ~LAYOUT_TILING_Y;
523 }
524
525 /* no conflicting binding flags */
526 assert(valid_tilings);
527
528 return valid_tilings;
529}
530
531static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800532layout_init_tiling(struct intel_layout *layout,
533 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800534{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800535 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800536 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
537
Chia-I Wu457d0a62014-08-18 13:02:26 +0800538 /* no hardware support for W-tile */
539 if (valid_tilings & LAYOUT_TILING_W)
540 valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
541
Chia-I Wu4bc47012014-08-14 13:03:25 +0800542 layout->valid_tilings = valid_tilings;
543
Chia-I Wu457d0a62014-08-18 13:02:26 +0800544 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
545 XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800546 /*
547 * heuristically set a minimum width/height for enabling tiling
548 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800549 if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800550 valid_tilings &= ~LAYOUT_TILING_X;
551
Chia-I Wu457d0a62014-08-18 13:02:26 +0800552 if ((layout->width0 < 32 || layout->height0 < 16) &&
553 (layout->width0 < 16 || layout->height0 < 32) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800554 (valid_tilings & ~LAYOUT_TILING_Y))
555 valid_tilings &= ~LAYOUT_TILING_Y;
556 } else {
557 /* force linear if we are not sure where the texture is bound to */
558 if (valid_tilings & LAYOUT_TILING_NONE)
559 valid_tilings &= LAYOUT_TILING_NONE;
560 }
561
562 /* prefer tiled over linear */
563 if (valid_tilings & LAYOUT_TILING_Y)
564 layout->tiling = INTEL_TILING_Y;
565 else if (valid_tilings & LAYOUT_TILING_X)
566 layout->tiling = INTEL_TILING_X;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800567 else
Chia-I Wu4bc47012014-08-14 13:03:25 +0800568 layout->tiling = INTEL_TILING_NONE;
569}
570
571static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800572layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800573 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800574{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800575 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800576
577 /*
578 * It is not explicitly states, but render targets are expected to be
579 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
580 * to be IMS (samples interleaved).
581 *
582 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
583 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800584 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800585 /*
586 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
587 *
588 * "note that the depth buffer and stencil buffer have an implied
589 * value of ARYSPC_FULL"
590 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800591 layout->walk = (info->imageType == XGL_IMAGE_3D) ?
592 INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800593
Chia-I Wu457d0a62014-08-18 13:02:26 +0800594 layout->interleaved_samples = true;
595 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800596 /*
597 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
598 *
599 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
600 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
601 * Array Spacing) must be set to ARYSPC_LOD0."
602 *
603 * As multisampled resources are not mipmapped, we never use
604 * ARYSPC_FULL for them.
605 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800606 if (info->samples > 1)
607 assert(info->mipLevels == 1);
Chia-I Wu457d0a62014-08-18 13:02:26 +0800608
609 layout->walk =
610 (info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
611 (info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
612 INTEL_LAYOUT_WALK_LOD;
613
614 layout->interleaved_samples = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800615 }
616}
617
618static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800619layout_init_walk_gen6(struct intel_layout *layout,
620 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800621{
Chia-I Wu4bc47012014-08-14 13:03:25 +0800622 /*
623 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
624 *
625 * "The separate stencil buffer does not support mip mapping, thus the
626 * storage for LODs other than LOD 0 is not needed. The following
627 * QPitch equation applies only to the separate stencil buffer:
628 *
629 * QPitch = h_0"
630 *
631 * GEN6 does not support compact spacing otherwise.
632 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800633 layout->walk =
634 (params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700635 (layout->format == XGL_FMT_S8_UINT) ? INTEL_LAYOUT_WALK_LOD :
Chia-I Wu457d0a62014-08-18 13:02:26 +0800636 INTEL_LAYOUT_WALK_LAYER;
637
638 /* GEN6 supports only interleaved samples */
639 layout->interleaved_samples = true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800640}
641
642static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800643layout_init_walk(struct intel_layout *layout,
644 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800645{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800646 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800647 layout_init_walk_gen7(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800648 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800649 layout_init_walk_gen6(layout, params);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800650}
651
652static void
Chia-I Wu457d0a62014-08-18 13:02:26 +0800653layout_init_size_and_format(struct intel_layout *layout,
654 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800655{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800656 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800657 XGL_FORMAT format = info->format;
Chia-I Wu9a056dd2015-02-11 13:19:39 -0700658 bool require_separate_stencil = false;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800659
Chia-I Wu457d0a62014-08-18 13:02:26 +0800660 layout->width0 = info->extent.width;
661 layout->height0 = info->extent.height;
662
Chia-I Wu4bc47012014-08-14 13:03:25 +0800663 /*
664 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
665 *
666 * "This field (Separate Stencil Buffer Enable) must be set to the same
667 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
668 *
669 * GEN7+ requires separate stencil buffers.
670 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800671 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
672 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800673 require_separate_stencil = true;
674 else
Chia-I Wu457d0a62014-08-18 13:02:26 +0800675 require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800676 }
677
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700678 if (icd_format_is_ds(format)) {
679 switch (format) {
680 case XGL_FMT_D32_SFLOAT_S8_UINT:
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800681 if (require_separate_stencil) {
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -0700682 format = XGL_FMT_D32_SFLOAT;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800683 layout->separate_stencil = true;
684 }
685 break;
686 default:
687 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800688 }
Chia-I Wu4bc47012014-08-14 13:03:25 +0800689 }
690
Chia-I Wu4bc47012014-08-14 13:03:25 +0800691 layout->format = format;
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800692 layout->block_width = icd_format_get_block_width(format);
693 layout->block_height = layout->block_width;
694 layout->block_size = icd_format_get_size(format);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800695
Chia-I Wu1bf06df2014-08-16 12:33:13 +0800696 params->compressed = icd_format_is_compressed(format);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800697}
698
699static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800700layout_want_mcs(struct intel_layout *layout,
701 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800702{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800703 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800704 bool want_mcs = false;
705
706 /* MCS is for RT on GEN7+ */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800707 if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800708 return false;
709
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800710 if (info->imageType != XGL_IMAGE_2D ||
711 !(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800712 return false;
713
714 /*
715 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
716 *
717 * "For Render Target and Sampling Engine Surfaces:If the surface is
718 * multisampled (Number of Multisamples any value other than
719 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
720 *
721 * "This field must be set to 0 for all SINT MSRTs when all RT channels
722 * are not written"
723 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800724 if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu457d0a62014-08-18 13:02:26 +0800725 !icd_format_is_int(info->format)) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800726 want_mcs = true;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800727 } else if (info->samples <= 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800728 /*
729 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
730 *
731 * "When MCS is buffer is used for color clear of non-multisampler
732 * render target, the following restrictions apply.
733 * - Support is limited to tiled render targets.
734 * - Support is for non-mip-mapped and non-array surface types
735 * only.
736 * - Clear is supported only on the full RT; i.e., no partial clear
737 * or overlapping clears.
738 * - MCS buffer for non-MSRT is supported only for RT formats
739 * 32bpp, 64bpp and 128bpp.
740 * ..."
741 */
742 if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800743 info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800744 switch (layout->block_size) {
745 case 4:
746 case 8:
747 case 16:
748 want_mcs = true;
749 break;
750 default:
751 break;
752 }
753 }
754 }
755
756 return want_mcs;
757}
758
759static bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800760layout_want_hiz(const struct intel_layout *layout,
761 const struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800762{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800763 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800764
Chia-I Wuc45db532015-02-19 11:20:38 -0700765 if (intel_debug & INTEL_DEBUG_NOHIZ)
766 return false;
767
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800768 if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800769 return false;
770
Chia-I Wu9ec7e702015-02-19 13:18:42 -0700771 if (!intel_format_has_depth(params->gpu, info->format))
Chia-I Wu4bc47012014-08-14 13:03:25 +0800772 return false;
773
Chia-I Wu457d0a62014-08-18 13:02:26 +0800774 /*
Chia-I Wuc45db532015-02-19 11:20:38 -0700775 * HiZ implies separate stencil on Gen6. We do not want to copy stencils
776 * values between combined and separate stencil buffers when HiZ is enabled
777 * or disabled.
Chia-I Wu457d0a62014-08-18 13:02:26 +0800778 */
Chia-I Wuc45db532015-02-19 11:20:38 -0700779 if (intel_gpu_gen(params->gpu) == INTEL_GEN(6))
Chia-I Wuc94f3e52014-10-07 14:45:05 +0800780 return false;
Chia-I Wuc94f3e52014-10-07 14:45:05 +0800781
Chia-I Wu457d0a62014-08-18 13:02:26 +0800782 return true;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800783}
784
785static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800786layout_init_aux(struct intel_layout *layout,
787 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800788{
789 if (layout_want_hiz(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800790 layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800791 else if (layout_want_mcs(layout, params))
Chia-I Wu457d0a62014-08-18 13:02:26 +0800792 layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800793}
794
795static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800796layout_align(struct intel_layout *layout, struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800797{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800798 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800799 int align_w = 1, align_h = 1, pad_h = 0;
800
801 /*
802 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
803 *
804 * "To determine the necessary padding on the bottom and right side of
805 * the surface, refer to the table in Section 7.18.3.4 for the i and j
806 * parameters for the surface format in use. The surface must then be
807 * extended to the next multiple of the alignment unit size in each
808 * dimension, and all texels contained in this extended surface must
809 * have valid GTT entries."
810 *
811 * "For cube surfaces, an additional two rows of padding are required
812 * at the bottom of the surface. This must be ensured regardless of
813 * whether the surface is stored tiled or linear. This is due to the
814 * potential rotation of cache line orientation from memory to cache."
815 *
816 * "For compressed textures (BC* and FXT1 surface formats), padding at
817 * the bottom of the surface is to an even compressed row, which is
818 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
819 * purposes, these surfaces behave as if j = 8 only for surface
820 * padding purposes. The value of 4 for j still applies for mip level
821 * alignment and QPitch calculation."
822 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800823 if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
824 if (align_w < layout->align_i)
825 align_w = layout->align_i;
826 if (align_h < layout->align_j)
827 align_h = layout->align_j;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800828
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800829 /* in case it is used as a cube */
830 if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800831 pad_h += 2;
832
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800833 if (params->compressed && align_h < layout->align_j * 2)
834 align_h = layout->align_j * 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800835 }
836
837 /*
838 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
839 *
840 * "If the surface contains an odd number of rows of data, a final row
841 * below the surface must be allocated."
842 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800843 if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
844 align_h = 2;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800845
846 /*
847 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu457d0a62014-08-18 13:02:26 +0800848 * intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800849 * To avoid out-of-bound access, we have to pad.
850 */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800851 if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
852 info->mipLevels == 1 &&
853 info->arraySize == 1 &&
854 info->extent.depth == 1) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800855 if (align_w < 8)
856 align_w = 8;
857 if (align_h < 4)
858 align_h = 4;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800859 }
860
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800861 params->max_x = u_align(params->max_x, align_w);
862 params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800863}
864
865/* note that this may force the texture to be linear */
866static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800867layout_calculate_bo_size(struct intel_layout *layout,
868 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800869{
870 assert(params->max_x % layout->block_width == 0);
871 assert(params->max_y % layout->block_height == 0);
872 assert(layout->layer_height % layout->block_height == 0);
873
874 layout->bo_stride =
875 (params->max_x / layout->block_width) * layout->block_size;
876 layout->bo_height = params->max_y / layout->block_height;
877
878 while (true) {
879 unsigned w = layout->bo_stride, h = layout->bo_height;
880 unsigned align_w, align_h;
881
882 /*
883 * From the Haswell PRM, volume 5, page 163:
884 *
885 * "For linear surfaces, additional padding of 64 bytes is required
886 * at the bottom of the surface. This is in addition to the padding
887 * required above."
888 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800889 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
890 (params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4bc47012014-08-14 13:03:25 +0800891 layout->tiling == INTEL_TILING_NONE) {
892 layout->bo_height +=
893 (64 + layout->bo_stride - 1) / layout->bo_stride;
894 }
895
896 /*
897 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
898 *
899 * "- For linear render target surfaces, the pitch must be a
900 * multiple of the element size for non-YUV surface formats.
901 * Pitch must be a multiple of 2 * element size for YUV surface
902 * formats.
903 * - For other linear surfaces, the pitch can be any multiple of
904 * bytes.
905 * - For tiled surfaces, the pitch must be a multiple of the tile
906 * width."
907 *
908 * Different requirements may exist when the bo is used in different
909 * places, but our alignments here should be good enough that we do not
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800910 * need to check layout->info->usage.
Chia-I Wu4bc47012014-08-14 13:03:25 +0800911 */
912 switch (layout->tiling) {
913 case INTEL_TILING_X:
914 align_w = 512;
915 align_h = 8;
916 break;
917 case INTEL_TILING_Y:
918 align_w = 128;
919 align_h = 32;
920 break;
921 default:
Chia-I Wu4806f2c2015-02-19 13:54:35 -0700922 if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu4bc47012014-08-14 13:03:25 +0800923 /*
924 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
925 *
926 * "A 4KB tile is subdivided into 8-high by 8-wide array of
927 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
928 * bytes."
929 *
930 * Since we asked for INTEL_TILING_NONE instead of the non-existent
931 * INTEL_TILING_W, we want to align to W tiles here.
932 */
933 align_w = 64;
934 align_h = 64;
935 } else {
936 /* some good enough values */
937 align_w = 64;
938 align_h = 2;
939 }
940 break;
941 }
942
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800943 w = u_align(w, align_w);
944 h = u_align(h, align_h);
Chia-I Wu4bc47012014-08-14 13:03:25 +0800945
946 /* make sure the bo is mappable */
947 if (layout->tiling != INTEL_TILING_NONE) {
948 /*
949 * Usually only the first 256MB of the GTT is mappable.
950 *
951 * See also how intel_context::max_gtt_map_object_size is calculated.
952 */
953 const size_t mappable_gtt_size = 256 * 1024 * 1024;
954
955 /*
956 * Be conservative. We may be able to switch from VALIGN_4 to
957 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
958 */
959 if (mappable_gtt_size / w / 4 < h) {
960 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
961 layout->tiling = INTEL_TILING_NONE;
962 /* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu457d0a62014-08-18 13:02:26 +0800963 if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800964 params->info->samples <= 1)
Chia-I Wu457d0a62014-08-18 13:02:26 +0800965 layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800966
967 continue;
968 } else {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800969 /* mapping will fail */
Chia-I Wu4bc47012014-08-14 13:03:25 +0800970 }
971 }
972 }
973
974 layout->bo_stride = w;
975 layout->bo_height = h;
976 break;
977 }
978}
979
980static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800981layout_calculate_hiz_size(struct intel_layout *layout,
982 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +0800983{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +0800984 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu457d0a62014-08-18 13:02:26 +0800985 const unsigned hz_align_j = 8;
986 enum intel_layout_walk_type hz_walk;
987 unsigned hz_width, hz_height, lv;
988 unsigned hz_clear_w, hz_clear_h;
Chia-I Wu4bc47012014-08-14 13:03:25 +0800989
Chia-I Wu457d0a62014-08-18 13:02:26 +0800990 assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
991
992 assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
993 layout->walk == INTEL_LAYOUT_WALK_3D);
994
995 /*
996 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
997 *
998 * "The hierarchical depth buffer does not support the LOD field, it is
999 * assumed by hardware to be zero. A separate hierarachical depth
1000 * buffer is required for each LOD used, and the corresponding
1001 * buffer's state delivered to hardware each time a new depth buffer
1002 * state with modified LOD is delivered."
1003 *
1004 * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
1005 */
1006 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1007 hz_walk = layout->walk;
1008 else
1009 hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001010
1011 /*
1012 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1013 * PRM, volume 2 part 1, page 312-313.
1014 *
1015 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1016 * memory row.
1017 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001018 switch (hz_walk) {
1019 case INTEL_LAYOUT_WALK_LOD:
1020 {
1021 unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
1022 unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
1023 unsigned cur_tx, cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001024
Chia-I Wu457d0a62014-08-18 13:02:26 +08001025 /* figure out the tile offsets of LODs */
1026 hz_width = 0;
1027 hz_height = 0;
1028 cur_tx = 0;
1029 cur_ty = 0;
1030 for (lv = 0; lv < info->mipLevels; lv++) {
1031 unsigned tw, th;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001032
Chia-I Wu457d0a62014-08-18 13:02:26 +08001033 lod_tx[lv] = cur_tx;
1034 lod_ty[lv] = cur_ty;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001035
Chia-I Wu457d0a62014-08-18 13:02:26 +08001036 tw = u_align(layout->lods[lv].slice_width, 16);
1037 th = u_align(layout->lods[lv].slice_height, hz_align_j) *
1038 info->arraySize / 2;
1039 /* convert to Y-tiles */
1040 tw = u_align(tw, 128) / 128;
1041 th = u_align(th, 32) / 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001042
Chia-I Wu457d0a62014-08-18 13:02:26 +08001043 if (hz_width < cur_tx + tw)
1044 hz_width = cur_tx + tw;
1045 if (hz_height < cur_ty + th)
1046 hz_height = cur_ty + th;
1047
1048 if (lv == 1)
1049 cur_tx += tw;
1050 else
1051 cur_ty += th;
1052 }
1053
1054 /* convert tile offsets to memory offsets */
1055 for (lv = 0; lv < info->mipLevels; lv++) {
1056 layout->aux_offsets[lv] =
1057 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1058 }
1059 hz_width *= 128;
1060 hz_height *= 32;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001061 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001062 break;
1063 case INTEL_LAYOUT_WALK_LAYER:
1064 {
1065 const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001066 const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001067 const unsigned htail =
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001068 ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu457d0a62014-08-18 13:02:26 +08001069 const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001070
Chia-I Wu457d0a62014-08-18 13:02:26 +08001071 hz_width = u_align(layout->lods[0].slice_width, 16);
1072
1073 hz_height = hz_qpitch * info->arraySize / 2;
1074 if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
1075 hz_height = u_align(hz_height, 8);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001076 }
Chia-I Wu457d0a62014-08-18 13:02:26 +08001077 break;
1078 case INTEL_LAYOUT_WALK_3D:
1079 hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001080
Chia-I Wu457d0a62014-08-18 13:02:26 +08001081 hz_height = 0;
1082 for (lv = 0; lv < info->mipLevels; lv++) {
1083 const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
1084 /* according to the formula, slices are packed together vertically */
1085 hz_height += h * u_minify(info->extent.depth, lv);
1086 }
1087 hz_height /= 2;
1088 break;
Chia-I Wu9a056dd2015-02-11 13:19:39 -07001089 default:
1090 assert(!"unknown layout walk");
1091 hz_width = 0;
1092 hz_height = 0;
1093 break;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001094 }
1095
Chia-I Wu457d0a62014-08-18 13:02:26 +08001096 /*
1097 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1098 * Experiments on Haswell show that aligning the RECTLIST primitive and
1099 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1100 * aligned.
1101 */
1102 hz_clear_w = 8;
1103 hz_clear_h = 4;
1104 switch (info->samples) {
1105 case 0:
1106 case 1:
1107 default:
1108 break;
1109 case 2:
1110 hz_clear_w /= 2;
1111 break;
1112 case 4:
1113 hz_clear_w /= 2;
1114 hz_clear_h /= 2;
1115 break;
1116 case 8:
1117 hz_clear_w /= 4;
1118 hz_clear_h /= 2;
1119 break;
1120 case 16:
1121 hz_clear_w /= 4;
1122 hz_clear_h /= 4;
1123 break;
1124 }
1125
1126 for (lv = 0; lv < info->mipLevels; lv++) {
1127 if (u_minify(layout->width0, lv) % hz_clear_w ||
1128 u_minify(layout->height0, lv) % hz_clear_h)
1129 break;
1130 layout->aux_enables |= 1 << lv;
1131 }
1132
1133 /* we padded to allow this in layout_align() */
1134 if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
1135 layout->aux_enables |= 0x1;
1136
Chia-I Wu4bc47012014-08-14 13:03:25 +08001137 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001138 layout->aux_stride = u_align(hz_width, 128);
1139 layout->aux_height = u_align(hz_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001140}
1141
1142static void
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001143layout_calculate_mcs_size(struct intel_layout *layout,
1144 struct intel_layout_params *params)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001145{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001146 const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001147 int mcs_width, mcs_height, mcs_cpp;
1148 int downscale_x, downscale_y;
1149
Chia-I Wu457d0a62014-08-18 13:02:26 +08001150 assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001151
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001152 if (info->samples > 1) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001153 /*
1154 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1155 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1156 * need of scale down could be that the clear rectangle is used to clear
1157 * the MCS instead of the RT.
1158 *
1159 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1160 * 2x2 factor could come from that the hardware writes 128 bits (an
1161 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1162 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1163 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1164 * pixel block in the RT.
1165 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001166 switch (info->samples) {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001167 case 2:
1168 case 4:
1169 downscale_x = 8;
1170 downscale_y = 2;
1171 mcs_cpp = 1;
1172 break;
1173 case 8:
1174 downscale_x = 2;
1175 downscale_y = 2;
1176 mcs_cpp = 4;
1177 break;
1178 case 16:
1179 downscale_x = 2;
1180 downscale_y = 1;
1181 mcs_cpp = 8;
1182 break;
1183 default:
1184 assert(!"unsupported sample count");
1185 return;
1186 break;
1187 }
1188
1189 /*
1190 * It also appears that the 2x2 subspans generated by the scaled-down
1191 * clear rectangle cannot be masked. The scale-down clear rectangle
1192 * thus must be aligned to 2x2, and we need to pad.
1193 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001194 mcs_width = u_align(layout->width0, downscale_x * 2);
1195 mcs_height = u_align(layout->height0, downscale_y * 2);
1196 } else {
Chia-I Wu4bc47012014-08-14 13:03:25 +08001197 /*
1198 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1199 *
1200 * " Pixels Lines
1201 * TiledY RT CL
1202 * bpp
1203 * 32 8 4
1204 * 64 4 4
1205 * 128 2 4
1206 *
1207 * TiledX RT CL
1208 * bpp
1209 * 32 16 2
1210 * 64 8 2
1211 * 128 4 2"
1212 *
1213 * This table and the two following tables define the RT alignments, the
1214 * clear rectangle alignments, and the clear rectangle scale factors.
1215 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1216 * that the clear rectangle alignments are 16x32 blocks, and the clear
1217 * rectangle scale factors are 8x16 blocks.
1218 *
1219 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1220 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1221 * 8x16 blocks.
1222 *
1223 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1224 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1225 * which says that a Y-tile maps to 128x256 blocks (\see
1226 * intel_get_non_msrt_mcs_alignment). It does not really change
1227 * anything except for the size of the allocated MCS. Let's see if we
1228 * hit out-of-bound access.
1229 */
1230 switch (layout->tiling) {
1231 case INTEL_TILING_X:
1232 downscale_x = 64 / layout->block_size;
1233 downscale_y = 2;
1234 break;
1235 case INTEL_TILING_Y:
1236 downscale_x = 32 / layout->block_size;
1237 downscale_y = 4;
1238 break;
1239 default:
1240 assert(!"unsupported tiling mode");
1241 return;
1242 break;
1243 }
1244
1245 downscale_x *= 8;
1246 downscale_y *= 16;
1247
1248 /*
1249 * From the Haswell PRM, volume 7, page 652:
1250 *
1251 * "Clear rectangle must be aligned to two times the number of
1252 * pixels in the table shown below due to 16X16 hashing across the
1253 * slice."
1254 *
1255 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1256 * 2x2, and we need to pad.
1257 */
Chia-I Wu457d0a62014-08-18 13:02:26 +08001258 mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
1259 mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001260 mcs_cpp = 16; /* an OWord */
1261 }
1262
Chia-I Wu457d0a62014-08-18 13:02:26 +08001263 layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001264 /* align to Y-tile */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001265 layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
1266 layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001267}
1268
1269/**
1270 * Initialize the layout. Callers should zero-initialize \p layout first.
1271 */
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001272void intel_layout_init(struct intel_layout *layout,
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001273 struct intel_dev *dev,
Chia-I Wu794d12a2014-09-15 14:55:25 +08001274 const XGL_IMAGE_CREATE_INFO *info,
1275 bool scanout)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001276{
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001277 struct intel_layout_params params;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001278
1279 memset(&params, 0, sizeof(params));
Chia-I Wuc94f3e52014-10-07 14:45:05 +08001280 params.dev = dev;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001281 params.gpu = dev->gpu;
1282 params.info = info;
Chia-I Wu794d12a2014-09-15 14:55:25 +08001283 params.scanout = scanout;
Chia-I Wu4bc47012014-08-14 13:03:25 +08001284
1285 /* note that there are dependencies between these functions */
1286 layout_init_aux(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001287 layout_init_size_and_format(layout, &params);
1288 layout_init_walk(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001289 layout_init_tiling(layout, &params);
1290 layout_init_alignments(layout, &params);
Chia-I Wu457d0a62014-08-18 13:02:26 +08001291 layout_init_lods(layout, &params);
Chia-I Wu4bc47012014-08-14 13:03:25 +08001292 layout_init_layer_height(layout, &params);
1293
1294 layout_align(layout, &params);
1295 layout_calculate_bo_size(layout, &params);
1296
Chia-I Wu457d0a62014-08-18 13:02:26 +08001297 switch (layout->aux) {
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001298 case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001299 layout_calculate_hiz_size(layout, &params);
1300 break;
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001301 case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu4bc47012014-08-14 13:03:25 +08001302 layout_calculate_mcs_size(layout, &params);
1303 break;
1304 default:
1305 break;
1306 }
1307}
1308
1309/**
1310 * Update the tiling mode and bo stride (for imported resources).
1311 */
1312bool
Chia-I Wu8a8d8b62014-08-14 13:26:26 +08001313intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu457d0a62014-08-18 13:02:26 +08001314 enum intel_tiling_mode tiling,
1315 unsigned bo_stride)
Chia-I Wu4bc47012014-08-14 13:03:25 +08001316{
1317 if (!(layout->valid_tilings & (1 << tiling)))
1318 return false;
1319
1320 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1321 (tiling == INTEL_TILING_Y && bo_stride % 128))
1322 return false;
1323
1324 layout->tiling = tiling;
1325 layout->bo_stride = bo_stride;
1326
1327 return true;
1328}
Chia-I Wu63a53972014-12-04 12:51:54 +08001329
1330/**
1331 * Return the offset (in bytes) to a slice within the bo.
1332 *
1333 * The returned offset is aligned to tile size. Since slices are not
1334 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1335 * from the tile origin to the slice are also returned. X offset is always a
1336 * multiple of 4 and Y offset is always a multiple of 2.
1337 */
1338unsigned
1339intel_layout_get_slice_tile_offset(const struct intel_layout *layout,
1340 unsigned level, unsigned slice,
1341 unsigned *x_offset, unsigned *y_offset)
1342{
1343 unsigned tile_w, tile_h, tile_size, row_size;
1344 unsigned tile_offset, x, y;
1345
1346 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1347
1348 switch (layout->tiling) {
1349 case INTEL_TILING_NONE:
1350 /* W-tiled */
Jeremy Hayes2b7e88a2015-01-23 08:51:43 -07001351 if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu63a53972014-12-04 12:51:54 +08001352 tile_w = 64;
1353 tile_h = 64;
1354 }
1355 else {
1356 tile_w = 1;
1357 tile_h = 1;
1358 }
1359 break;
1360 case INTEL_TILING_X:
1361 tile_w = 512;
1362 tile_h = 8;
1363 break;
1364 case INTEL_TILING_Y:
1365 tile_w = 128;
1366 tile_h = 32;
1367 break;
1368 default:
1369 assert(!"unknown tiling");
1370 tile_w = 1;
1371 tile_h = 1;
1372 break;
1373 }
1374
1375 tile_size = tile_w * tile_h;
1376 row_size = layout->bo_stride * tile_h;
1377
1378 intel_layout_get_slice_pos(layout, level, slice, &x, &y);
1379 /* in bytes */
1380 intel_layout_pos_to_mem(layout, x, y, &x, &y);
1381 tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1382
1383 /*
1384 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1385 * aligned at this point.
1386 */
1387 assert(tile_offset % tile_size == 0);
1388
1389 /*
1390 * because of the possible values of align_i and align_j in
1391 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1392 * 4 and y_offset is guaranteed to be a multiple of 2.
1393 */
1394 if (x_offset) {
1395 /* in pixels */
1396 x = (x % tile_w) / layout->block_size * layout->block_width;
1397 assert(x % 4 == 0);
1398
1399 *x_offset = x;
1400 }
1401
1402 if (y_offset) {
1403 /* in pixels */
1404 y = (y % tile_h) * layout->block_height;
1405 assert(y % 2 == 0);
1406
1407 *y_offset = y;
1408 }
1409
1410 return tile_offset;
1411}