blob: 8d7e9c1db0e2a86f362f0686daf1d7ecf9dc2c59 [file] [log] [blame]
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001/*
2 * XGL
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
Chia-I Wue46da3e2014-08-08 21:52:48 +080025#include "kmd/winsys.h"
26#include "dev.h"
27#include "gpu.h"
28#include "img.h"
Chia-I Wu4ea339e2014-08-08 21:56:26 +080029
30struct tex_layout {
Chia-I Wue46da3e2014-08-08 21:52:48 +080031 const struct intel_gpu *gpu;
32 const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +080033
34 bool has_depth, has_stencil;
35 bool hiz, separate_stencil;
36
Chia-I Wue46da3e2014-08-08 21:52:48 +080037 XGL_FORMAT format;
Chia-I Wu4ea339e2014-08-08 21:56:26 +080038 unsigned block_width, block_height, block_size;
39 bool compressed;
40
41 enum intel_tiling_mode tiling;
42 unsigned valid_tilings; /* bitmask of valid tiling modes */
43
44 bool array_spacing_full;
45 bool interleaved;
46
47 struct {
48 int w, h, d;
Chia-I Wue46da3e2014-08-08 21:52:48 +080049 struct intel_img_slice *slices;
50 } levels[INTEL_IMG_MAX_LEVELS];
Chia-I Wu4ea339e2014-08-08 21:56:26 +080051
52 int align_i, align_j;
53 int qpitch;
54
55 int width, height;
56
57 int bo_stride, bo_height;
58 int hiz_stride, hiz_height;
59};
60
/*
 * Largest size, in bytes, that a single surface may have (2 GB).
 *
 * From the Ivy Bridge PRM, volume 1 part 1, page 105:
 *
 *  "In addition to restrictions on maximum height, width, and depth,
 *   surfaces are also restricted to a maximum size in bytes. This
 *   maximum is 2 GB for all products and all surface types."
 */
static const size_t intel_max_resource_size = 0x80000000u;
Chia-I Wu4ea339e2014-08-08 21:56:26 +080069
70static void
71tex_layout_init_qpitch(struct tex_layout *layout)
72{
Chia-I Wue46da3e2014-08-08 21:52:48 +080073 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +080074 int h0, h1;
75
Chia-I Wue46da3e2014-08-08 21:52:48 +080076 if (info->arraySize <= 1)
Chia-I Wu4ea339e2014-08-08 21:56:26 +080077 return;
78
Chia-I Wue46da3e2014-08-08 21:52:48 +080079 h0 = u_align(layout->levels[0].h, layout->align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +080080
81 if (!layout->array_spacing_full) {
82 layout->qpitch = h0;
83 return;
84 }
85
Chia-I Wue46da3e2014-08-08 21:52:48 +080086 h1 = u_align(layout->levels[1].h, layout->align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +080087
88 /*
89 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
90 *
91 * "The following equation is used for surface formats other than
92 * compressed textures:
93 *
94 * QPitch = (h0 + h1 + 11j)"
95 *
96 * "The equation for compressed textures (BC* and FXT1 surface formats)
97 * follows:
98 *
99 * QPitch = (h0 + h1 + 11j) / 4"
100 *
101 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
102 * value calculated in the equation above, for every other odd Surface
103 * Height starting from 1 i.e. 1,5,9,13"
104 *
105 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
106 *
107 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
108 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
109 *
110 * QPitch = (h0 + h1 + 12j)
111 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
112 *
113 * (There are many typos or missing words here...)"
114 *
115 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
116 * the base address. The PRM divides QPitch by 4 for compressed formats
117 * because the block height for those formats are 4, and it wants QPitch to
118 * mean the number of memory rows, as opposed to texel rows, between
119 * slices. Since we use texel rows in tex->slice_offsets, we do not need
120 * to divide QPitch by 4.
121 */
122 layout->qpitch = h0 + h1 +
Chia-I Wue46da3e2014-08-08 21:52:48 +0800123 ((intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800124
Chia-I Wue46da3e2014-08-08 21:52:48 +0800125 if (intel_gpu_gen(layout->gpu) == INTEL_GEN(6) && info->samples > 1 &&
126 info->extent.height % 4 == 1)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800127 layout->qpitch += 4;
128}
129
130static void
131tex_layout_init_alignments(struct tex_layout *layout)
132{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800133 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800134
135 /*
136 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
137 *
138 * "surface format align_i align_j
139 * YUV 4:2:2 formats 4 *see below
140 * BC1-5 4 4
141 * FXT1 8 4
142 * all other formats 4 *see below"
143 *
144 * "- align_j = 4 for any depth buffer
145 * - align_j = 2 for separate stencil buffer
146 * - align_j = 4 for any render target surface is multisampled (4x)
147 * - align_j = 4 for any render target surface with Surface Vertical
148 * Alignment = VALIGN_4
149 * - align_j = 2 for any render target surface with Surface Vertical
150 * Alignment = VALIGN_2
151 * - align_j = 2 for all other render target surface
152 * - align_j = 2 for any sampling engine surface with Surface Vertical
153 * Alignment = VALIGN_2
154 * - align_j = 4 for any sampling engine surface with Surface Vertical
155 * Alignment = VALIGN_4"
156 *
157 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
158 *
159 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
160 * the Surface Format is 96 bits per element (BPE)."
161 *
162 * They can be rephrased as
163 *
164 * align_i align_j
165 * compressed formats block width block height
166 * PIPE_FORMAT_S8_UINT 4 2
167 * other depth/stencil formats 4 4
168 * 4x multisampled 4 4
169 * bpp 96 4 2
170 * others 4 2 or 4
171 */
172
173 /*
174 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
175 *
176 * "surface defined by surface format align_i align_j
177 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
178 * not D16_UNORM 4 4
179 * 3DSTATE_STENCIL_BUFFER N/A 8 8
180 * SURFACE_STATE BC*, ETC*, EAC* 4 4
181 * FXT1 8 4
182 * all others (set by SURFACE_STATE)"
183 *
184 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
185 *
186 * "- This field (Surface Vertical Aligment) is intended to be set to
187 * VALIGN_4 if the surface was rendered as a depth buffer, for a
188 * multisampled (4x) render target, or for a multisampled (8x)
189 * render target, since these surfaces support only alignment of 4.
190 * - Use of VALIGN_4 for other surfaces is supported, but uses more
191 * memory.
192 * - This field must be set to VALIGN_4 for all tiled Y Render Target
193 * surfaces.
194 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
195 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
196 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
197 * must be set to VALIGN_4."
198 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
199 *
200 * "- This field (Surface Horizontal Aligment) is intended to be set to
201 * HALIGN_8 only if the surface was rendered as a depth buffer with
202 * Z16 format or a stencil buffer, since these surfaces support only
203 * alignment of 8.
204 * - Use of HALIGN_8 for other surfaces is supported, but uses more
205 * memory.
206 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
207 * - This field must be set to HALIGN_8 if the Surface Format is
208 * FXT1."
209 *
210 * They can be rephrased as
211 *
212 * align_i align_j
213 * compressed formats block width block height
214 * PIPE_FORMAT_Z16_UNORM 8 4
215 * PIPE_FORMAT_S8_UINT 8 8
216 * other depth/stencil formats 4 or 8 4
217 * 2x or 4x multisampled 4 or 8 4
218 * tiled Y 4 or 8 4 (if rt)
219 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
220 * others 4 or 8 2 or 4
221 */
222
223 if (layout->compressed) {
224 /* this happens to be the case */
225 layout->align_i = layout->block_width;
226 layout->align_j = layout->block_height;
227 }
Chia-I Wue46da3e2014-08-08 21:52:48 +0800228 else if (layout->format.numericFormat == XGL_NUM_FMT_DS) {
229 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) {
230 switch (layout->format.channelFormat) {
231 case XGL_CH_FMT_R16:
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800232 layout->align_i = 8;
233 layout->align_j = 4;
234 break;
Chia-I Wue46da3e2014-08-08 21:52:48 +0800235 case XGL_CH_FMT_R8:
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800236 layout->align_i = 8;
237 layout->align_j = 8;
238 break;
239 default:
240 layout->align_i = 4;
241 layout->align_j = 4;
242 break;
243 }
244 }
245 else {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800246 switch (layout->format.channelFormat) {
247 case XGL_CH_FMT_R8:
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800248 layout->align_i = 4;
249 layout->align_j = 2;
250 break;
251 default:
252 layout->align_i = 4;
253 layout->align_j = 4;
254 break;
255 }
256 }
257 }
258 else {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800259 const bool valign_4 = (info->samples > 1) ||
260 (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7) &&
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800261 layout->tiling == INTEL_TILING_Y &&
Chia-I Wue46da3e2014-08-08 21:52:48 +0800262 (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800263
264 if (valign_4)
265 assert(layout->block_size != 12);
266
267 layout->align_i = 4;
268 layout->align_j = (valign_4) ? 4 : 2;
269 }
270
271 /*
272 * the fact that align i and j are multiples of block width and height
273 * respectively is what makes the size of the bo a multiple of the block
274 * size, slices start at block boundaries, and many of the computations
275 * work.
276 */
277 assert(layout->align_i % layout->block_width == 0);
278 assert(layout->align_j % layout->block_height == 0);
279
Chia-I Wue46da3e2014-08-08 21:52:48 +0800280 /* make sure u_align() works */
281 assert(u_is_pow2(layout->align_i) &&
282 u_is_pow2(layout->align_j));
283 assert(u_is_pow2(layout->block_width) &&
284 u_is_pow2(layout->block_height));
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800285}
286
287static void
288tex_layout_init_levels(struct tex_layout *layout)
289{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800290 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800291 int last_level, lv;
292
Chia-I Wue46da3e2014-08-08 21:52:48 +0800293 last_level = info->mipLevels - 1;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800294
295 /* need at least 2 levels to compute full qpitch */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800296 if (last_level == 0 && info->arraySize > 1 && layout->array_spacing_full)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800297 last_level++;
298
299 /* compute mip level sizes */
300 for (lv = 0; lv <= last_level; lv++) {
301 int w, h, d;
302
Chia-I Wue46da3e2014-08-08 21:52:48 +0800303 w = u_minify(info->extent.width, lv);
304 h = u_minify(info->extent.height, lv);
305 d = u_minify(info->extent.depth, lv);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800306
307 /*
308 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
309 *
310 * "The dimensions of the mip maps are first determined by applying
311 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
312 * above. Then, if necessary, they are padded out to compression
313 * block boundaries."
314 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800315 w = u_align(w, layout->block_width);
316 h = u_align(h, layout->block_height);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800317
318 /*
319 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
320 *
321 * "If the surface is multisampled (4x), these values must be
322 * adjusted as follows before proceeding:
323 *
324 * W_L = ceiling(W_L / 2) * 4
325 * H_L = ceiling(H_L / 2) * 4"
326 *
327 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
328 *
329 * "If the surface is multisampled and it is a depth or stencil
330 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
331 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
332 * before proceeding:
333 *
334 * #samples W_L = H_L =
335 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
336 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
337 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
338 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
339 *
340 * For interleaved samples (4x), where pixels
341 *
342 * (x, y ) (x+1, y )
343 * (x, y+1) (x+1, y+1)
344 *
345 * would be is occupied by
346 *
347 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
348 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
349 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
350 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
351 *
352 * Thus the need to
353 *
Chia-I Wue46da3e2014-08-08 21:52:48 +0800354 * w = u_align(w, 2) * 2;
355 * y = u_align(y, 2) * 2;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800356 */
357 if (layout->interleaved) {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800358 switch (info->samples) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800359 case 0:
360 case 1:
361 break;
362 case 2:
Chia-I Wue46da3e2014-08-08 21:52:48 +0800363 w = u_align(w, 2) * 2;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800364 break;
365 case 4:
Chia-I Wue46da3e2014-08-08 21:52:48 +0800366 w = u_align(w, 2) * 2;
367 h = u_align(h, 2) * 2;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800368 break;
369 case 8:
Chia-I Wue46da3e2014-08-08 21:52:48 +0800370 w = u_align(w, 2) * 4;
371 h = u_align(h, 2) * 2;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800372 break;
373 case 16:
Chia-I Wue46da3e2014-08-08 21:52:48 +0800374 w = u_align(w, 2) * 4;
375 h = u_align(h, 2) * 4;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800376 break;
377 default:
378 assert(!"unsupported sample count");
379 break;
380 }
381 }
382
383 layout->levels[lv].w = w;
384 layout->levels[lv].h = h;
385 layout->levels[lv].d = d;
386 }
387}
388
389static void
390tex_layout_init_spacing(struct tex_layout *layout)
391{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800392 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800393
Chia-I Wue46da3e2014-08-08 21:52:48 +0800394 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800395 /*
396 * It is not explicitly states, but render targets are expected to be
397 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
398 * expected to be IMS (samples interleaved).
399 *
400 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
401 */
402 if (layout->has_depth || layout->has_stencil) {
403 layout->interleaved = true;
404
405 /*
406 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
407 *
408 * "note that the depth buffer and stencil buffer have an implied
409 * value of ARYSPC_FULL"
410 */
411 layout->array_spacing_full = true;
412 }
413 else {
414 layout->interleaved = false;
415
416 /*
417 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
418 *
419 * "If Multisampled Surface Storage Format is MSFMT_MSS and
420 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
421 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
422 *
423 * As multisampled resources are not mipmapped, we never use
424 * ARYSPC_FULL for them.
425 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800426 if (info->samples > 1)
427 assert(info->mipLevels == 1);
428 layout->array_spacing_full = (info->mipLevels > 1);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800429 }
430 }
431 else {
432 /* GEN6 supports only interleaved samples */
433 layout->interleaved = true;
434
435 /*
436 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
437 *
438 * "The separate stencil buffer does not support mip mapping, thus
439 * the storage for LODs other than LOD 0 is not needed. The
440 * following QPitch equation applies only to the separate stencil
441 * buffer:
442 *
443 * QPitch = h_0"
444 *
445 * GEN6 does not support compact spacing otherwise.
446 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800447 layout->array_spacing_full =
448 !(layout->format.channelFormat == XGL_CH_FMT_R8 &&
449 layout->format.numericFormat == XGL_NUM_FMT_DS);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800450 }
451}
452
453static void
454tex_layout_init_tiling(struct tex_layout *layout)
455{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800456 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800457 const unsigned tile_none = 1 << INTEL_TILING_NONE;
458 const unsigned tile_x = 1 << INTEL_TILING_X;
459 const unsigned tile_y = 1 << INTEL_TILING_Y;
460 unsigned valid_tilings = tile_none | tile_x | tile_y;
461
Chia-I Wue46da3e2014-08-08 21:52:48 +0800462 if (info->tiling == XGL_LINEAR_TILING)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800463 valid_tilings &= tile_none;
464
465 /*
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800466 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
467 *
468 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
469 * Depth Buffer is not supported."
470 *
471 * "The Depth Buffer, if tiled, must use Y-Major tiling."
472 *
473 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
474 *
475 * "W-Major Tile Format is used for separate stencil."
476 *
477 * Since the HW does not support W-tiled fencing, we have to do it in the
478 * driver.
479 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800480 if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
481 if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
482 layout->format.numericFormat == XGL_NUM_FMT_DS)
483 valid_tilings &= tile_none;
484 else
485 valid_tilings &= tile_y;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800486 }
487
Chia-I Wue46da3e2014-08-08 21:52:48 +0800488 if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800489 /*
490 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
491 *
492 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
493 * either TileX or Linear."
494 */
495 if (layout->block_size == 16)
496 valid_tilings &= ~tile_y;
497
498 /*
499 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
500 *
501 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
502 * for all tiled Y Render Target surfaces."
503 *
504 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
505 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800506 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800507 valid_tilings &= ~tile_y;
508 }
509
510 /* no conflicting binding flags */
511 assert(valid_tilings);
512
513 layout->valid_tilings = valid_tilings;
514
Chia-I Wue46da3e2014-08-08 21:52:48 +0800515 if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
516 XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800517 /*
518 * heuristically set a minimum width/height for enabling tiling
519 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800520 if (info->extent.width < 64 && (valid_tilings & ~tile_x))
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800521 valid_tilings &= ~tile_x;
522
Chia-I Wue46da3e2014-08-08 21:52:48 +0800523 if ((info->extent.width < 32 || info->extent.height < 16) &&
524 (info->extent.width < 16 || info->extent.height < 32) &&
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800525 (valid_tilings & ~tile_y))
526 valid_tilings &= ~tile_y;
527 }
528 else {
529 /* force linear if we are not sure where the texture is bound to */
530 if (valid_tilings & tile_none)
531 valid_tilings &= tile_none;
532 }
533
534 /* prefer tiled over linear */
535 if (valid_tilings & tile_y)
536 layout->tiling = INTEL_TILING_Y;
537 else if (valid_tilings & tile_x)
538 layout->tiling = INTEL_TILING_X;
539 else
540 layout->tiling = INTEL_TILING_NONE;
541}
542
543static void
544tex_layout_init_format(struct tex_layout *layout)
545{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800546 const XGL_IMAGE_CREATE_INFO *info = layout->info;
547 XGL_FORMAT format = info->format;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800548
Chia-I Wue46da3e2014-08-08 21:52:48 +0800549 if (format.numericFormat == XGL_NUM_FMT_DS) {
550 switch (format.channelFormat) {
551 case XGL_CH_FMT_R32G8:
552 if (layout->separate_stencil)
553 format.channelFormat = XGL_CH_FMT_R32;
554 break;
555 default:
556 break;
557 }
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800558 }
559
560 layout->format = format;
561
Chia-I Wue46da3e2014-08-08 21:52:48 +0800562 switch (format.channelFormat) {
563 case XGL_CH_FMT_BC1:
564 case XGL_CH_FMT_BC2:
565 case XGL_CH_FMT_BC3:
566 case XGL_CH_FMT_BC4:
567 case XGL_CH_FMT_BC5:
568 case XGL_CH_FMT_BC6U:
569 case XGL_CH_FMT_BC6S:
570 case XGL_CH_FMT_BC7:
571 layout->block_width = 4;
572 layout->block_height = 4;
573 layout->block_size =
574 (format.channelFormat == XGL_CH_FMT_BC1 ||
575 format.channelFormat == XGL_CH_FMT_BC4) ? 8 : 16;
576 layout->compressed = true;
577 break;
578 default:
579 layout->block_width = 1;
580 layout->block_height = 1;
581 layout->block_size = 1;
582 layout->compressed = false;
583 break;
584 }
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800585}
586
587static void
588tex_layout_init_hiz(struct tex_layout *layout)
589{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800590 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800591
Chia-I Wue46da3e2014-08-08 21:52:48 +0800592 if (info->format.numericFormat == XGL_NUM_FMT_DS) {
593 switch (info->format.channelFormat) {
594 case XGL_CH_FMT_R32G8:
595 layout->has_depth = true;
596 layout->has_stencil = true;
597 break;
598 case XGL_CH_FMT_R32:
599 layout->has_depth = true;
600 break;
601 case XGL_CH_FMT_R8:
602 layout->has_stencil = true;
603 break;
604 default:
605 assert(!"unsupported DS format");
606 break;
607 }
608 }
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800609
610 if (!layout->has_depth)
611 return;
612
613 layout->hiz = true;
614
Chia-I Wue46da3e2014-08-08 21:52:48 +0800615 if (intel_gpu_gen(layout->gpu) == INTEL_GEN(6)) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800616 /*
617 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
618 *
619 * "The hierarchical depth buffer does not support the LOD field, it
620 * is assumed by hardware to be zero. A separate hierarachical
621 * depth buffer is required for each LOD used, and the
622 * corresponding buffer's state delivered to hardware each time a
623 * new depth buffer state with modified LOD is delivered."
624 *
625 * But we have a stronger requirement. Because of layer offsetting
626 * (check out the callers of ilo_texture_get_slice_offset()), we already
627 * have to require the texture to be non-mipmapped and non-array.
628 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800629 if (info->mipLevels > 1 || info->arraySize > 1 || info->extent.depth > 1)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800630 layout->hiz = false;
631 }
632
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800633 if (layout->has_stencil) {
634 /*
635 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
636 *
637 * "This field (Separate Stencil Buffer Enable) must be set to the
638 * same value (enabled or disabled) as Hierarchical Depth Buffer
639 * Enable."
640 *
641 * GEN7+ requires separate stencil buffers.
642 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800643 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7))
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800644 layout->separate_stencil = true;
645 else
646 layout->separate_stencil = layout->hiz;
647
648 if (layout->separate_stencil)
649 layout->has_stencil = false;
650 }
651}
652
Chia-I Wue46da3e2014-08-08 21:52:48 +0800653static XGL_RESULT
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800654tex_layout_init(struct tex_layout *layout,
Chia-I Wue46da3e2014-08-08 21:52:48 +0800655 const struct intel_gpu *gpu,
656 const XGL_IMAGE_CREATE_INFO *info,
657 struct intel_img_slice **slices)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800658{
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800659 memset(layout, 0, sizeof(*layout));
660
Chia-I Wue46da3e2014-08-08 21:52:48 +0800661 layout->gpu = gpu;
662 layout->info = info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800663
664 /* note that there are dependencies between these functions */
665 tex_layout_init_hiz(layout);
666 tex_layout_init_format(layout);
667 tex_layout_init_tiling(layout);
668 tex_layout_init_spacing(layout);
669 tex_layout_init_levels(layout);
670 tex_layout_init_alignments(layout);
671 tex_layout_init_qpitch(layout);
672
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800673 if (slices) {
674 int lv;
675
Chia-I Wue46da3e2014-08-08 21:52:48 +0800676 for (lv = 0; lv < info->mipLevels; lv++)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800677 layout->levels[lv].slices = slices[lv];
678 }
679
Chia-I Wue46da3e2014-08-08 21:52:48 +0800680 return XGL_SUCCESS;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800681}
682
683static void
684tex_layout_align(struct tex_layout *layout)
685{
686 int align_w = 1, align_h = 1, pad_h = 0;
687
688 /*
689 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
690 *
691 * "To determine the necessary padding on the bottom and right side of
692 * the surface, refer to the table in Section 7.18.3.4 for the i and j
693 * parameters for the surface format in use. The surface must then be
694 * extended to the next multiple of the alignment unit size in each
695 * dimension, and all texels contained in this extended surface must
696 * have valid GTT entries."
697 *
698 * "For cube surfaces, an additional two rows of padding are required
699 * at the bottom of the surface. This must be ensured regardless of
700 * whether the surface is stored tiled or linear. This is due to the
701 * potential rotation of cache line orientation from memory to cache."
702 *
703 * "For compressed textures (BC* and FXT1 surface formats), padding at
704 * the bottom of the surface is to an even compressed row, which is
705 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
706 * purposes, these surfaces behave as if j = 8 only for surface
707 * padding purposes. The value of 4 for j still applies for mip level
708 * alignment and QPitch calculation."
709 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800710 if (layout->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
711 if (align_w < layout->align_i)
712 align_w = layout->align_i;
713 if (align_h < layout->align_j)
714 align_h = layout->align_j;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800715
Chia-I Wue46da3e2014-08-08 21:52:48 +0800716 /* in case it is used as a cube */
717 if (layout->info->imageType == XGL_IMAGE_2D)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800718 pad_h += 2;
719
Chia-I Wue46da3e2014-08-08 21:52:48 +0800720 if (layout->compressed && align_h < 8)
721 align_h = 8;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800722 }
723
724 /*
725 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
726 *
727 * "If the surface contains an odd number of rows of data, a final row
728 * below the surface must be allocated."
729 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800730 if (layout->info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
731 if (align_h < 2)
732 align_h = 2;
733 }
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800734
735 /*
736 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
737 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
738 * To avoid out-of-bound access, we have to pad.
739 */
740 if (layout->hiz) {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800741 if (align_w < 8)
742 align_w = 8;
743 if (align_h < 4)
744 align_h = 4;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800745 }
746
Chia-I Wue46da3e2014-08-08 21:52:48 +0800747 layout->width = u_align(layout->width, align_w);
748 layout->height = u_align(layout->height + pad_h, align_h);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800749}
750
751/**
752 * Layout a 2D texture.
753 */
754static void
755tex_layout_2d(struct tex_layout *layout)
756{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800757 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800758 unsigned int level_x, level_y, num_slices;
759 int lv;
760
761 level_x = 0;
762 level_y = 0;
Chia-I Wue46da3e2014-08-08 21:52:48 +0800763 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800764 const unsigned int level_w = layout->levels[lv].w;
765 const unsigned int level_h = layout->levels[lv].h;
766 int slice;
767
768 /* set slice offsets */
769 if (layout->levels[lv].slices) {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800770 for (slice = 0; slice < info->arraySize; slice++) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800771 layout->levels[lv].slices[slice].x = level_x;
772 /* slices are qpitch apart in Y-direction */
773 layout->levels[lv].slices[slice].y =
774 level_y + layout->qpitch * slice;
775 }
776 }
777
778 /* extend the size of the monolithic bo to cover this mip level */
779 if (layout->width < level_x + level_w)
780 layout->width = level_x + level_w;
781 if (layout->height < level_y + level_h)
782 layout->height = level_y + level_h;
783
784 /* MIPLAYOUT_BELOW */
785 if (lv == 1)
Chia-I Wue46da3e2014-08-08 21:52:48 +0800786 level_x += u_align(level_w, layout->align_i);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800787 else
Chia-I Wue46da3e2014-08-08 21:52:48 +0800788 level_y += u_align(level_h, layout->align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800789 }
790
Chia-I Wue46da3e2014-08-08 21:52:48 +0800791 num_slices = info->arraySize;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800792 /* samples of the same index are stored in a slice */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800793 if (info->samples > 1 && !layout->interleaved)
794 num_slices *= info->samples;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800795
796 /* we did not take slices into consideration in the computation above */
797 layout->height += layout->qpitch * (num_slices - 1);
798
799 tex_layout_align(layout);
800}
801
802/**
803 * Layout a 3D texture.
804 */
805static void
806tex_layout_3d(struct tex_layout *layout)
807{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800808 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800809 unsigned int level_y;
810 int lv;
811
812 level_y = 0;
Chia-I Wue46da3e2014-08-08 21:52:48 +0800813 for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800814 const unsigned int level_w = layout->levels[lv].w;
815 const unsigned int level_h = layout->levels[lv].h;
816 const unsigned int level_d = layout->levels[lv].d;
Chia-I Wue46da3e2014-08-08 21:52:48 +0800817 const unsigned int slice_pitch = u_align(level_w, layout->align_i);
818 const unsigned int slice_qpitch = u_align(level_h, layout->align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800819 const unsigned int num_slices_per_row = 1 << lv;
820 int slice;
821
822 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
823 int i;
824
825 /* set slice offsets */
826 if (layout->levels[lv].slices) {
827 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
828 layout->levels[lv].slices[slice + i].x = slice_pitch * i;
829 layout->levels[lv].slices[slice + i].y = level_y;
830 }
831 }
832
833 /* move on to the next slice row */
834 level_y += slice_qpitch;
835 }
836
837 /* rightmost slice */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800838 if (num_slices_per_row < level_d)
839 slice = num_slices_per_row - 1;
840 else
841 slice = level_d - 1;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800842
843 /* extend the size of the monolithic bo to cover this slice */
844 if (layout->width < slice_pitch * slice + level_w)
845 layout->width = slice_pitch * slice + level_w;
Chia-I Wue46da3e2014-08-08 21:52:48 +0800846 if (lv == info->mipLevels - 1)
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800847 layout->height = (level_y - slice_qpitch) + level_h;
848 }
849
850 tex_layout_align(layout);
851}
852
853/* note that this may force the texture to be linear */
854static bool
855tex_layout_calculate_bo_size(struct tex_layout *layout)
856{
857 assert(layout->width % layout->block_width == 0);
858 assert(layout->height % layout->block_height == 0);
859 assert(layout->qpitch % layout->block_height == 0);
860
861 layout->bo_stride =
862 (layout->width / layout->block_width) * layout->block_size;
863 layout->bo_height = layout->height / layout->block_height;
864
865 while (true) {
866 int w = layout->bo_stride, h = layout->bo_height;
867 int align_w, align_h;
868
869 /*
870 * From the Haswell PRM, volume 5, page 163:
871 *
872 * "For linear surfaces, additional padding of 64 bytes is required
873 * at the bottom of the surface. This is in addition to the padding
874 * required above."
875 */
Chia-I Wue46da3e2014-08-08 21:52:48 +0800876 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7.5) &&
877 (layout->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800878 layout->tiling == INTEL_TILING_NONE) {
879 layout->bo_height +=
880 (64 + layout->bo_stride - 1) / layout->bo_stride;
881 }
882
883 /*
884 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
885 *
886 * "- For linear render target surfaces, the pitch must be a
887 * multiple of the element size for non-YUV surface formats.
888 * Pitch must be a multiple of 2 * element size for YUV surface
889 * formats.
890 * - For other linear surfaces, the pitch can be any multiple of
891 * bytes.
892 * - For tiled surfaces, the pitch must be a multiple of the tile
893 * width."
894 *
895 * Different requirements may exist when the bo is used in different
896 * places, but our alignments here should be good enough that we do not
Chia-I Wue46da3e2014-08-08 21:52:48 +0800897 * need to check layout->info->usage.
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800898 */
899 switch (layout->tiling) {
900 case INTEL_TILING_X:
901 align_w = 512;
902 align_h = 8;
903 break;
904 case INTEL_TILING_Y:
905 align_w = 128;
906 align_h = 32;
907 break;
908 default:
Chia-I Wue46da3e2014-08-08 21:52:48 +0800909 if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
910 layout->format.numericFormat == XGL_NUM_FMT_DS) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800911 /*
912 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
913 *
914 * "A 4KB tile is subdivided into 8-high by 8-wide array of
915 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
916 * bytes."
917 *
918 * Since we asked for INTEL_TILING_NONE instead of the non-existent
919 * INTEL_TILING_W, we want to align to W tiles here.
920 */
921 align_w = 64;
922 align_h = 64;
923 }
924 else {
925 /* some good enough values */
926 align_w = 64;
927 align_h = 2;
928 }
929 break;
930 }
931
Chia-I Wue46da3e2014-08-08 21:52:48 +0800932 w = u_align(w, align_w);
933 h = u_align(h, align_h);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800934
935 /* make sure the bo is mappable */
936 if (layout->tiling != INTEL_TILING_NONE) {
937 /*
938 * Usually only the first 256MB of the GTT is mappable.
939 *
940 * See also how intel_context::max_gtt_map_object_size is calculated.
941 */
942 const size_t mappable_gtt_size = 256 * 1024 * 1024;
943
944 /*
945 * Be conservative. We may be able to switch from VALIGN_4 to
946 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
947 */
948 if (mappable_gtt_size / w / 4 < h) {
949 if (layout->valid_tilings & (1 << INTEL_TILING_NONE)) {
950 layout->tiling = INTEL_TILING_NONE;
951 continue;
952 }
953 else {
Chia-I Wue46da3e2014-08-08 21:52:48 +0800954 /* send a warning? */
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800955 }
956 }
957 }
958
959 layout->bo_stride = w;
960 layout->bo_height = h;
961 break;
962 }
963
Chia-I Wue46da3e2014-08-08 21:52:48 +0800964 return (layout->bo_height <= intel_max_resource_size / layout->bo_stride);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800965}
966
967static void
968tex_layout_calculate_hiz_size(struct tex_layout *layout)
969{
Chia-I Wue46da3e2014-08-08 21:52:48 +0800970 const XGL_IMAGE_CREATE_INFO *info = layout->info;
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800971 const int hz_align_j = 8;
972 int hz_width, hz_height;
973
974 if (!layout->hiz)
975 return;
976
977 /*
978 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
979 * PRM, volume 2 part 1, page 312-313.
980 *
981 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
982 * memory row.
983 */
984
Chia-I Wue46da3e2014-08-08 21:52:48 +0800985 hz_width = u_align(layout->levels[0].w, 16);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800986
Chia-I Wue46da3e2014-08-08 21:52:48 +0800987 if (info->imageType == XGL_IMAGE_3D) {
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800988 unsigned lv;
989
990 hz_height = 0;
991
Chia-I Wue46da3e2014-08-08 21:52:48 +0800992 for (lv = 0; lv < info->mipLevels; lv++) {
993 const unsigned h = u_align(layout->levels[lv].h, hz_align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +0800994 hz_height += h * layout->levels[lv].d;
995 }
996
997 hz_height /= 2;
998 }
999 else {
Chia-I Wue46da3e2014-08-08 21:52:48 +08001000 const unsigned h0 = u_align(layout->levels[0].h, hz_align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001001 unsigned hz_qpitch = h0;
1002
1003 if (layout->array_spacing_full) {
Chia-I Wue46da3e2014-08-08 21:52:48 +08001004 const unsigned h1 = u_align(layout->levels[1].h, hz_align_j);
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001005 const unsigned htail =
Chia-I Wue46da3e2014-08-08 21:52:48 +08001006 ((intel_gpu_gen(layout->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001007
1008 hz_qpitch += h1 + htail;
1009 }
1010
Chia-I Wue46da3e2014-08-08 21:52:48 +08001011 hz_height = hz_qpitch * info->arraySize / 2;
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001012
Chia-I Wue46da3e2014-08-08 21:52:48 +08001013 if (intel_gpu_gen(layout->gpu) >= INTEL_GEN(7))
1014 hz_height = u_align(hz_height, 8);
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001015 }
1016
1017 /* align to Y-tile */
Chia-I Wue46da3e2014-08-08 21:52:48 +08001018 layout->hiz_stride = u_align(hz_width, 128);
1019 layout->hiz_height = u_align(hz_height, 32);
Chia-I Wu4ea339e2014-08-08 21:56:26 +08001020}
Chia-I Wufeb441f2014-08-08 21:27:38 +08001021
1022static bool
1023img_alloc_slices(struct intel_img *img,
1024 XGL_UINT levels, XGL_INT depth,
1025 XGL_UINT array_size)
1026{
1027 struct intel_img_slice *slices;
1028 int total_depth, lv;
1029
1030 /* sum the depths of all levels */
1031 total_depth = 0;
1032 for (lv = 0; lv < levels; lv++)
1033 total_depth += u_minify(depth, lv);
1034
1035 /*
1036 * There are (depth * tex->base.array_size) slices in total. Either depth
1037 * is one (non-3D) or templ->array_size is one (non-array), but it does
1038 * not matter.
1039 */
1040 slices = icd_alloc(sizeof(*slices) * total_depth * array_size,
1041 0, XGL_SYSTEM_ALLOC_INTERNAL);
1042 if (!slices)
1043 return false;
1044
1045 img->slices[0] = slices;
1046
1047 /* point to the respective positions in the buffer */
1048 for (lv = 1; lv < levels; lv++) {
1049 img->slices[lv] = img->slices[lv - 1] +
1050 u_minify(depth, lv - 1) * array_size;
1051 }
1052
1053 return true;
1054}
1055
/* Destroy callback for image objects: forwards to intel_img_destroy(). */
static void img_destroy(struct intel_obj *obj)
{
    intel_img_destroy(intel_img_from_obj(obj));
}
1062
1063static XGL_RESULT img_get_info(struct intel_base *base, int type,
1064 XGL_SIZE *size, XGL_VOID *data)
1065{
1066 struct intel_img *img = intel_img_from_base(base);
1067 XGL_RESULT ret = XGL_SUCCESS;
1068
1069 switch (type) {
1070 case XGL_INFO_TYPE_MEMORY_REQUIREMENTS:
1071 {
1072 XGL_MEMORY_REQUIREMENTS *mem_req = data;
1073
1074 mem_req->size = img->bo_stride * img->bo_height;
1075 mem_req->alignment = 4096;
1076 mem_req->heapCount = 1;
1077 mem_req->heaps[0] = 0;
1078
1079 *size = sizeof(*mem_req);
1080 }
1081 break;
1082 default:
1083 ret = intel_base_get_info(base, type, size, data);
1084 break;
1085 }
1086
1087 return ret;
1088}
1089
1090XGL_RESULT intel_img_create(struct intel_dev *dev,
1091 const XGL_IMAGE_CREATE_INFO *info,
1092 struct intel_img **img_ret)
1093{
1094 struct tex_layout layout;
1095 struct intel_img *img;
1096 XGL_RESULT ret;
1097
1098 img = (struct intel_img *) intel_base_create(sizeof(*img),
1099 dev->base.dbg, XGL_DBG_OBJECT_IMAGE, info, 0);
1100 if (!img)
1101 return XGL_ERROR_OUT_OF_MEMORY;
1102
1103 if (!img_alloc_slices(img, info->mipLevels, info->extent.depth,
1104 info->arraySize)) {
1105 intel_img_destroy(img);
1106 return XGL_ERROR_OUT_OF_MEMORY;
1107 }
1108
1109 ret = tex_layout_init(&layout, dev->gpu, info, img->slices);
1110 if (ret != XGL_SUCCESS) {
1111 intel_img_destroy(img);
1112 return ret;
1113 }
1114
1115 if (info->imageType == XGL_IMAGE_3D)
1116 tex_layout_3d(&layout);
1117 else
1118 tex_layout_2d(&layout);
1119
1120 if (!tex_layout_calculate_bo_size(&layout)) {
1121 intel_img_destroy(img);
1122 return XGL_ERROR_INVALID_MEMORY_SIZE;
1123 }
1124
1125 tex_layout_calculate_hiz_size(&layout);
1126
1127 /* TODO */
1128 if (layout.hiz || layout.separate_stencil) {
1129 intel_dev_log(dev, XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0,
1130 XGL_NULL_HANDLE, 0, 0, "HiZ or separate stencil enabled");
1131 intel_img_destroy(img);
1132 return XGL_ERROR_INVALID_MEMORY_SIZE;
1133 }
1134
1135 img->bo_format = layout.format;
1136 img->tiling = layout.tiling;
1137 img->bo_stride = layout.bo_stride;
1138 img->bo_height = layout.bo_height;
1139 img->block_width = layout.block_width;
1140 img->block_height = layout.block_height;
1141 img->block_size = layout.block_size;
1142 img->halign_8 = (layout.align_i == 8);
1143 img->valign_4 = (layout.align_j == 4);
1144 img->array_spacing_full = layout.array_spacing_full;
1145 img->interleaved = layout.interleaved;
1146
1147 img->obj.destroy = img_destroy;
1148 img->obj.base.get_info = img_get_info;
1149
1150 *img_ret = img;
1151
1152 return XGL_SUCCESS;
1153}
1154
1155void intel_img_destroy(struct intel_img *img)
1156{
1157 if (img->slices[0])
1158 icd_free(img->slices[0]);
1159
1160 intel_base_destroy(&img->obj.base);
1161}
1162
1163XGL_RESULT XGLAPI intelCreateImage(
1164 XGL_DEVICE device,
1165 const XGL_IMAGE_CREATE_INFO* pCreateInfo,
1166 XGL_IMAGE* pImage)
1167{
1168 struct intel_dev *dev = intel_dev(device);
1169
1170 return intel_img_create(dev, pCreateInfo, (struct intel_img **) pImage);
1171}
1172
1173XGL_RESULT XGLAPI intelGetImageSubresourceInfo(
1174 XGL_IMAGE image,
1175 const XGL_IMAGE_SUBRESOURCE* pSubresource,
1176 XGL_SUBRESOURCE_INFO_TYPE infoType,
1177 XGL_SIZE* pDataSize,
1178 XGL_VOID* pData)
1179{
1180 const struct intel_img *img = intel_img(image);
1181 XGL_RESULT ret = XGL_SUCCESS;
1182
1183 switch (infoType) {
1184 case XGL_INFO_TYPE_SUBRESOURCE_LAYOUT:
1185 {
1186 XGL_SUBRESOURCE_LAYOUT *layout = (XGL_SUBRESOURCE_LAYOUT *) pData;
1187 const struct intel_img_slice *slice =
1188 &img->slices[pSubresource->mipLevel][pSubresource->arraySlice];
1189 const unsigned int bx = slice->x / img->block_width;
1190 const unsigned int by = slice->y / img->block_height;
1191
1192 *pDataSize = sizeof(XGL_SUBRESOURCE_LAYOUT);
1193
1194 /*
1195 * size is not readily available and depthPitch might not be
1196 * available. Leave them alone for now.
1197 */
1198 layout->offset = by * img->bo_stride + bx * img->block_size;
1199 layout->size = 0;
1200 layout->rowPitch = img->bo_stride;
1201 layout->depthPitch = 0;
1202 }
1203 break;
1204 default:
1205 ret = XGL_ERROR_INVALID_VALUE;
1206 break;
1207 }
1208
1209 return ret;
1210}