Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: de1f41ebfbdfa06f12f82830192c4bb536da5a30 [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1	/*
Chia-I Wu	44e4236	2014-09-02 08:32:09 +0800	[diff] [blame]	2	* XGL
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
Chia-I Wu	44e4236	2014-09-02 08:32:09 +0800	[diff] [blame]	25	* Chia-I Wu <olv@lunarg.com>
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	26	*/
				27
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	28	#include "dev.h"
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	29	#include "format.h"
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	30	#include "gpu.h"
				31	#include "layout.h"
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	32
				33	enum {
				34	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				35	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				36	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				37	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				38
				39	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				40	LAYOUT_TILING_X \|
				41	LAYOUT_TILING_Y \|
				42	LAYOUT_TILING_W)
				43	};
				44
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	45	struct intel_layout_params {
				46	const struct intel_gpu *gpu;
				47	const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame^]	48	bool scanout;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	49
				50	bool compressed;
				51
				52	unsigned h0, h1;
				53	unsigned max_x, max_y;
				54	};
				55
				56	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	57	layout_get_slice_size(const struct intel_layout *layout,
				58	const struct intel_layout_params *params,
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	59	unsigned level, unsigned width, unsigned height)
				60	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	61	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	62	unsigned w, h;
				63
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	64	w = u_minify(layout->width0, level);
				65	h = u_minify(layout->height0, level);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	66
				67	/*
				68	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				69	*
				70	* "The dimensions of the mip maps are first determined by applying the
				71	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				72	* if necessary, they are padded out to compression block boundaries."
				73	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	74	w = u_align(w, layout->block_width);
				75	h = u_align(h, layout->block_height);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	76
				77	/*
				78	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				79	*
				80	* "If the surface is multisampled (4x), these values must be adjusted
				81	* as follows before proceeding:
				82	*
				83	* W_L = ceiling(W_L / 2) * 4
				84	* H_L = ceiling(H_L / 2) * 4"
				85	*
				86	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				87	*
				88	* "If the surface is multisampled and it is a depth or stencil surface
				89	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				90	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				91	* proceeding:
				92	*
				93	* #samples W_L = H_L =
				94	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				95	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				96	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				97	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				98	*
				99	* For interleaved samples (4x), where pixels
				100	*
				101	* (x, y ) (x+1, y )
				102	* (x, y+1) (x+1, y+1)
				103	*
				104	* would be is occupied by
				105	*
				106	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				107	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				108	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				109	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				110	*
				111	* Thus the need to
				112	*
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	113	* w = align(w, 2) * 2;
				114	* y = align(y, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	115	*/
				116	if (layout->interleaved_samples) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	117	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	118	case 0:
				119	case 1:
				120	break;
				121	case 2:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	122	w = u_align(w, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	123	break;
				124	case 4:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	125	w = u_align(w, 2) * 2;
				126	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	127	break;
				128	case 8:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	129	w = u_align(w, 2) * 4;
				130	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	131	break;
				132	case 16:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	133	w = u_align(w, 2) * 4;
				134	h = u_align(h, 2) * 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	135	break;
				136	default:
				137	assert(!"unsupported sample count");
				138	break;
				139	}
				140	}
				141
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	142	/*
				143	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				144	*
				145	* "For separate stencil buffer, the width must be mutiplied by 2 and
				146	* height divided by 2..."
				147	*
				148	* To make things easier (for transfer), we will just double the stencil
				149	* stride in 3DSTATE_STENCIL_BUFFER.
				150	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	151	w = u_align(w, layout->align_i);
				152	h = u_align(h, layout->align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	153
				154	*width = w;
				155	*height = h;
				156	}
				157
				158	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	159	layout_get_num_layers(const struct intel_layout *layout,
				160	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	161	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	162	const XGL_IMAGE_CREATE_INFO *info = params->info;
				163	unsigned num_layers = info->arraySize;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	164
				165	/* samples of the same index are stored in a layer */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	166	if (info->samples > 1 && !layout->interleaved_samples)
				167	num_layers *= info->samples;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	168
				169	return num_layers;
				170	}
				171
				172	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	173	layout_init_layer_height(struct intel_layout *layout,
				174	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	175	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	176	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	177	unsigned num_layers;
				178
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	179	if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
				180	return;
				181
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	182	num_layers = layout_get_num_layers(layout, params);
				183	if (num_layers <= 1)
				184	return;
				185
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	186	/*
				187	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				188	*
				189	* "The following equation is used for surface formats other than
				190	* compressed textures:
				191	*
				192	* QPitch = (h0 + h1 + 11j)"
				193	*
				194	* "The equation for compressed textures (BC* and FXT1 surface formats)
				195	* follows:
				196	*
				197	* QPitch = (h0 + h1 + 11j) / 4"
				198	*
				199	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				200	* value calculated in the equation above, for every other odd Surface
				201	* Height starting from 1 i.e. 1,5,9,13"
				202	*
				203	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				204	*
				205	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				206	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				207	*
				208	* QPitch = (h0 + h1 + 12j)
				209	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				210	*
				211	* (There are many typos or missing words here...)"
				212	*
				213	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				214	* the base address. The PRM divides QPitch by 4 for compressed formats
				215	* because the block height for those formats are 4, and it wants QPitch to
				216	* mean the number of memory rows, as opposed to texel rows, between
				217	* slices. Since we use texel rows everywhere, we do not need to divide
				218	* QPitch by 4.
				219	*/
				220	layout->layer_height = params->h0 + params->h1 +
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	221	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	222
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	223	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	224	layout->height0 % 4 == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	225	layout->layer_height += 4;
				226
				227	params->max_y += layout->layer_height * (num_layers - 1);
				228	}
				229
				230	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	231	layout_init_lods(struct intel_layout *layout,
				232	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	233	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	234	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	235	unsigned cur_x, cur_y;
				236	unsigned lv;
				237
				238	cur_x = 0;
				239	cur_y = 0;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	240	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	241	unsigned lod_w, lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	242
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	243	layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	244
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	245	layout->lods[lv].x = cur_x;
				246	layout->lods[lv].y = cur_y;
				247	layout->lods[lv].slice_width = lod_w;
				248	layout->lods[lv].slice_height = lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	249
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	250	switch (layout->walk) {
				251	case INTEL_LAYOUT_WALK_LOD:
				252	lod_h *= layout_get_num_layers(layout, params);
				253	if (lv == 1)
				254	cur_x += lod_w;
				255	else
				256	cur_y += lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	257
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	258	/* every LOD begins at tile boundaries */
				259	if (info->mipLevels > 1) {
				260	intel_format_is_stencil(params->gpu, layout->format);
				261	cur_x = u_align(cur_x, 64);
				262	cur_y = u_align(cur_y, 64);
				263	}
				264	break;
				265	case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	266	/* MIPLAYOUT_BELOW */
				267	if (lv == 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	268	cur_x += lod_w;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	269	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	270	cur_y += lod_h;
				271	break;
				272	case INTEL_LAYOUT_WALK_3D:
				273	{
				274	const unsigned num_slices = u_minify(info->extent.depth, lv);
				275	const unsigned num_slices_per_row = 1 << lv;
				276	const unsigned num_rows =
				277	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				278
				279	lod_w *= num_slices_per_row;
				280	lod_h *= num_rows;
				281
				282	cur_y += lod_h;
				283	}
				284	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	285	}
				286
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	287	if (params->max_x < layout->lods[lv].x + lod_w)
				288	params->max_x = layout->lods[lv].x + lod_w;
				289	if (params->max_y < layout->lods[lv].y + lod_h)
				290	params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	291	}
				292
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	293	if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
				294	params->h0 = layout->lods[0].slice_height;
				295
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	296	if (info->mipLevels > 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	297	params->h1 = layout->lods[1].slice_height;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	298	else
				299	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				300	}
				301	}
				302
				303	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	304	layout_init_alignments(struct intel_layout *layout,
				305	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	306	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	307	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	308
				309	/*
				310	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				311	*
				312	* "surface format align_i align_j
				313	* YUV 4:2:2 formats 4 *see below
				314	* BC1-5 4 4
				315	* FXT1 8 4
				316	* all other formats 4 *see below"
				317	*
				318	* "- align_j = 4 for any depth buffer
				319	* - align_j = 2 for separate stencil buffer
				320	* - align_j = 4 for any render target surface is multisampled (4x)
				321	* - align_j = 4 for any render target surface with Surface Vertical
				322	* Alignment = VALIGN_4
				323	* - align_j = 2 for any render target surface with Surface Vertical
				324	* Alignment = VALIGN_2
				325	* - align_j = 2 for all other render target surface
				326	* - align_j = 2 for any sampling engine surface with Surface Vertical
				327	* Alignment = VALIGN_2
				328	* - align_j = 4 for any sampling engine surface with Surface Vertical
				329	* Alignment = VALIGN_4"
				330	*
				331	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				332	*
				333	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				334	* the Surface Format is 96 bits per element (BPE)."
				335	*
				336	* They can be rephrased as
				337	*
				338	* align_i align_j
				339	* compressed formats block width block height
				340	* PIPE_FORMAT_S8_UINT 4 2
				341	* other depth/stencil formats 4 4
				342	* 4x multisampled 4 4
				343	* bpp 96 4 2
				344	* others 4 2 or 4
				345	*/
				346
				347	/*
				348	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				349	*
				350	* "surface defined by surface format align_i align_j
				351	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				352	* not D16_UNORM 4 4
				353	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				354	* SURFACE_STATE BC, ETC, EAC* 4 4
				355	* FXT1 8 4
				356	* all others (set by SURFACE_STATE)"
				357	*
				358	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				359	*
				360	* "- This field (Surface Vertical Aligment) is intended to be set to
				361	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				362	* multisampled (4x) render target, or for a multisampled (8x)
				363	* render target, since these surfaces support only alignment of 4.
				364	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				365	* memory.
				366	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				367	* surfaces.
				368	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				369	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				370	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				371	* must be set to VALIGN_4."
				372	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				373	*
				374	* "- This field (Surface Horizontal Aligment) is intended to be set to
				375	* HALIGN_8 only if the surface was rendered as a depth buffer with
				376	* Z16 format or a stencil buffer, since these surfaces support only
				377	* alignment of 8.
				378	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				379	* memory.
				380	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				381	* - This field must be set to HALIGN_8 if the Surface Format is
				382	* FXT1."
				383	*
				384	* They can be rephrased as
				385	*
				386	* align_i align_j
				387	* compressed formats block width block height
				388	* PIPE_FORMAT_Z16_UNORM 8 4
				389	* PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	390	* other depth/stencil formats 4 4
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	391	* 2x or 4x multisampled 4 or 8 4
				392	* tiled Y 4 or 8 4 (if rt)
				393	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				394	* others 4 or 8 2 or 4
				395	*/
				396
				397	if (params->compressed) {
				398	/* this happens to be the case */
				399	layout->align_i = layout->block_width;
				400	layout->align_j = layout->block_height;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	401	} else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				402	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
				403	switch (layout->format.channelFormat) {
				404	case XGL_CH_FMT_R16:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	405	layout->align_i = 8;
				406	layout->align_j = 4;
				407	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	408	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	409	layout->align_i = 8;
				410	layout->align_j = 8;
				411	break;
				412	default:
				413	layout->align_i = 4;
				414	layout->align_j = 4;
				415	break;
				416	}
				417	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	418	switch (layout->format.channelFormat) {
				419	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	420	layout->align_i = 4;
				421	layout->align_j = 2;
				422	break;
				423	default:
				424	layout->align_i = 4;
				425	layout->align_j = 4;
				426	break;
				427	}
				428	}
				429	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	430	const bool valign_4 = (info->samples > 1) \|\|
				431	(intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	432	layout->tiling == INTEL_TILING_Y &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	433	(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	434
				435	if (valign_4)
				436	assert(layout->block_size != 12);
				437
				438	layout->align_i = 4;
				439	layout->align_j = (valign_4) ? 4 : 2;
				440	}
				441
				442	/*
				443	* the fact that align i and j are multiples of block width and height
				444	* respectively is what makes the size of the bo a multiple of the block
				445	* size, slices start at block boundaries, and many of the computations
				446	* work.
				447	*/
				448	assert(layout->align_i % layout->block_width == 0);
				449	assert(layout->align_j % layout->block_height == 0);
				450
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	451	/* make sure u_align() works */
				452	assert(u_is_pow2(layout->align_i) &&
				453	u_is_pow2(layout->align_j));
				454	assert(u_is_pow2(layout->block_width) &&
				455	u_is_pow2(layout->block_height));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	456	}
				457
				458	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	459	layout_get_valid_tilings(const struct intel_layout *layout,
				460	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	461	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	462	const XGL_IMAGE_CREATE_INFO *info = params->info;
				463	const XGL_FORMAT format = layout->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	464	unsigned valid_tilings = LAYOUT_TILING_ALL;
				465
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame^]	466	/*
				467	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				468	*
				469	* "Display/Overlay Y-Major not supported.
				470	* X-Major required for Async Flips"
				471	*/
				472	if (params->scanout)
				473	valid_tilings &= LAYOUT_TILING_X;
				474
Chia-I Wu	6ac9399	2014-08-30 18:23:28 +0800	[diff] [blame]	475	if (info->tiling == XGL_LINEAR_TILING)
				476	valid_tilings &= LAYOUT_TILING_NONE;
				477
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	478	/*
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	479	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				480	*
				481	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				482	* Depth Buffer is not supported."
				483	*
				484	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				485	*
				486	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				487	*
				488	* "W-Major Tile Format is used for separate stencil."
				489	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	490	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				491	switch (format.channelFormat) {
				492	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	493	valid_tilings &= LAYOUT_TILING_W;
				494	break;
				495	default:
				496	valid_tilings &= LAYOUT_TILING_Y;
				497	break;
				498	}
				499	}
				500
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	501	if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	502	/*
				503	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				504	*
				505	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				506	* either TileX or Linear."
				507	*/
				508	if (layout->block_size == 16)
				509	valid_tilings &= ~LAYOUT_TILING_Y;
				510
				511	/*
				512	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				513	*
				514	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				515	* for all tiled Y Render Target surfaces."
				516	*
				517	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				518	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	519	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	520	valid_tilings &= ~LAYOUT_TILING_Y;
				521	}
				522
				523	/* no conflicting binding flags */
				524	assert(valid_tilings);
				525
				526	return valid_tilings;
				527	}
				528
				529	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	530	layout_init_tiling(struct intel_layout *layout,
				531	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	532	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	533	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	534	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				535
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	536	/* no hardware support for W-tile */
				537	if (valid_tilings & LAYOUT_TILING_W)
				538	valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) \| LAYOUT_TILING_NONE;
				539
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	540	layout->valid_tilings = valid_tilings;
				541
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	542	if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT \|
				543	XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	544	/*
				545	* heuristically set a minimum width/height for enabling tiling
				546	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	547	if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	548	valid_tilings &= ~LAYOUT_TILING_X;
				549
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	550	if ((layout->width0 < 32 \|\| layout->height0 < 16) &&
				551	(layout->width0 < 16 \|\| layout->height0 < 32) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	552	(valid_tilings & ~LAYOUT_TILING_Y))
				553	valid_tilings &= ~LAYOUT_TILING_Y;
				554	} else {
				555	/* force linear if we are not sure where the texture is bound to */
				556	if (valid_tilings & LAYOUT_TILING_NONE)
				557	valid_tilings &= LAYOUT_TILING_NONE;
				558	}
				559
				560	/* prefer tiled over linear */
				561	if (valid_tilings & LAYOUT_TILING_Y)
				562	layout->tiling = INTEL_TILING_Y;
				563	else if (valid_tilings & LAYOUT_TILING_X)
				564	layout->tiling = INTEL_TILING_X;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	565	else
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	566	layout->tiling = INTEL_TILING_NONE;
				567	}
				568
				569	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	570	layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	571	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	572	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	573	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	574
				575	/*
				576	* It is not explicitly states, but render targets are expected to be
				577	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				578	* to be IMS (samples interleaved).
				579	*
				580	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				581	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	582	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	583	/*
				584	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				585	*
				586	* "note that the depth buffer and stencil buffer have an implied
				587	* value of ARYSPC_FULL"
				588	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	589	layout->walk = (info->imageType == XGL_IMAGE_3D) ?
				590	INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	591
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	592	layout->interleaved_samples = true;
				593	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	594	/*
				595	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				596	*
				597	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				598	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				599	* Array Spacing) must be set to ARYSPC_LOD0."
				600	*
				601	* As multisampled resources are not mipmapped, we never use
				602	* ARYSPC_FULL for them.
				603	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	604	if (info->samples > 1)
				605	assert(info->mipLevels == 1);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	606
				607	layout->walk =
				608	(info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
				609	(info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
				610	INTEL_LAYOUT_WALK_LOD;
				611
				612	layout->interleaved_samples = false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	613	}
				614	}
				615
				616	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	617	layout_init_walk_gen6(struct intel_layout *layout,
				618	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	619	{
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	620	/*
				621	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				622	*
				623	* "The separate stencil buffer does not support mip mapping, thus the
				624	* storage for LODs other than LOD 0 is not needed. The following
				625	* QPitch equation applies only to the separate stencil buffer:
				626	*
				627	* QPitch = h_0"
				628	*
				629	* GEN6 does not support compact spacing otherwise.
				630	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	631	layout->walk =
				632	(params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
				633	intel_format_is_stencil(params->gpu, layout->format) ? INTEL_LAYOUT_WALK_LOD :
				634	INTEL_LAYOUT_WALK_LAYER;
				635
				636	/* GEN6 supports only interleaved samples */
				637	layout->interleaved_samples = true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	638	}
				639
				640	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	641	layout_init_walk(struct intel_layout *layout,
				642	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	643	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	644	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	645	layout_init_walk_gen7(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	646	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	647	layout_init_walk_gen6(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	648	}
				649
				650	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	651	layout_init_size_and_format(struct intel_layout *layout,
				652	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	653	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	654	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	655	XGL_FORMAT format = info->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	656	bool require_separate_stencil;
				657
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	658	layout->width0 = info->extent.width;
				659	layout->height0 = info->extent.height;
				660
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	661	/*
				662	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				663	*
				664	* "This field (Separate Stencil Buffer Enable) must be set to the same
				665	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				666	*
				667	* GEN7+ requires separate stencil buffers.
				668	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	669	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				670	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	671	require_separate_stencil = true;
				672	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	673	require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	674	}
				675
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	676	if (format.numericFormat == XGL_NUM_FMT_DS) {
				677	switch (format.channelFormat) {
				678	case XGL_CH_FMT_R32G8:
				679	if (require_separate_stencil) {
				680	format.channelFormat = XGL_CH_FMT_R32;
				681	layout->separate_stencil = true;
				682	}
				683	break;
				684	default:
				685	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	686	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	687	}
				688
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	689	layout->format = format;
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	690	layout->block_width = icd_format_get_block_width(format);
				691	layout->block_height = layout->block_width;
				692	layout->block_size = icd_format_get_size(format);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	693
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	694	params->compressed = icd_format_is_compressed(format);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	695	}
				696
				697	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	698	layout_want_mcs(struct intel_layout *layout,
				699	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	700	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	701	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	702	bool want_mcs = false;
				703
				704	/* MCS is for RT on GEN7+ */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	705	if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	706	return false;
				707
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	708	if (info->imageType != XGL_IMAGE_2D \|\|
				709	!(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	710	return false;
				711
				712	/*
				713	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				714	*
				715	* "For Render Target and Sampling Engine Surfaces:If the surface is
				716	* multisampled (Number of Multisamples any value other than
				717	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				718	*
				719	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				720	* are not written"
				721	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	722	if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	723	!icd_format_is_int(info->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	724	want_mcs = true;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	725	} else if (info->samples <= 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	726	/*
				727	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				728	*
				729	* "When MCS is buffer is used for color clear of non-multisampler
				730	* render target, the following restrictions apply.
				731	* - Support is limited to tiled render targets.
				732	* - Support is for non-mip-mapped and non-array surface types
				733	* only.
				734	* - Clear is supported only on the full RT; i.e., no partial clear
				735	* or overlapping clears.
				736	* - MCS buffer for non-MSRT is supported only for RT formats
				737	* 32bpp, 64bpp and 128bpp.
				738	* ..."
				739	*/
				740	if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	741	info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	742	switch (layout->block_size) {
				743	case 4:
				744	case 8:
				745	case 16:
				746	want_mcs = true;
				747	break;
				748	default:
				749	break;
				750	}
				751	}
				752	}
				753
				754	return want_mcs;
				755	}
				756
				757	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	758	layout_want_hiz(const struct intel_layout *layout,
				759	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	760	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	761	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	762
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	763	if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	764	return false;
				765
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame]	766	if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	767	return false;
				768
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	769	/*
				770	* As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
				771	* for every level. This is generally fine except on GEN6, where HiZ and
				772	* separate stencil are enabled and disabled at the same time. When the
				773	* format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
				774	* can result in incompatible formats.
				775	*/
				776	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
				777	info->format.channelFormat == XGL_CH_FMT_R32G8 &&
				778	info->mipLevels > 1)
				779	return false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	780
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	781	return true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	782	}
				783
				784	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	785	layout_init_aux(struct intel_layout *layout,
				786	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	787	{
				788	if (layout_want_hiz(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	789	layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	790	else if (layout_want_mcs(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	791	layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	792	}
				793
				794	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	795	layout_align(struct intel_layout layout, struct intel_layout_params params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	796	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	797	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	798	int align_w = 1, align_h = 1, pad_h = 0;
				799
				800	/*
				801	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				802	*
				803	* "To determine the necessary padding on the bottom and right side of
				804	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				805	* parameters for the surface format in use. The surface must then be
				806	* extended to the next multiple of the alignment unit size in each
				807	* dimension, and all texels contained in this extended surface must
				808	* have valid GTT entries."
				809	*
				810	* "For cube surfaces, an additional two rows of padding are required
				811	* at the bottom of the surface. This must be ensured regardless of
				812	* whether the surface is stored tiled or linear. This is due to the
				813	* potential rotation of cache line orientation from memory to cache."
				814	*
				815	* "For compressed textures (BC* and FXT1 surface formats), padding at
				816	* the bottom of the surface is to an even compressed row, which is
				817	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				818	* purposes, these surfaces behave as if j = 8 only for surface
				819	* padding purposes. The value of 4 for j still applies for mip level
				820	* alignment and QPitch calculation."
				821	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	822	if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
				823	if (align_w < layout->align_i)
				824	align_w = layout->align_i;
				825	if (align_h < layout->align_j)
				826	align_h = layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	827
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	828	/* in case it is used as a cube */
				829	if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	830	pad_h += 2;
				831
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	832	if (params->compressed && align_h < layout->align_j * 2)
				833	align_h = layout->align_j * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	834	}
				835
				836	/*
				837	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				838	*
				839	* "If the surface contains an odd number of rows of data, a final row
				840	* below the surface must be allocated."
				841	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	842	if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
				843	align_h = 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	844
				845	/*
				846	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	847	* intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	848	* To avoid out-of-bound access, we have to pad.
				849	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	850	if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
				851	info->mipLevels == 1 &&
				852	info->arraySize == 1 &&
				853	info->extent.depth == 1) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	854	if (align_w < 8)
				855	align_w = 8;
				856	if (align_h < 4)
				857	align_h = 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	858	}
				859
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	860	params->max_x = u_align(params->max_x, align_w);
				861	params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	862	}
				863
				864	/* note that this may force the texture to be linear */
				865	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	866	layout_calculate_bo_size(struct intel_layout *layout,
				867	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	868	{
				869	assert(params->max_x % layout->block_width == 0);
				870	assert(params->max_y % layout->block_height == 0);
				871	assert(layout->layer_height % layout->block_height == 0);
				872
				873	layout->bo_stride =
				874	(params->max_x / layout->block_width) * layout->block_size;
				875	layout->bo_height = params->max_y / layout->block_height;
				876
				877	while (true) {
				878	unsigned w = layout->bo_stride, h = layout->bo_height;
				879	unsigned align_w, align_h;
				880
				881	/*
				882	* From the Haswell PRM, volume 5, page 163:
				883	*
				884	* "For linear surfaces, additional padding of 64 bytes is required
				885	* at the bottom of the surface. This is in addition to the padding
				886	* required above."
				887	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	888	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
				889	(params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	890	layout->tiling == INTEL_TILING_NONE) {
				891	layout->bo_height +=
				892	(64 + layout->bo_stride - 1) / layout->bo_stride;
				893	}
				894
				895	/*
				896	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				897	*
				898	* "- For linear render target surfaces, the pitch must be a
				899	* multiple of the element size for non-YUV surface formats.
				900	* Pitch must be a multiple of 2 * element size for YUV surface
				901	* formats.
				902	* - For other linear surfaces, the pitch can be any multiple of
				903	* bytes.
				904	* - For tiled surfaces, the pitch must be a multiple of the tile
				905	* width."
				906	*
				907	* Different requirements may exist when the bo is used in different
				908	* places, but our alignments here should be good enough that we do not
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	909	* need to check layout->info->usage.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	910	*/
				911	switch (layout->tiling) {
				912	case INTEL_TILING_X:
				913	align_w = 512;
				914	align_h = 8;
				915	break;
				916	case INTEL_TILING_Y:
				917	align_w = 128;
				918	align_h = 32;
				919	break;
				920	default:
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame]	921	if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	922	/*
				923	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				924	*
				925	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				926	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				927	* bytes."
				928	*
				929	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				930	* INTEL_TILING_W, we want to align to W tiles here.
				931	*/
				932	align_w = 64;
				933	align_h = 64;
				934	} else {
				935	/* some good enough values */
				936	align_w = 64;
				937	align_h = 2;
				938	}
				939	break;
				940	}
				941
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	942	w = u_align(w, align_w);
				943	h = u_align(h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	944
				945	/* make sure the bo is mappable */
				946	if (layout->tiling != INTEL_TILING_NONE) {
				947	/*
				948	* Usually only the first 256MB of the GTT is mappable.
				949	*
				950	* See also how intel_context::max_gtt_map_object_size is calculated.
				951	*/
				952	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				953
				954	/*
				955	* Be conservative. We may be able to switch from VALIGN_4 to
				956	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				957	*/
				958	if (mappable_gtt_size / w / 4 < h) {
				959	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				960	layout->tiling = INTEL_TILING_NONE;
				961	/* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	962	if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	963	params->info->samples <= 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	964	layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	965
				966	continue;
				967	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	968	/* mapping will fail */
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	969	}
				970	}
				971	}
				972
				973	layout->bo_stride = w;
				974	layout->bo_height = h;
				975	break;
				976	}
				977	}
				978
				979	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	980	layout_calculate_hiz_size(struct intel_layout *layout,
				981	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	982	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	983	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	984	const unsigned hz_align_j = 8;
				985	enum intel_layout_walk_type hz_walk;
				986	unsigned hz_width, hz_height, lv;
				987	unsigned hz_clear_w, hz_clear_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	988
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	989	assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
				990
				991	assert(layout->walk == INTEL_LAYOUT_WALK_LAYER \|\|
				992	layout->walk == INTEL_LAYOUT_WALK_3D);
				993
				994	/*
				995	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				996	*
				997	* "The hierarchical depth buffer does not support the LOD field, it is
				998	* assumed by hardware to be zero. A separate hierarachical depth
				999	* buffer is required for each LOD used, and the corresponding
				1000	* buffer's state delivered to hardware each time a new depth buffer
				1001	* state with modified LOD is delivered."
				1002	*
				1003	* We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
				1004	*/
				1005	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1006	hz_walk = layout->walk;
				1007	else
				1008	hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1009
				1010	/*
				1011	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				1012	* PRM, volume 2 part 1, page 312-313.
				1013	*
				1014	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				1015	* memory row.
				1016	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1017	switch (hz_walk) {
				1018	case INTEL_LAYOUT_WALK_LOD:
				1019	{
				1020	unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
				1021	unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
				1022	unsigned cur_tx, cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1023
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1024	/* figure out the tile offsets of LODs */
				1025	hz_width = 0;
				1026	hz_height = 0;
				1027	cur_tx = 0;
				1028	cur_ty = 0;
				1029	for (lv = 0; lv < info->mipLevels; lv++) {
				1030	unsigned tw, th;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1031
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1032	lod_tx[lv] = cur_tx;
				1033	lod_ty[lv] = cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1034
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1035	tw = u_align(layout->lods[lv].slice_width, 16);
				1036	th = u_align(layout->lods[lv].slice_height, hz_align_j) *
				1037	info->arraySize / 2;
				1038	/* convert to Y-tiles */
				1039	tw = u_align(tw, 128) / 128;
				1040	th = u_align(th, 32) / 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1041
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1042	if (hz_width < cur_tx + tw)
				1043	hz_width = cur_tx + tw;
				1044	if (hz_height < cur_ty + th)
				1045	hz_height = cur_ty + th;
				1046
				1047	if (lv == 1)
				1048	cur_tx += tw;
				1049	else
				1050	cur_ty += th;
				1051	}
				1052
				1053	/* convert tile offsets to memory offsets */
				1054	for (lv = 0; lv < info->mipLevels; lv++) {
				1055	layout->aux_offsets[lv] =
				1056	(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
				1057	}
				1058	hz_width *= 128;
				1059	hz_height *= 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1060	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1061	break;
				1062	case INTEL_LAYOUT_WALK_LAYER:
				1063	{
				1064	const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1065	const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1066	const unsigned htail =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1067	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1068	const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1069
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1070	hz_width = u_align(layout->lods[0].slice_width, 16);
				1071
				1072	hz_height = hz_qpitch * info->arraySize / 2;
				1073	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1074	hz_height = u_align(hz_height, 8);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1075	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1076	break;
				1077	case INTEL_LAYOUT_WALK_3D:
				1078	hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1079
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1080	hz_height = 0;
				1081	for (lv = 0; lv < info->mipLevels; lv++) {
				1082	const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
				1083	/* according to the formula, slices are packed together vertically */
				1084	hz_height += h * u_minify(info->extent.depth, lv);
				1085	}
				1086	hz_height /= 2;
				1087	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1088	}
				1089
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1090	/*
				1091	* In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
				1092	* Experiments on Haswell show that aligning the RECTLIST primitive and
				1093	* 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
				1094	* aligned.
				1095	*/
				1096	hz_clear_w = 8;
				1097	hz_clear_h = 4;
				1098	switch (info->samples) {
				1099	case 0:
				1100	case 1:
				1101	default:
				1102	break;
				1103	case 2:
				1104	hz_clear_w /= 2;
				1105	break;
				1106	case 4:
				1107	hz_clear_w /= 2;
				1108	hz_clear_h /= 2;
				1109	break;
				1110	case 8:
				1111	hz_clear_w /= 4;
				1112	hz_clear_h /= 2;
				1113	break;
				1114	case 16:
				1115	hz_clear_w /= 4;
				1116	hz_clear_h /= 4;
				1117	break;
				1118	}
				1119
				1120	for (lv = 0; lv < info->mipLevels; lv++) {
				1121	if (u_minify(layout->width0, lv) % hz_clear_w \|\|
				1122	u_minify(layout->height0, lv) % hz_clear_h)
				1123	break;
				1124	layout->aux_enables \|= 1 << lv;
				1125	}
				1126
				1127	/* we padded to allow this in layout_align() */
				1128	if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
				1129	layout->aux_enables \|= 0x1;
				1130
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1131	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1132	layout->aux_stride = u_align(hz_width, 128);
				1133	layout->aux_height = u_align(hz_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1134	}
				1135
				1136	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1137	layout_calculate_mcs_size(struct intel_layout *layout,
				1138	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1139	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1140	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1141	int mcs_width, mcs_height, mcs_cpp;
				1142	int downscale_x, downscale_y;
				1143
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1144	assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1145
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1146	if (info->samples > 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1147	/*
				1148	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1149	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1150	* need of scale down could be that the clear rectangle is used to clear
				1151	* the MCS instead of the RT.
				1152	*
				1153	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1154	* 2x2 factor could come from that the hardware writes 128 bits (an
				1155	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1156	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1157	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1158	* pixel block in the RT.
				1159	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1160	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1161	case 2:
				1162	case 4:
				1163	downscale_x = 8;
				1164	downscale_y = 2;
				1165	mcs_cpp = 1;
				1166	break;
				1167	case 8:
				1168	downscale_x = 2;
				1169	downscale_y = 2;
				1170	mcs_cpp = 4;
				1171	break;
				1172	case 16:
				1173	downscale_x = 2;
				1174	downscale_y = 1;
				1175	mcs_cpp = 8;
				1176	break;
				1177	default:
				1178	assert(!"unsupported sample count");
				1179	return;
				1180	break;
				1181	}
				1182
				1183	/*
				1184	* It also appears that the 2x2 subspans generated by the scaled-down
				1185	* clear rectangle cannot be masked. The scale-down clear rectangle
				1186	* thus must be aligned to 2x2, and we need to pad.
				1187	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1188	mcs_width = u_align(layout->width0, downscale_x * 2);
				1189	mcs_height = u_align(layout->height0, downscale_y * 2);
				1190	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1191	/*
				1192	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1193	*
				1194	* " Pixels Lines
				1195	* TiledY RT CL
				1196	* bpp
				1197	* 32 8 4
				1198	* 64 4 4
				1199	* 128 2 4
				1200	*
				1201	* TiledX RT CL
				1202	* bpp
				1203	* 32 16 2
				1204	* 64 8 2
				1205	* 128 4 2"
				1206	*
				1207	* This table and the two following tables define the RT alignments, the
				1208	* clear rectangle alignments, and the clear rectangle scale factors.
				1209	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1210	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1211	* rectangle scale factors are 8x16 blocks.
				1212	*
				1213	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1214	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1215	* 8x16 blocks.
				1216	*
				1217	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1218	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1219	* which says that a Y-tile maps to 128x256 blocks (\see
				1220	* intel_get_non_msrt_mcs_alignment). It does not really change
				1221	* anything except for the size of the allocated MCS. Let's see if we
				1222	* hit out-of-bound access.
				1223	*/
				1224	switch (layout->tiling) {
				1225	case INTEL_TILING_X:
				1226	downscale_x = 64 / layout->block_size;
				1227	downscale_y = 2;
				1228	break;
				1229	case INTEL_TILING_Y:
				1230	downscale_x = 32 / layout->block_size;
				1231	downscale_y = 4;
				1232	break;
				1233	default:
				1234	assert(!"unsupported tiling mode");
				1235	return;
				1236	break;
				1237	}
				1238
				1239	downscale_x *= 8;
				1240	downscale_y *= 16;
				1241
				1242	/*
				1243	* From the Haswell PRM, volume 7, page 652:
				1244	*
				1245	* "Clear rectangle must be aligned to two times the number of
				1246	* pixels in the table shown below due to 16X16 hashing across the
				1247	* slice."
				1248	*
				1249	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1250	* 2x2, and we need to pad.
				1251	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1252	mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
				1253	mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1254	mcs_cpp = 16; /* an OWord */
				1255	}
				1256
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1257	layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1258	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1259	layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
				1260	layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1261	}
				1262
				1263	/**
				1264	* Initialize the layout. Callers should zero-initialize \p layout first.
				1265	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1266	void intel_layout_init(struct intel_layout *layout,
				1267	const struct intel_dev *dev,
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame^]	1268	const XGL_IMAGE_CREATE_INFO *info,
				1269	bool scanout)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1270	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1271	struct intel_layout_params params;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1272
				1273	memset(&params, 0, sizeof(params));
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1274	params.gpu = dev->gpu;
				1275	params.info = info;
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame^]	1276	params.scanout = scanout;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1277
				1278	/* note that there are dependencies between these functions */
				1279	layout_init_aux(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1280	layout_init_size_and_format(layout, &params);
				1281	layout_init_walk(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1282	layout_init_tiling(layout, &params);
				1283	layout_init_alignments(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1284	layout_init_lods(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1285	layout_init_layer_height(layout, &params);
				1286
				1287	layout_align(layout, &params);
				1288	layout_calculate_bo_size(layout, &params);
				1289
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1290	switch (layout->aux) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1291	case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1292	layout_calculate_hiz_size(layout, &params);
				1293	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1294	case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1295	layout_calculate_mcs_size(layout, &params);
				1296	break;
				1297	default:
				1298	break;
				1299	}
				1300	}
				1301
				1302	/**
				1303	* Update the tiling mode and bo stride (for imported resources).
				1304	*/
				1305	bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1306	intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1307	enum intel_tiling_mode tiling,
				1308	unsigned bo_stride)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1309	{
				1310	if (!(layout->valid_tilings & (1 << tiling)))
				1311	return false;
				1312
				1313	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1314	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1315	return false;
				1316
				1317	layout->tiling = tiling;
				1318	layout->bo_stride = bo_stride;
				1319
				1320	return true;
				1321	}