Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: f1894d4afa207c986bcab9d61618e3822abac963 [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
				25	* Chia-I Wu <olv@lunarg.com>
				26	*/
				27
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	28	#include "dev.h"
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	29	#include "format.h"
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	30	#include "gpu.h"
				31	#include "layout.h"
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	32
				33	enum {
				34	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				35	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				36	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				37	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				38
				39	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				40	LAYOUT_TILING_X \|
				41	LAYOUT_TILING_Y \|
				42	LAYOUT_TILING_W)
				43	};
				44
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	45	struct intel_layout_params {
				46	const struct intel_gpu *gpu;
				47	const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	48
				49	bool compressed;
				50
				51	unsigned h0, h1;
				52	unsigned max_x, max_y;
				53	};
				54
				55	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	56	layout_get_slice_size(const struct intel_layout *layout,
				57	const struct intel_layout_params *params,
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	58	unsigned level, unsigned width, unsigned height)
				59	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	60	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	61	unsigned w, h;
				62
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	63	w = u_minify(layout->width0, level);
				64	h = u_minify(layout->height0, level);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	65
				66	/*
				67	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				68	*
				69	* "The dimensions of the mip maps are first determined by applying the
				70	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				71	* if necessary, they are padded out to compression block boundaries."
				72	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	73	w = u_align(w, layout->block_width);
				74	h = u_align(h, layout->block_height);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	75
				76	/*
				77	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				78	*
				79	* "If the surface is multisampled (4x), these values must be adjusted
				80	* as follows before proceeding:
				81	*
				82	* W_L = ceiling(W_L / 2) * 4
				83	* H_L = ceiling(H_L / 2) * 4"
				84	*
				85	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				86	*
				87	* "If the surface is multisampled and it is a depth or stencil surface
				88	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				89	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				90	* proceeding:
				91	*
				92	* #samples W_L = H_L =
				93	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				94	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				95	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				96	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				97	*
				98	* For interleaved samples (4x), where pixels
				99	*
				100	* (x, y ) (x+1, y )
				101	* (x, y+1) (x+1, y+1)
				102	*
				103	* would be is occupied by
				104	*
				105	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				106	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				107	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				108	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				109	*
				110	* Thus the need to
				111	*
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	112	* w = align(w, 2) * 2;
				113	* y = align(y, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	114	*/
				115	if (layout->interleaved_samples) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	116	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	117	case 0:
				118	case 1:
				119	break;
				120	case 2:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	121	w = u_align(w, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	122	break;
				123	case 4:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	124	w = u_align(w, 2) * 2;
				125	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	126	break;
				127	case 8:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	128	w = u_align(w, 2) * 4;
				129	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	130	break;
				131	case 16:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	132	w = u_align(w, 2) * 4;
				133	h = u_align(h, 2) * 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	134	break;
				135	default:
				136	assert(!"unsupported sample count");
				137	break;
				138	}
				139	}
				140
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	141	/*
				142	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				143	*
				144	* "For separate stencil buffer, the width must be mutiplied by 2 and
				145	* height divided by 2..."
				146	*
				147	* To make things easier (for transfer), we will just double the stencil
				148	* stride in 3DSTATE_STENCIL_BUFFER.
				149	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	150	w = u_align(w, layout->align_i);
				151	h = u_align(h, layout->align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	152
				153	*width = w;
				154	*height = h;
				155	}
				156
				157	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	158	layout_get_num_layers(const struct intel_layout *layout,
				159	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	160	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	161	const XGL_IMAGE_CREATE_INFO *info = params->info;
				162	unsigned num_layers = info->arraySize;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	163
				164	/* samples of the same index are stored in a layer */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	165	if (info->samples > 1 && !layout->interleaved_samples)
				166	num_layers *= info->samples;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	167
				168	return num_layers;
				169	}
				170
				171	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	172	layout_init_layer_height(struct intel_layout *layout,
				173	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	174	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	175	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	176	unsigned num_layers;
				177
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	178	if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
				179	return;
				180
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	181	num_layers = layout_get_num_layers(layout, params);
				182	if (num_layers <= 1)
				183	return;
				184
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	185	/*
				186	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				187	*
				188	* "The following equation is used for surface formats other than
				189	* compressed textures:
				190	*
				191	* QPitch = (h0 + h1 + 11j)"
				192	*
				193	* "The equation for compressed textures (BC* and FXT1 surface formats)
				194	* follows:
				195	*
				196	* QPitch = (h0 + h1 + 11j) / 4"
				197	*
				198	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				199	* value calculated in the equation above, for every other odd Surface
				200	* Height starting from 1 i.e. 1,5,9,13"
				201	*
				202	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				203	*
				204	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				205	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				206	*
				207	* QPitch = (h0 + h1 + 12j)
				208	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				209	*
				210	* (There are many typos or missing words here...)"
				211	*
				212	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				213	* the base address. The PRM divides QPitch by 4 for compressed formats
				214	* because the block height for those formats are 4, and it wants QPitch to
				215	* mean the number of memory rows, as opposed to texel rows, between
				216	* slices. Since we use texel rows everywhere, we do not need to divide
				217	* QPitch by 4.
				218	*/
				219	layout->layer_height = params->h0 + params->h1 +
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	220	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	221
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	222	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	223	layout->height0 % 4 == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	224	layout->layer_height += 4;
				225
				226	params->max_y += layout->layer_height * (num_layers - 1);
				227	}
				228
				229	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	230	layout_init_lods(struct intel_layout *layout,
				231	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	232	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	233	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	234	unsigned cur_x, cur_y;
				235	unsigned lv;
				236
				237	cur_x = 0;
				238	cur_y = 0;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	239	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	240	unsigned lod_w, lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	241
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	242	layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	243
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	244	layout->lods[lv].x = cur_x;
				245	layout->lods[lv].y = cur_y;
				246	layout->lods[lv].slice_width = lod_w;
				247	layout->lods[lv].slice_height = lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	248
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	249	switch (layout->walk) {
				250	case INTEL_LAYOUT_WALK_LOD:
				251	lod_h *= layout_get_num_layers(layout, params);
				252	if (lv == 1)
				253	cur_x += lod_w;
				254	else
				255	cur_y += lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	256
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	257	/* every LOD begins at tile boundaries */
				258	if (info->mipLevels > 1) {
				259	intel_format_is_stencil(params->gpu, layout->format);
				260	cur_x = u_align(cur_x, 64);
				261	cur_y = u_align(cur_y, 64);
				262	}
				263	break;
				264	case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	265	/* MIPLAYOUT_BELOW */
				266	if (lv == 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	267	cur_x += lod_w;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	268	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	269	cur_y += lod_h;
				270	break;
				271	case INTEL_LAYOUT_WALK_3D:
				272	{
				273	const unsigned num_slices = u_minify(info->extent.depth, lv);
				274	const unsigned num_slices_per_row = 1 << lv;
				275	const unsigned num_rows =
				276	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				277
				278	lod_w *= num_slices_per_row;
				279	lod_h *= num_rows;
				280
				281	cur_y += lod_h;
				282	}
				283	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	284	}
				285
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	286	if (params->max_x < layout->lods[lv].x + lod_w)
				287	params->max_x = layout->lods[lv].x + lod_w;
				288	if (params->max_y < layout->lods[lv].y + lod_h)
				289	params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	290	}
				291
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	292	if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
				293	params->h0 = layout->lods[0].slice_height;
				294
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	295	if (info->mipLevels > 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	296	params->h1 = layout->lods[1].slice_height;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	297	else
				298	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				299	}
				300	}
				301
				302	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	303	layout_init_alignments(struct intel_layout *layout,
				304	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	305	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	306	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	307
				308	/*
				309	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				310	*
				311	* "surface format align_i align_j
				312	* YUV 4:2:2 formats 4 *see below
				313	* BC1-5 4 4
				314	* FXT1 8 4
				315	* all other formats 4 *see below"
				316	*
				317	* "- align_j = 4 for any depth buffer
				318	* - align_j = 2 for separate stencil buffer
				319	* - align_j = 4 for any render target surface is multisampled (4x)
				320	* - align_j = 4 for any render target surface with Surface Vertical
				321	* Alignment = VALIGN_4
				322	* - align_j = 2 for any render target surface with Surface Vertical
				323	* Alignment = VALIGN_2
				324	* - align_j = 2 for all other render target surface
				325	* - align_j = 2 for any sampling engine surface with Surface Vertical
				326	* Alignment = VALIGN_2
				327	* - align_j = 4 for any sampling engine surface with Surface Vertical
				328	* Alignment = VALIGN_4"
				329	*
				330	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				331	*
				332	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				333	* the Surface Format is 96 bits per element (BPE)."
				334	*
				335	* They can be rephrased as
				336	*
				337	* align_i align_j
				338	* compressed formats block width block height
				339	* PIPE_FORMAT_S8_UINT 4 2
				340	* other depth/stencil formats 4 4
				341	* 4x multisampled 4 4
				342	* bpp 96 4 2
				343	* others 4 2 or 4
				344	*/
				345
				346	/*
				347	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				348	*
				349	* "surface defined by surface format align_i align_j
				350	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				351	* not D16_UNORM 4 4
				352	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				353	* SURFACE_STATE BC, ETC, EAC* 4 4
				354	* FXT1 8 4
				355	* all others (set by SURFACE_STATE)"
				356	*
				357	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				358	*
				359	* "- This field (Surface Vertical Aligment) is intended to be set to
				360	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				361	* multisampled (4x) render target, or for a multisampled (8x)
				362	* render target, since these surfaces support only alignment of 4.
				363	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				364	* memory.
				365	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				366	* surfaces.
				367	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				368	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				369	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				370	* must be set to VALIGN_4."
				371	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				372	*
				373	* "- This field (Surface Horizontal Aligment) is intended to be set to
				374	* HALIGN_8 only if the surface was rendered as a depth buffer with
				375	* Z16 format or a stencil buffer, since these surfaces support only
				376	* alignment of 8.
				377	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				378	* memory.
				379	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				380	* - This field must be set to HALIGN_8 if the Surface Format is
				381	* FXT1."
				382	*
				383	* They can be rephrased as
				384	*
				385	* align_i align_j
				386	* compressed formats block width block height
				387	* PIPE_FORMAT_Z16_UNORM 8 4
				388	* PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	389	* other depth/stencil formats 4 4
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	390	* 2x or 4x multisampled 4 or 8 4
				391	* tiled Y 4 or 8 4 (if rt)
				392	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				393	* others 4 or 8 2 or 4
				394	*/
				395
				396	if (params->compressed) {
				397	/* this happens to be the case */
				398	layout->align_i = layout->block_width;
				399	layout->align_j = layout->block_height;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	400	} else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				401	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
				402	switch (layout->format.channelFormat) {
				403	case XGL_CH_FMT_R16:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	404	layout->align_i = 8;
				405	layout->align_j = 4;
				406	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	407	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	408	layout->align_i = 8;
				409	layout->align_j = 8;
				410	break;
				411	default:
				412	layout->align_i = 4;
				413	layout->align_j = 4;
				414	break;
				415	}
				416	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	417	switch (layout->format.channelFormat) {
				418	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	419	layout->align_i = 4;
				420	layout->align_j = 2;
				421	break;
				422	default:
				423	layout->align_i = 4;
				424	layout->align_j = 4;
				425	break;
				426	}
				427	}
				428	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	429	const bool valign_4 = (info->samples > 1) \|\|
				430	(intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	431	layout->tiling == INTEL_TILING_Y &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	432	(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	433
				434	if (valign_4)
				435	assert(layout->block_size != 12);
				436
				437	layout->align_i = 4;
				438	layout->align_j = (valign_4) ? 4 : 2;
				439	}
				440
				441	/*
				442	* the fact that align i and j are multiples of block width and height
				443	* respectively is what makes the size of the bo a multiple of the block
				444	* size, slices start at block boundaries, and many of the computations
				445	* work.
				446	*/
				447	assert(layout->align_i % layout->block_width == 0);
				448	assert(layout->align_j % layout->block_height == 0);
				449
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	450	/* make sure u_align() works */
				451	assert(u_is_pow2(layout->align_i) &&
				452	u_is_pow2(layout->align_j));
				453	assert(u_is_pow2(layout->block_width) &&
				454	u_is_pow2(layout->block_height));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	455	}
				456
				457	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	458	layout_get_valid_tilings(const struct intel_layout *layout,
				459	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	460	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	461	const XGL_IMAGE_CREATE_INFO *info = params->info;
				462	const XGL_FORMAT format = layout->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	463	unsigned valid_tilings = LAYOUT_TILING_ALL;
				464
Chia-I Wu	6ac9399	2014-08-30 18:23:28 +0800	[diff] [blame^]	465	if (info->tiling == XGL_LINEAR_TILING)
				466	valid_tilings &= LAYOUT_TILING_NONE;
				467
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	468	/*
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	469	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				470	*
				471	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				472	* Depth Buffer is not supported."
				473	*
				474	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				475	*
				476	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				477	*
				478	* "W-Major Tile Format is used for separate stencil."
				479	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	480	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				481	switch (format.channelFormat) {
				482	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	483	valid_tilings &= LAYOUT_TILING_W;
				484	break;
				485	default:
				486	valid_tilings &= LAYOUT_TILING_Y;
				487	break;
				488	}
				489	}
				490
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	491	if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	492	/*
				493	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				494	*
				495	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				496	* either TileX or Linear."
				497	*/
				498	if (layout->block_size == 16)
				499	valid_tilings &= ~LAYOUT_TILING_Y;
				500
				501	/*
				502	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				503	*
				504	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				505	* for all tiled Y Render Target surfaces."
				506	*
				507	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				508	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	509	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	510	valid_tilings &= ~LAYOUT_TILING_Y;
				511	}
				512
				513	/* no conflicting binding flags */
				514	assert(valid_tilings);
				515
				516	return valid_tilings;
				517	}
				518
				519	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	520	layout_init_tiling(struct intel_layout *layout,
				521	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	522	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	523	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	524	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				525
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	526	/* no hardware support for W-tile */
				527	if (valid_tilings & LAYOUT_TILING_W)
				528	valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) \| LAYOUT_TILING_NONE;
				529
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	530	layout->valid_tilings = valid_tilings;
				531
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	532	if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT \|
				533	XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	534	/*
				535	* heuristically set a minimum width/height for enabling tiling
				536	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	537	if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	538	valid_tilings &= ~LAYOUT_TILING_X;
				539
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	540	if ((layout->width0 < 32 \|\| layout->height0 < 16) &&
				541	(layout->width0 < 16 \|\| layout->height0 < 32) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	542	(valid_tilings & ~LAYOUT_TILING_Y))
				543	valid_tilings &= ~LAYOUT_TILING_Y;
				544	} else {
				545	/* force linear if we are not sure where the texture is bound to */
				546	if (valid_tilings & LAYOUT_TILING_NONE)
				547	valid_tilings &= LAYOUT_TILING_NONE;
				548	}
				549
				550	/* prefer tiled over linear */
				551	if (valid_tilings & LAYOUT_TILING_Y)
				552	layout->tiling = INTEL_TILING_Y;
				553	else if (valid_tilings & LAYOUT_TILING_X)
				554	layout->tiling = INTEL_TILING_X;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	555	else
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	556	layout->tiling = INTEL_TILING_NONE;
				557	}
				558
				559	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	560	layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	561	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	562	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	563	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	564
				565	/*
				566	* It is not explicitly states, but render targets are expected to be
				567	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				568	* to be IMS (samples interleaved).
				569	*
				570	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				571	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	572	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	573	/*
				574	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				575	*
				576	* "note that the depth buffer and stencil buffer have an implied
				577	* value of ARYSPC_FULL"
				578	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	579	layout->walk = (info->imageType == XGL_IMAGE_3D) ?
				580	INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	581
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	582	layout->interleaved_samples = true;
				583	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	584	/*
				585	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				586	*
				587	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				588	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				589	* Array Spacing) must be set to ARYSPC_LOD0."
				590	*
				591	* As multisampled resources are not mipmapped, we never use
				592	* ARYSPC_FULL for them.
				593	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	594	if (info->samples > 1)
				595	assert(info->mipLevels == 1);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	596
				597	layout->walk =
				598	(info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
				599	(info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
				600	INTEL_LAYOUT_WALK_LOD;
				601
				602	layout->interleaved_samples = false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	603	}
				604	}
				605
				606	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	607	layout_init_walk_gen6(struct intel_layout *layout,
				608	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	609	{
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	610	/*
				611	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				612	*
				613	* "The separate stencil buffer does not support mip mapping, thus the
				614	* storage for LODs other than LOD 0 is not needed. The following
				615	* QPitch equation applies only to the separate stencil buffer:
				616	*
				617	* QPitch = h_0"
				618	*
				619	* GEN6 does not support compact spacing otherwise.
				620	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	621	layout->walk =
				622	(params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
				623	intel_format_is_stencil(params->gpu, layout->format) ? INTEL_LAYOUT_WALK_LOD :
				624	INTEL_LAYOUT_WALK_LAYER;
				625
				626	/* GEN6 supports only interleaved samples */
				627	layout->interleaved_samples = true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	628	}
				629
				630	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	631	layout_init_walk(struct intel_layout *layout,
				632	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	633	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	634	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	635	layout_init_walk_gen7(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	636	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	637	layout_init_walk_gen6(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	638	}
				639
				640	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	641	layout_init_size_and_format(struct intel_layout *layout,
				642	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	643	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	644	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	645	XGL_FORMAT format = info->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	646	bool require_separate_stencil;
				647
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	648	layout->width0 = info->extent.width;
				649	layout->height0 = info->extent.height;
				650
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	651	/*
				652	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				653	*
				654	* "This field (Separate Stencil Buffer Enable) must be set to the same
				655	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				656	*
				657	* GEN7+ requires separate stencil buffers.
				658	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	659	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				660	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	661	require_separate_stencil = true;
				662	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	663	require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	664	}
				665
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	666	if (format.numericFormat == XGL_NUM_FMT_DS) {
				667	switch (format.channelFormat) {
				668	case XGL_CH_FMT_R32G8:
				669	if (require_separate_stencil) {
				670	format.channelFormat = XGL_CH_FMT_R32;
				671	layout->separate_stencil = true;
				672	}
				673	break;
				674	default:
				675	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	676	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	677	}
				678
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	679	layout->format = format;
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	680	layout->block_width = icd_format_get_block_width(format);
				681	layout->block_height = layout->block_width;
				682	layout->block_size = icd_format_get_size(format);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	683
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	684	params->compressed = icd_format_is_compressed(format);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	685	}
				686
				687	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	688	layout_want_mcs(struct intel_layout *layout,
				689	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	690	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	691	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	692	bool want_mcs = false;
				693
				694	/* MCS is for RT on GEN7+ */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	695	if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	696	return false;
				697
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	698	if (info->imageType != XGL_IMAGE_2D \|\|
				699	!(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	700	return false;
				701
				702	/*
				703	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				704	*
				705	* "For Render Target and Sampling Engine Surfaces:If the surface is
				706	* multisampled (Number of Multisamples any value other than
				707	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				708	*
				709	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				710	* are not written"
				711	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	712	if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	713	!icd_format_is_int(info->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	714	want_mcs = true;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	715	} else if (info->samples <= 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	716	/*
				717	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				718	*
				719	* "When MCS is buffer is used for color clear of non-multisampler
				720	* render target, the following restrictions apply.
				721	* - Support is limited to tiled render targets.
				722	* - Support is for non-mip-mapped and non-array surface types
				723	* only.
				724	* - Clear is supported only on the full RT; i.e., no partial clear
				725	* or overlapping clears.
				726	* - MCS buffer for non-MSRT is supported only for RT formats
				727	* 32bpp, 64bpp and 128bpp.
				728	* ..."
				729	*/
				730	if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	731	info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	732	switch (layout->block_size) {
				733	case 4:
				734	case 8:
				735	case 16:
				736	want_mcs = true;
				737	break;
				738	default:
				739	break;
				740	}
				741	}
				742	}
				743
				744	return want_mcs;
				745	}
				746
				747	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	748	layout_want_hiz(const struct intel_layout *layout,
				749	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	750	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	751	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	752
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	753	if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	754	return false;
				755
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame]	756	if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	757	return false;
				758
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	759	/*
				760	* As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
				761	* for every level. This is generally fine except on GEN6, where HiZ and
				762	* separate stencil are enabled and disabled at the same time. When the
				763	* format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
				764	* can result in incompatible formats.
				765	*/
				766	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
				767	info->format.channelFormat == XGL_CH_FMT_R32G8 &&
				768	info->mipLevels > 1)
				769	return false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	770
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	771	return true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	772	}
				773
				774	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	775	layout_init_aux(struct intel_layout *layout,
				776	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	777	{
				778	if (layout_want_hiz(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	779	layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	780	else if (layout_want_mcs(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	781	layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	782	}
				783
				784	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	785	layout_align(struct intel_layout layout, struct intel_layout_params params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	786	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	787	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	788	int align_w = 1, align_h = 1, pad_h = 0;
				789
				790	/*
				791	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				792	*
				793	* "To determine the necessary padding on the bottom and right side of
				794	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				795	* parameters for the surface format in use. The surface must then be
				796	* extended to the next multiple of the alignment unit size in each
				797	* dimension, and all texels contained in this extended surface must
				798	* have valid GTT entries."
				799	*
				800	* "For cube surfaces, an additional two rows of padding are required
				801	* at the bottom of the surface. This must be ensured regardless of
				802	* whether the surface is stored tiled or linear. This is due to the
				803	* potential rotation of cache line orientation from memory to cache."
				804	*
				805	* "For compressed textures (BC* and FXT1 surface formats), padding at
				806	* the bottom of the surface is to an even compressed row, which is
				807	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				808	* purposes, these surfaces behave as if j = 8 only for surface
				809	* padding purposes. The value of 4 for j still applies for mip level
				810	* alignment and QPitch calculation."
				811	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	812	if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
				813	if (align_w < layout->align_i)
				814	align_w = layout->align_i;
				815	if (align_h < layout->align_j)
				816	align_h = layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	817
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	818	/* in case it is used as a cube */
				819	if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	820	pad_h += 2;
				821
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	822	if (params->compressed && align_h < layout->align_j * 2)
				823	align_h = layout->align_j * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	824	}
				825
				826	/*
				827	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				828	*
				829	* "If the surface contains an odd number of rows of data, a final row
				830	* below the surface must be allocated."
				831	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	832	if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
				833	align_h = 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	834
				835	/*
				836	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	837	* intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	838	* To avoid out-of-bound access, we have to pad.
				839	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	840	if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
				841	info->mipLevels == 1 &&
				842	info->arraySize == 1 &&
				843	info->extent.depth == 1) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	844	if (align_w < 8)
				845	align_w = 8;
				846	if (align_h < 4)
				847	align_h = 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	848	}
				849
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	850	params->max_x = u_align(params->max_x, align_w);
				851	params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	852	}
				853
				854	/* note that this may force the texture to be linear */
				855	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	856	layout_calculate_bo_size(struct intel_layout *layout,
				857	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	858	{
				859	assert(params->max_x % layout->block_width == 0);
				860	assert(params->max_y % layout->block_height == 0);
				861	assert(layout->layer_height % layout->block_height == 0);
				862
				863	layout->bo_stride =
				864	(params->max_x / layout->block_width) * layout->block_size;
				865	layout->bo_height = params->max_y / layout->block_height;
				866
				867	while (true) {
				868	unsigned w = layout->bo_stride, h = layout->bo_height;
				869	unsigned align_w, align_h;
				870
				871	/*
				872	* From the Haswell PRM, volume 5, page 163:
				873	*
				874	* "For linear surfaces, additional padding of 64 bytes is required
				875	* at the bottom of the surface. This is in addition to the padding
				876	* required above."
				877	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	878	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
				879	(params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	880	layout->tiling == INTEL_TILING_NONE) {
				881	layout->bo_height +=
				882	(64 + layout->bo_stride - 1) / layout->bo_stride;
				883	}
				884
				885	/*
				886	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				887	*
				888	* "- For linear render target surfaces, the pitch must be a
				889	* multiple of the element size for non-YUV surface formats.
				890	* Pitch must be a multiple of 2 * element size for YUV surface
				891	* formats.
				892	* - For other linear surfaces, the pitch can be any multiple of
				893	* bytes.
				894	* - For tiled surfaces, the pitch must be a multiple of the tile
				895	* width."
				896	*
				897	* Different requirements may exist when the bo is used in different
				898	* places, but our alignments here should be good enough that we do not
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	899	* need to check layout->info->usage.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	900	*/
				901	switch (layout->tiling) {
				902	case INTEL_TILING_X:
				903	align_w = 512;
				904	align_h = 8;
				905	break;
				906	case INTEL_TILING_Y:
				907	align_w = 128;
				908	align_h = 32;
				909	break;
				910	default:
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame]	911	if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	912	/*
				913	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				914	*
				915	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				916	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				917	* bytes."
				918	*
				919	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				920	* INTEL_TILING_W, we want to align to W tiles here.
				921	*/
				922	align_w = 64;
				923	align_h = 64;
				924	} else {
				925	/* some good enough values */
				926	align_w = 64;
				927	align_h = 2;
				928	}
				929	break;
				930	}
				931
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	932	w = u_align(w, align_w);
				933	h = u_align(h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	934
				935	/* make sure the bo is mappable */
				936	if (layout->tiling != INTEL_TILING_NONE) {
				937	/*
				938	* Usually only the first 256MB of the GTT is mappable.
				939	*
				940	* See also how intel_context::max_gtt_map_object_size is calculated.
				941	*/
				942	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				943
				944	/*
				945	* Be conservative. We may be able to switch from VALIGN_4 to
				946	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				947	*/
				948	if (mappable_gtt_size / w / 4 < h) {
				949	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				950	layout->tiling = INTEL_TILING_NONE;
				951	/* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	952	if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	953	params->info->samples <= 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	954	layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	955
				956	continue;
				957	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	958	/* mapping will fail */
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	959	}
				960	}
				961	}
				962
				963	layout->bo_stride = w;
				964	layout->bo_height = h;
				965	break;
				966	}
				967	}
				968
				969	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	970	layout_calculate_hiz_size(struct intel_layout *layout,
				971	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	972	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	973	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	974	const unsigned hz_align_j = 8;
				975	enum intel_layout_walk_type hz_walk;
				976	unsigned hz_width, hz_height, lv;
				977	unsigned hz_clear_w, hz_clear_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	978
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	979	assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
				980
				981	assert(layout->walk == INTEL_LAYOUT_WALK_LAYER \|\|
				982	layout->walk == INTEL_LAYOUT_WALK_3D);
				983
				984	/*
				985	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				986	*
				987	* "The hierarchical depth buffer does not support the LOD field, it is
				988	* assumed by hardware to be zero. A separate hierarachical depth
				989	* buffer is required for each LOD used, and the corresponding
				990	* buffer's state delivered to hardware each time a new depth buffer
				991	* state with modified LOD is delivered."
				992	*
				993	* We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
				994	*/
				995	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				996	hz_walk = layout->walk;
				997	else
				998	hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	999
				1000	/*
				1001	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				1002	* PRM, volume 2 part 1, page 312-313.
				1003	*
				1004	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				1005	* memory row.
				1006	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1007	switch (hz_walk) {
				1008	case INTEL_LAYOUT_WALK_LOD:
				1009	{
				1010	unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
				1011	unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
				1012	unsigned cur_tx, cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1013
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1014	/* figure out the tile offsets of LODs */
				1015	hz_width = 0;
				1016	hz_height = 0;
				1017	cur_tx = 0;
				1018	cur_ty = 0;
				1019	for (lv = 0; lv < info->mipLevels; lv++) {
				1020	unsigned tw, th;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1021
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1022	lod_tx[lv] = cur_tx;
				1023	lod_ty[lv] = cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1024
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1025	tw = u_align(layout->lods[lv].slice_width, 16);
				1026	th = u_align(layout->lods[lv].slice_height, hz_align_j) *
				1027	info->arraySize / 2;
				1028	/* convert to Y-tiles */
				1029	tw = u_align(tw, 128) / 128;
				1030	th = u_align(th, 32) / 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1031
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1032	if (hz_width < cur_tx + tw)
				1033	hz_width = cur_tx + tw;
				1034	if (hz_height < cur_ty + th)
				1035	hz_height = cur_ty + th;
				1036
				1037	if (lv == 1)
				1038	cur_tx += tw;
				1039	else
				1040	cur_ty += th;
				1041	}
				1042
				1043	/* convert tile offsets to memory offsets */
				1044	for (lv = 0; lv < info->mipLevels; lv++) {
				1045	layout->aux_offsets[lv] =
				1046	(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
				1047	}
				1048	hz_width *= 128;
				1049	hz_height *= 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1050	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1051	break;
				1052	case INTEL_LAYOUT_WALK_LAYER:
				1053	{
				1054	const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1055	const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1056	const unsigned htail =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1057	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1058	const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1059
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1060	hz_width = u_align(layout->lods[0].slice_width, 16);
				1061
				1062	hz_height = hz_qpitch * info->arraySize / 2;
				1063	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1064	hz_height = u_align(hz_height, 8);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1065	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1066	break;
				1067	case INTEL_LAYOUT_WALK_3D:
				1068	hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1069
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1070	hz_height = 0;
				1071	for (lv = 0; lv < info->mipLevels; lv++) {
				1072	const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
				1073	/* according to the formula, slices are packed together vertically */
				1074	hz_height += h * u_minify(info->extent.depth, lv);
				1075	}
				1076	hz_height /= 2;
				1077	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1078	}
				1079
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1080	/*
				1081	* In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
				1082	* Experiments on Haswell show that aligning the RECTLIST primitive and
				1083	* 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
				1084	* aligned.
				1085	*/
				1086	hz_clear_w = 8;
				1087	hz_clear_h = 4;
				1088	switch (info->samples) {
				1089	case 0:
				1090	case 1:
				1091	default:
				1092	break;
				1093	case 2:
				1094	hz_clear_w /= 2;
				1095	break;
				1096	case 4:
				1097	hz_clear_w /= 2;
				1098	hz_clear_h /= 2;
				1099	break;
				1100	case 8:
				1101	hz_clear_w /= 4;
				1102	hz_clear_h /= 2;
				1103	break;
				1104	case 16:
				1105	hz_clear_w /= 4;
				1106	hz_clear_h /= 4;
				1107	break;
				1108	}
				1109
				1110	for (lv = 0; lv < info->mipLevels; lv++) {
				1111	if (u_minify(layout->width0, lv) % hz_clear_w \|\|
				1112	u_minify(layout->height0, lv) % hz_clear_h)
				1113	break;
				1114	layout->aux_enables \|= 1 << lv;
				1115	}
				1116
				1117	/* we padded to allow this in layout_align() */
				1118	if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
				1119	layout->aux_enables \|= 0x1;
				1120
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1121	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1122	layout->aux_stride = u_align(hz_width, 128);
				1123	layout->aux_height = u_align(hz_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1124	}
				1125
				1126	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1127	layout_calculate_mcs_size(struct intel_layout *layout,
				1128	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1129	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1130	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1131	int mcs_width, mcs_height, mcs_cpp;
				1132	int downscale_x, downscale_y;
				1133
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1134	assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1135
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1136	if (info->samples > 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1137	/*
				1138	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1139	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1140	* need of scale down could be that the clear rectangle is used to clear
				1141	* the MCS instead of the RT.
				1142	*
				1143	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1144	* 2x2 factor could come from that the hardware writes 128 bits (an
				1145	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1146	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1147	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1148	* pixel block in the RT.
				1149	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1150	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1151	case 2:
				1152	case 4:
				1153	downscale_x = 8;
				1154	downscale_y = 2;
				1155	mcs_cpp = 1;
				1156	break;
				1157	case 8:
				1158	downscale_x = 2;
				1159	downscale_y = 2;
				1160	mcs_cpp = 4;
				1161	break;
				1162	case 16:
				1163	downscale_x = 2;
				1164	downscale_y = 1;
				1165	mcs_cpp = 8;
				1166	break;
				1167	default:
				1168	assert(!"unsupported sample count");
				1169	return;
				1170	break;
				1171	}
				1172
				1173	/*
				1174	* It also appears that the 2x2 subspans generated by the scaled-down
				1175	* clear rectangle cannot be masked. The scale-down clear rectangle
				1176	* thus must be aligned to 2x2, and we need to pad.
				1177	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1178	mcs_width = u_align(layout->width0, downscale_x * 2);
				1179	mcs_height = u_align(layout->height0, downscale_y * 2);
				1180	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1181	/*
				1182	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1183	*
				1184	* " Pixels Lines
				1185	* TiledY RT CL
				1186	* bpp
				1187	* 32 8 4
				1188	* 64 4 4
				1189	* 128 2 4
				1190	*
				1191	* TiledX RT CL
				1192	* bpp
				1193	* 32 16 2
				1194	* 64 8 2
				1195	* 128 4 2"
				1196	*
				1197	* This table and the two following tables define the RT alignments, the
				1198	* clear rectangle alignments, and the clear rectangle scale factors.
				1199	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1200	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1201	* rectangle scale factors are 8x16 blocks.
				1202	*
				1203	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1204	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1205	* 8x16 blocks.
				1206	*
				1207	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1208	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1209	* which says that a Y-tile maps to 128x256 blocks (\see
				1210	* intel_get_non_msrt_mcs_alignment). It does not really change
				1211	* anything except for the size of the allocated MCS. Let's see if we
				1212	* hit out-of-bound access.
				1213	*/
				1214	switch (layout->tiling) {
				1215	case INTEL_TILING_X:
				1216	downscale_x = 64 / layout->block_size;
				1217	downscale_y = 2;
				1218	break;
				1219	case INTEL_TILING_Y:
				1220	downscale_x = 32 / layout->block_size;
				1221	downscale_y = 4;
				1222	break;
				1223	default:
				1224	assert(!"unsupported tiling mode");
				1225	return;
				1226	break;
				1227	}
				1228
				1229	downscale_x *= 8;
				1230	downscale_y *= 16;
				1231
				1232	/*
				1233	* From the Haswell PRM, volume 7, page 652:
				1234	*
				1235	* "Clear rectangle must be aligned to two times the number of
				1236	* pixels in the table shown below due to 16X16 hashing across the
				1237	* slice."
				1238	*
				1239	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1240	* 2x2, and we need to pad.
				1241	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1242	mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
				1243	mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1244	mcs_cpp = 16; /* an OWord */
				1245	}
				1246
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1247	layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1248	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1249	layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
				1250	layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1251	}
				1252
				1253	/**
				1254	* Initialize the layout. Callers should zero-initialize \p layout first.
				1255	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1256	void intel_layout_init(struct intel_layout *layout,
				1257	const struct intel_dev *dev,
				1258	const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1259	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1260	struct intel_layout_params params;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1261
				1262	memset(&params, 0, sizeof(params));
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1263	params.gpu = dev->gpu;
				1264	params.info = info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1265
				1266	/* note that there are dependencies between these functions */
				1267	layout_init_aux(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1268	layout_init_size_and_format(layout, &params);
				1269	layout_init_walk(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1270	layout_init_tiling(layout, &params);
				1271	layout_init_alignments(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1272	layout_init_lods(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1273	layout_init_layer_height(layout, &params);
				1274
				1275	layout_align(layout, &params);
				1276	layout_calculate_bo_size(layout, &params);
				1277
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1278	switch (layout->aux) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1279	case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1280	layout_calculate_hiz_size(layout, &params);
				1281	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1282	case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1283	layout_calculate_mcs_size(layout, &params);
				1284	break;
				1285	default:
				1286	break;
				1287	}
				1288	}
				1289
				1290	/**
				1291	* Update the tiling mode and bo stride (for imported resources).
				1292	*/
				1293	bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1294	intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1295	enum intel_tiling_mode tiling,
				1296	unsigned bo_stride)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1297	{
				1298	if (!(layout->valid_tilings & (1 << tiling)))
				1299	return false;
				1300
				1301	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1302	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1303	return false;
				1304
				1305	layout->tiling = tiling;
				1306	layout->bo_stride = bo_stride;
				1307
				1308	return true;
				1309	}