Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: 71a0de9b9cab7aa3cc2c5662340581f25f449689 [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
				25	* Chia-I Wu <olv@lunarg.com>
				26	*/
				27
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	28	#include "dev.h"
				29	#include "gpu.h"
				30	#include "layout.h"
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	31
				32	enum {
				33	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				34	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				35	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				36	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				37
				38	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				39	LAYOUT_TILING_X \|
				40	LAYOUT_TILING_Y \|
				41	LAYOUT_TILING_W)
				42	};
				43
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	44	struct intel_layout_params {
				45	const struct intel_gpu *gpu;
				46	const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	47
				48	bool compressed;
				49
				50	unsigned h0, h1;
				51	unsigned max_x, max_y;
				52	};
				53
				54	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	55	layout_get_slice_size(const struct intel_layout *layout,
				56	const struct intel_layout_params *params,
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	57	unsigned level, unsigned width, unsigned height)
				58	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	59	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	60	unsigned w, h;
				61
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	62	w = u_minify(info->extent.width, level);
				63	h = u_minify(info->extent.height, level);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	64
				65	/*
				66	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				67	*
				68	* "The dimensions of the mip maps are first determined by applying the
				69	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				70	* if necessary, they are padded out to compression block boundaries."
				71	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	72	w = u_align(w, layout->block_width);
				73	h = u_align(h, layout->block_height);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	74
				75	/*
				76	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				77	*
				78	* "If the surface is multisampled (4x), these values must be adjusted
				79	* as follows before proceeding:
				80	*
				81	* W_L = ceiling(W_L / 2) * 4
				82	* H_L = ceiling(H_L / 2) * 4"
				83	*
				84	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				85	*
				86	* "If the surface is multisampled and it is a depth or stencil surface
				87	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				88	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				89	* proceeding:
				90	*
				91	* #samples W_L = H_L =
				92	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				93	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				94	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				95	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				96	*
				97	* For interleaved samples (4x), where pixels
				98	*
				99	* (x, y ) (x+1, y )
				100	* (x, y+1) (x+1, y+1)
				101	*
				102	* would be is occupied by
				103	*
				104	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				105	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				106	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				107	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				108	*
				109	* Thus the need to
				110	*
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	111	* w = u_align(w, 2) * 2;
				112	* y = u_align(y, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	113	*/
				114	if (layout->interleaved_samples) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	115	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	116	case 0:
				117	case 1:
				118	break;
				119	case 2:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	120	w = u_align(w, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	121	break;
				122	case 4:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	123	w = u_align(w, 2) * 2;
				124	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	125	break;
				126	case 8:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	127	w = u_align(w, 2) * 4;
				128	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	129	break;
				130	case 16:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	131	w = u_align(w, 2) * 4;
				132	h = u_align(h, 2) * 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	133	break;
				134	default:
				135	assert(!"unsupported sample count");
				136	break;
				137	}
				138	}
				139
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	140	w = u_align(w, layout->align_i);
				141	h = u_align(h, layout->align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	142
				143	*width = w;
				144	*height = h;
				145	}
				146
				147	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	148	layout_get_num_layers(const struct intel_layout *layout,
				149	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	150	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	151	const XGL_IMAGE_CREATE_INFO *info = params->info;
				152	unsigned num_layers = info->arraySize;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	153
				154	/* samples of the same index are stored in a layer */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	155	if (info->samples > 1 && !layout->interleaved_samples)
				156	num_layers *= info->samples;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	157
				158	return num_layers;
				159	}
				160
				161	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	162	layout_init_layer_height(struct intel_layout *layout,
				163	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	164	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	165	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	166	unsigned num_layers;
				167
				168	num_layers = layout_get_num_layers(layout, params);
				169	if (num_layers <= 1)
				170	return;
				171
				172	if (!layout->full_layers) {
				173	layout->layer_height = params->h0;
				174	params->max_y += params->h0 * (num_layers - 1);
				175	return;
				176	}
				177
				178	/*
				179	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				180	*
				181	* "The following equation is used for surface formats other than
				182	* compressed textures:
				183	*
				184	* QPitch = (h0 + h1 + 11j)"
				185	*
				186	* "The equation for compressed textures (BC* and FXT1 surface formats)
				187	* follows:
				188	*
				189	* QPitch = (h0 + h1 + 11j) / 4"
				190	*
				191	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				192	* value calculated in the equation above, for every other odd Surface
				193	* Height starting from 1 i.e. 1,5,9,13"
				194	*
				195	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				196	*
				197	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				198	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				199	*
				200	* QPitch = (h0 + h1 + 12j)
				201	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				202	*
				203	* (There are many typos or missing words here...)"
				204	*
				205	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				206	* the base address. The PRM divides QPitch by 4 for compressed formats
				207	* because the block height for those formats are 4, and it wants QPitch to
				208	* mean the number of memory rows, as opposed to texel rows, between
				209	* slices. Since we use texel rows everywhere, we do not need to divide
				210	* QPitch by 4.
				211	*/
				212	layout->layer_height = params->h0 + params->h1 +
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	213	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	214
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	215	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
				216	info->extent.height % 4 == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	217	layout->layer_height += 4;
				218
				219	params->max_y += layout->layer_height * (num_layers - 1);
				220	}
				221
				222	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	223	layout_init_levels(struct intel_layout *layout,
				224	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	225	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	226	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	227	unsigned cur_x, cur_y;
				228	unsigned lv;
				229
				230	cur_x = 0;
				231	cur_y = 0;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	232	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	233	unsigned level_w, level_h;
				234
				235	layout_get_slice_size(layout, params, lv, &level_w, &level_h);
				236
				237	layout->levels[lv].x = cur_x;
				238	layout->levels[lv].y = cur_y;
				239	layout->levels[lv].slice_width = level_w;
				240	layout->levels[lv].slice_height = level_h;
				241
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	242	if (info->imageType == XGL_IMAGE_3D) {
				243	const unsigned num_slices = u_minify(info->extent.depth, lv);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	244	const unsigned num_slices_per_row = 1 << lv;
				245	const unsigned num_rows =
				246	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				247
				248	level_w *= num_slices_per_row;
				249	level_h *= num_rows;
				250
				251	cur_y += level_h;
				252	} else {
				253	/* MIPLAYOUT_BELOW */
				254	if (lv == 1)
				255	cur_x += level_w;
				256	else
				257	cur_y += level_h;
				258	}
				259
				260	if (params->max_x < layout->levels[lv].x + level_w)
				261	params->max_x = layout->levels[lv].x + level_w;
				262	if (params->max_y < layout->levels[lv].y + level_h)
				263	params->max_y = layout->levels[lv].y + level_h;
				264	}
				265
				266	params->h0 = layout->levels[0].slice_height;
				267	if (layout->full_layers) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	268	if (info->mipLevels > 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	269	params->h1 = layout->levels[1].slice_height;
				270	else
				271	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				272	}
				273	}
				274
				275	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	276	layout_init_alignments(struct intel_layout *layout,
				277	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	278	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	279	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	280
				281	/*
				282	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				283	*
				284	* "surface format align_i align_j
				285	* YUV 4:2:2 formats 4 *see below
				286	* BC1-5 4 4
				287	* FXT1 8 4
				288	* all other formats 4 *see below"
				289	*
				290	* "- align_j = 4 for any depth buffer
				291	* - align_j = 2 for separate stencil buffer
				292	* - align_j = 4 for any render target surface is multisampled (4x)
				293	* - align_j = 4 for any render target surface with Surface Vertical
				294	* Alignment = VALIGN_4
				295	* - align_j = 2 for any render target surface with Surface Vertical
				296	* Alignment = VALIGN_2
				297	* - align_j = 2 for all other render target surface
				298	* - align_j = 2 for any sampling engine surface with Surface Vertical
				299	* Alignment = VALIGN_2
				300	* - align_j = 4 for any sampling engine surface with Surface Vertical
				301	* Alignment = VALIGN_4"
				302	*
				303	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				304	*
				305	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				306	* the Surface Format is 96 bits per element (BPE)."
				307	*
				308	* They can be rephrased as
				309	*
				310	* align_i align_j
				311	* compressed formats block width block height
				312	* PIPE_FORMAT_S8_UINT 4 2
				313	* other depth/stencil formats 4 4
				314	* 4x multisampled 4 4
				315	* bpp 96 4 2
				316	* others 4 2 or 4
				317	*/
				318
				319	/*
				320	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				321	*
				322	* "surface defined by surface format align_i align_j
				323	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				324	* not D16_UNORM 4 4
				325	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				326	* SURFACE_STATE BC, ETC, EAC* 4 4
				327	* FXT1 8 4
				328	* all others (set by SURFACE_STATE)"
				329	*
				330	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				331	*
				332	* "- This field (Surface Vertical Aligment) is intended to be set to
				333	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				334	* multisampled (4x) render target, or for a multisampled (8x)
				335	* render target, since these surfaces support only alignment of 4.
				336	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				337	* memory.
				338	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				339	* surfaces.
				340	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				341	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				342	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				343	* must be set to VALIGN_4."
				344	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				345	*
				346	* "- This field (Surface Horizontal Aligment) is intended to be set to
				347	* HALIGN_8 only if the surface was rendered as a depth buffer with
				348	* Z16 format or a stencil buffer, since these surfaces support only
				349	* alignment of 8.
				350	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				351	* memory.
				352	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				353	* - This field must be set to HALIGN_8 if the Surface Format is
				354	* FXT1."
				355	*
				356	* They can be rephrased as
				357	*
				358	* align_i align_j
				359	* compressed formats block width block height
				360	* PIPE_FORMAT_Z16_UNORM 8 4
				361	* PIPE_FORMAT_S8_UINT 8 8
				362	* other depth/stencil formats 4 or 8 4
				363	* 2x or 4x multisampled 4 or 8 4
				364	* tiled Y 4 or 8 4 (if rt)
				365	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				366	* others 4 or 8 2 or 4
				367	*/
				368
				369	if (params->compressed) {
				370	/* this happens to be the case */
				371	layout->align_i = layout->block_width;
				372	layout->align_j = layout->block_height;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	373	} else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				374	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
				375	switch (layout->format.channelFormat) {
				376	case XGL_CH_FMT_R16:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	377	layout->align_i = 8;
				378	layout->align_j = 4;
				379	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	380	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	381	layout->align_i = 8;
				382	layout->align_j = 8;
				383	break;
				384	default:
				385	layout->align_i = 4;
				386	layout->align_j = 4;
				387	break;
				388	}
				389	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	390	switch (layout->format.channelFormat) {
				391	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	392	layout->align_i = 4;
				393	layout->align_j = 2;
				394	break;
				395	default:
				396	layout->align_i = 4;
				397	layout->align_j = 4;
				398	break;
				399	}
				400	}
				401	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	402	const bool valign_4 = (info->samples > 1) \|\|
				403	(intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	404	layout->tiling == INTEL_TILING_Y &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	405	(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	406
				407	if (valign_4)
				408	assert(layout->block_size != 12);
				409
				410	layout->align_i = 4;
				411	layout->align_j = (valign_4) ? 4 : 2;
				412	}
				413
				414	/*
				415	* the fact that align i and j are multiples of block width and height
				416	* respectively is what makes the size of the bo a multiple of the block
				417	* size, slices start at block boundaries, and many of the computations
				418	* work.
				419	*/
				420	assert(layout->align_i % layout->block_width == 0);
				421	assert(layout->align_j % layout->block_height == 0);
				422
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	423	/* make sure u_align() works */
				424	assert(u_is_pow2(layout->align_i) &&
				425	u_is_pow2(layout->align_j));
				426	assert(u_is_pow2(layout->block_width) &&
				427	u_is_pow2(layout->block_height));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	428	}
				429
				430	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	431	layout_get_valid_tilings(const struct intel_layout *layout,
				432	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	433	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	434	const XGL_IMAGE_CREATE_INFO *info = params->info;
				435	const XGL_FORMAT format = layout->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	436	unsigned valid_tilings = LAYOUT_TILING_ALL;
				437
				438	/*
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	439	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				440	*
				441	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				442	* Depth Buffer is not supported."
				443	*
				444	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				445	*
				446	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				447	*
				448	* "W-Major Tile Format is used for separate stencil."
				449	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	450	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				451	switch (format.channelFormat) {
				452	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	453	valid_tilings &= LAYOUT_TILING_W;
				454	break;
				455	default:
				456	valid_tilings &= LAYOUT_TILING_Y;
				457	break;
				458	}
				459	}
				460
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	461	if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	462	/*
				463	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				464	*
				465	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				466	* either TileX or Linear."
				467	*/
				468	if (layout->block_size == 16)
				469	valid_tilings &= ~LAYOUT_TILING_Y;
				470
				471	/*
				472	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				473	*
				474	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				475	* for all tiled Y Render Target surfaces."
				476	*
				477	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				478	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	479	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	480	valid_tilings &= ~LAYOUT_TILING_Y;
				481	}
				482
				483	/* no conflicting binding flags */
				484	assert(valid_tilings);
				485
				486	return valid_tilings;
				487	}
				488
				489	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	490	layout_init_tiling(struct intel_layout *layout,
				491	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	492	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	493	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	494	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				495
				496	layout->valid_tilings = valid_tilings;
				497
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	498	if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT \| XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	499	/*
				500	* heuristically set a minimum width/height for enabling tiling
				501	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	502	if (info->extent.width < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	503	valid_tilings &= ~LAYOUT_TILING_X;
				504
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	505	if ((info->extent.width < 32 \|\| info->extent.height < 16) &&
				506	(info->extent.width < 16 \|\| info->extent.height < 32) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	507	(valid_tilings & ~LAYOUT_TILING_Y))
				508	valid_tilings &= ~LAYOUT_TILING_Y;
				509	} else {
				510	/* force linear if we are not sure where the texture is bound to */
				511	if (valid_tilings & LAYOUT_TILING_NONE)
				512	valid_tilings &= LAYOUT_TILING_NONE;
				513	}
				514
				515	/* prefer tiled over linear */
				516	if (valid_tilings & LAYOUT_TILING_Y)
				517	layout->tiling = INTEL_TILING_Y;
				518	else if (valid_tilings & LAYOUT_TILING_X)
				519	layout->tiling = INTEL_TILING_X;
				520	else /* linear or W-tiled, which has no hardware support */
				521	layout->tiling = INTEL_TILING_NONE;
				522	}
				523
				524	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	525	layout_init_arrangements_gen7(struct intel_layout *layout,
				526	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	527	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	528	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	529
				530	/*
				531	* It is not explicitly states, but render targets are expected to be
				532	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				533	* to be IMS (samples interleaved).
				534	*
				535	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				536	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	537	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	538	layout->interleaved_samples = true;
				539
				540	/*
				541	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				542	*
				543	* "note that the depth buffer and stencil buffer have an implied
				544	* value of ARYSPC_FULL"
				545	*/
				546	layout->full_layers = true;
				547	} else {
				548	layout->interleaved_samples = false;
				549
				550	/*
				551	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				552	*
				553	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				554	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				555	* Array Spacing) must be set to ARYSPC_LOD0."
				556	*
				557	* As multisampled resources are not mipmapped, we never use
				558	* ARYSPC_FULL for them.
				559	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	560	if (info->samples > 1)
				561	assert(info->mipLevels == 1);
				562	layout->full_layers = (info->mipLevels > 1);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	563	}
				564	}
				565
				566	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	567	layout_init_arrangements_gen6(struct intel_layout *layout,
				568	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	569	{
				570	/* GEN6 supports only interleaved samples */
				571	layout->interleaved_samples = true;
				572
				573	/*
				574	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				575	*
				576	* "The separate stencil buffer does not support mip mapping, thus the
				577	* storage for LODs other than LOD 0 is not needed. The following
				578	* QPitch equation applies only to the separate stencil buffer:
				579	*
				580	* QPitch = h_0"
				581	*
				582	* GEN6 does not support compact spacing otherwise.
				583	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	584	layout->full_layers = !(layout->format.channelFormat == XGL_CH_FMT_R8 &&
				585	layout->format.numericFormat == XGL_NUM_FMT_DS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	586	}
				587
				588	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	589	layout_init_arrangements(struct intel_layout *layout,
				590	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	591	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	592	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	593	layout_init_arrangements_gen7(layout, params);
				594	else
				595	layout_init_arrangements_gen6(layout, params);
				596
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	597	layout->is_2d = (params->info->imageType != XGL_IMAGE_3D);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	598	}
				599
				600	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	601	layout_init_format(struct intel_layout *layout,
				602	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	603	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	604	const XGL_IMAGE_CREATE_INFO *info = params->info;
				605	XGL_FORMAT format = params->info->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	606	bool require_separate_stencil;
				607
				608	/*
				609	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				610	*
				611	* "This field (Separate Stencil Buffer Enable) must be set to the same
				612	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				613	*
				614	* GEN7+ requires separate stencil buffers.
				615	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	616	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				617	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	618	require_separate_stencil = true;
				619	else
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	620	require_separate_stencil =(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	621	}
				622
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	623	if (format.numericFormat == XGL_NUM_FMT_DS) {
				624	switch (format.channelFormat) {
				625	case XGL_CH_FMT_R32G8:
				626	if (require_separate_stencil) {
				627	format.channelFormat = XGL_CH_FMT_R32;
				628	layout->separate_stencil = true;
				629	}
				630	break;
				631	default:
				632	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	633	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	634	}
				635
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	636	layout->format = format;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	637
				638	layout->block_width = 1;
				639	layout->block_height = 1;
				640	layout->block_size = 1;
				641	params->compressed = false;
				642
				643	switch (format.channelFormat) {
				644	case XGL_CH_FMT_UNDEFINED:
				645	break;
				646	case XGL_CH_FMT_R4G4:
				647	layout->block_size = 1;
				648	break;
				649	case XGL_CH_FMT_R4G4B4A4:
				650	layout->block_size = 2;
				651	break;
				652	case XGL_CH_FMT_R5G6B5:
				653	case XGL_CH_FMT_B5G6R5:
				654	case XGL_CH_FMT_R5G5B5A1:
				655	layout->block_size = 2;
				656	break;
				657	case XGL_CH_FMT_R8:
				658	layout->block_size = 1;
				659	break;
				660	case XGL_CH_FMT_R8G8:
				661	layout->block_size = 2;
				662	break;
				663	case XGL_CH_FMT_R8G8B8A8:
				664	case XGL_CH_FMT_B8G8R8A8:
				665	case XGL_CH_FMT_R10G11B11:
				666	case XGL_CH_FMT_R11G11B10:
				667	case XGL_CH_FMT_R10G10B10A2:
				668	layout->block_size = 4;
				669	break;
				670	case XGL_CH_FMT_R16:
				671	layout->block_size = 2;
				672	break;
				673	case XGL_CH_FMT_R16G16:
				674	layout->block_size = 4;
				675	break;
				676	case XGL_CH_FMT_R16G16B16A16:
				677	layout->block_size = 8;
				678	break;
				679	case XGL_CH_FMT_R32:
				680	layout->block_size = 4;
				681	break;
				682	case XGL_CH_FMT_R32G32:
				683	layout->block_size = 8;
				684	break;
				685	case XGL_CH_FMT_R32G32B32:
				686	layout->block_size = 12;
				687	break;
				688	case XGL_CH_FMT_R32G32B32A32:
				689	layout->block_size = 16;
				690	break;
				691	case XGL_CH_FMT_R16G8:
				692	layout->block_size = 3;
				693	break;
				694	case XGL_CH_FMT_R32G8:
				695	layout->block_size = 5;
				696	break;
				697	case XGL_CH_FMT_R9G9B9E5:
				698	layout->block_size = 4;
				699	break;
				700	case XGL_CH_FMT_BC1:
				701	case XGL_CH_FMT_BC2:
				702	case XGL_CH_FMT_BC3:
				703	case XGL_CH_FMT_BC4:
				704	case XGL_CH_FMT_BC5:
				705	case XGL_CH_FMT_BC6U:
				706	case XGL_CH_FMT_BC6S:
				707	case XGL_CH_FMT_BC7:
				708	layout->block_width = 4;
				709	layout->block_height = 4;
				710	layout->block_size =
				711	(format.channelFormat == XGL_CH_FMT_BC1 \|\|
				712	format.channelFormat == XGL_CH_FMT_BC4) ? 8 : 16;
				713	params->compressed = true;
				714	break;
				715	default:
				716	assert(!"unknown format");
				717	break;
				718	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	719	}
				720
				721	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	722	layout_want_mcs(struct intel_layout *layout,
				723	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	724	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	725	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	726	bool want_mcs = false;
				727
				728	/* MCS is for RT on GEN7+ */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	729	if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	730	return false;
				731
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	732	if (info->imageType != XGL_IMAGE_2D \|\|
				733	!(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	734	return false;
				735
				736	/*
				737	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				738	*
				739	* "For Render Target and Sampling Engine Surfaces:If the surface is
				740	* multisampled (Number of Multisamples any value other than
				741	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				742	*
				743	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				744	* are not written"
				745	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	746	if (info->samples > 1 && !layout->interleaved_samples &&
				747	!(info->format.numericFormat == XGL_NUM_FMT_UINT \|\|
				748	info->format.numericFormat == XGL_NUM_FMT_SINT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	749	want_mcs = true;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	750	} else if (info->samples <= 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	751	/*
				752	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				753	*
				754	* "When MCS is buffer is used for color clear of non-multisampler
				755	* render target, the following restrictions apply.
				756	* - Support is limited to tiled render targets.
				757	* - Support is for non-mip-mapped and non-array surface types
				758	* only.
				759	* - Clear is supported only on the full RT; i.e., no partial clear
				760	* or overlapping clears.
				761	* - MCS buffer for non-MSRT is supported only for RT formats
				762	* 32bpp, 64bpp and 128bpp.
				763	* ..."
				764	*/
				765	if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	766	info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	767	switch (layout->block_size) {
				768	case 4:
				769	case 8:
				770	case 16:
				771	want_mcs = true;
				772	break;
				773	default:
				774	break;
				775	}
				776	}
				777	}
				778
				779	return want_mcs;
				780	}
				781
				782	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	783	layout_want_hiz(const struct intel_layout *layout,
				784	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	785	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	786	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	787	bool want_hiz = false;
				788
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	789	if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	790	return false;
				791
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	792	if (info->format.channelFormat == XGL_CH_FMT_R8)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	793	return false;
				794
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	795	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	796	want_hiz = true;
				797	} else {
				798	/*
				799	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				800	*
				801	* "The hierarchical depth buffer does not support the LOD field, it
				802	* is assumed by hardware to be zero. A separate hierarachical
				803	* depth buffer is required for each LOD used, and the
				804	* corresponding buffer's state delivered to hardware each time a
				805	* new depth buffer state with modified LOD is delivered."
				806	*
				807	* But we have a stronger requirement. Because of layer offsetting
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	808	* (check out the callers of intel_layout_get_slice_tile_offset()), we
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	809	* already have to require the texture to be non-mipmapped and
				810	* non-array.
				811	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	812	if (info->mipLevels == 1 && info->arraySize == 1 &&
				813	info->extent.depth == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	814	want_hiz = true;
				815	}
				816
				817	return want_hiz;
				818	}
				819
				820	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	821	layout_init_aux(struct intel_layout *layout,
				822	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	823	{
				824	if (layout_want_hiz(layout, params))
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	825	layout->aux_type = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	826	else if (layout_want_mcs(layout, params))
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	827	layout->aux_type = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	828	}
				829
				830	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	831	layout_align(struct intel_layout layout, struct intel_layout_params params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	832	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	833	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	834	int align_w = 1, align_h = 1, pad_h = 0;
				835
				836	/*
				837	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				838	*
				839	* "To determine the necessary padding on the bottom and right side of
				840	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				841	* parameters for the surface format in use. The surface must then be
				842	* extended to the next multiple of the alignment unit size in each
				843	* dimension, and all texels contained in this extended surface must
				844	* have valid GTT entries."
				845	*
				846	* "For cube surfaces, an additional two rows of padding are required
				847	* at the bottom of the surface. This must be ensured regardless of
				848	* whether the surface is stored tiled or linear. This is due to the
				849	* potential rotation of cache line orientation from memory to cache."
				850	*
				851	* "For compressed textures (BC* and FXT1 surface formats), padding at
				852	* the bottom of the surface is to an even compressed row, which is
				853	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				854	* purposes, these surfaces behave as if j = 8 only for surface
				855	* padding purposes. The value of 4 for j still applies for mip level
				856	* alignment and QPitch calculation."
				857	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	858	if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
				859	if (align_w < layout->align_i)
				860	align_w = layout->align_i;
				861	if (align_h < layout->align_j)
				862	align_h = layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	863
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	864	/* in case it is used as a cube */
				865	if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	866	pad_h += 2;
				867
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	868	if (params->compressed && align_h < layout->align_j * 2)
				869	align_h = layout->align_j * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	870	}
				871
				872	/*
				873	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				874	*
				875	* "If the surface contains an odd number of rows of data, a final row
				876	* below the surface must be allocated."
				877	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	878	if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
				879	align_h = 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	880
				881	/*
				882	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
				883	* ilo_texture_can_enable_hiz(), we always return true for the first slice.
				884	* To avoid out-of-bound access, we have to pad.
				885	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	886	if (layout->aux_type == INTEL_LAYOUT_AUX_HIZ) {
				887	if (align_w < 8)
				888	align_w = 8;
				889	if (align_h < 4)
				890	align_h = 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	891	}
				892
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	893	params->max_x = u_align(params->max_x, align_w);
				894	params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	895	}
				896
				897	/* note that this may force the texture to be linear */
				898	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	899	layout_calculate_bo_size(struct intel_layout *layout,
				900	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	901	{
				902	assert(params->max_x % layout->block_width == 0);
				903	assert(params->max_y % layout->block_height == 0);
				904	assert(layout->layer_height % layout->block_height == 0);
				905
				906	layout->bo_stride =
				907	(params->max_x / layout->block_width) * layout->block_size;
				908	layout->bo_height = params->max_y / layout->block_height;
				909
				910	while (true) {
				911	unsigned w = layout->bo_stride, h = layout->bo_height;
				912	unsigned align_w, align_h;
				913
				914	/*
				915	* From the Haswell PRM, volume 5, page 163:
				916	*
				917	* "For linear surfaces, additional padding of 64 bytes is required
				918	* at the bottom of the surface. This is in addition to the padding
				919	* required above."
				920	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	921	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
				922	(params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	923	layout->tiling == INTEL_TILING_NONE) {
				924	layout->bo_height +=
				925	(64 + layout->bo_stride - 1) / layout->bo_stride;
				926	}
				927
				928	/*
				929	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				930	*
				931	* "- For linear render target surfaces, the pitch must be a
				932	* multiple of the element size for non-YUV surface formats.
				933	* Pitch must be a multiple of 2 * element size for YUV surface
				934	* formats.
				935	* - For other linear surfaces, the pitch can be any multiple of
				936	* bytes.
				937	* - For tiled surfaces, the pitch must be a multiple of the tile
				938	* width."
				939	*
				940	* Different requirements may exist when the bo is used in different
				941	* places, but our alignments here should be good enough that we do not
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	942	* need to check layout->info->usage.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	943	*/
				944	switch (layout->tiling) {
				945	case INTEL_TILING_X:
				946	align_w = 512;
				947	align_h = 8;
				948	break;
				949	case INTEL_TILING_Y:
				950	align_w = 128;
				951	align_h = 32;
				952	break;
				953	default:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	954	if (layout->format.channelFormat == XGL_CH_FMT_R8 &&
				955	layout->format.numericFormat == XGL_NUM_FMT_DS) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	956	/*
				957	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				958	*
				959	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				960	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				961	* bytes."
				962	*
				963	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				964	* INTEL_TILING_W, we want to align to W tiles here.
				965	*/
				966	align_w = 64;
				967	align_h = 64;
				968	} else {
				969	/* some good enough values */
				970	align_w = 64;
				971	align_h = 2;
				972	}
				973	break;
				974	}
				975
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	976	w = u_align(w, align_w);
				977	h = u_align(h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	978
				979	/* make sure the bo is mappable */
				980	if (layout->tiling != INTEL_TILING_NONE) {
				981	/*
				982	* Usually only the first 256MB of the GTT is mappable.
				983	*
				984	* See also how intel_context::max_gtt_map_object_size is calculated.
				985	*/
				986	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				987
				988	/*
				989	* Be conservative. We may be able to switch from VALIGN_4 to
				990	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				991	*/
				992	if (mappable_gtt_size / w / 4 < h) {
				993	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				994	layout->tiling = INTEL_TILING_NONE;
				995	/* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	996	if (layout->aux_type == INTEL_LAYOUT_AUX_MCS &&
				997	params->info->samples <= 1)
				998	layout->aux_type = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	999
				1000	continue;
				1001	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1002	/* mapping will fail */
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1003	}
				1004	}
				1005	}
				1006
				1007	layout->bo_stride = w;
				1008	layout->bo_height = h;
				1009	break;
				1010	}
				1011	}
				1012
				1013	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1014	layout_calculate_hiz_size(struct intel_layout *layout,
				1015	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1016	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1017	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1018	const int hz_align_j = 8;
				1019	int hz_width, hz_height;
				1020
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1021	assert(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1022
				1023	/*
				1024	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				1025	* PRM, volume 2 part 1, page 312-313.
				1026	*
				1027	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				1028	* memory row.
				1029	*/
				1030
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1031	hz_width = u_align(layout->levels[0].slice_width, 16);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1032
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1033	if (info->imageType == XGL_IMAGE_3D) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1034	unsigned lv;
				1035
				1036	hz_height = 0;
				1037
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1038	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1039	const unsigned h =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1040	u_align(layout->levels[lv].slice_height, hz_align_j);
				1041	hz_height += h * u_minify(info->extent.depth, lv);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1042	}
				1043
				1044	hz_height /= 2;
				1045	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1046	const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1047	unsigned hz_qpitch = h0;
				1048
				1049	if (layout->full_layers) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1050	const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1051	const unsigned htail =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1052	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1053
				1054	hz_qpitch += h1 + htail;
				1055	}
				1056
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1057	hz_height = hz_qpitch * info->arraySize / 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1058
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1059	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1060	hz_height = u_align(hz_height, 8);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1061	}
				1062
				1063	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1064	layout->aux_stride = u_align(hz_width, 128);
				1065	layout->aux_height = u_align(hz_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1066	}
				1067
				1068	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1069	layout_calculate_mcs_size(struct intel_layout *layout,
				1070	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1071	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1072	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1073	int mcs_width, mcs_height, mcs_cpp;
				1074	int downscale_x, downscale_y;
				1075
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1076	assert(layout->aux_type == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1077
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1078	if (info->samples > 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1079	/*
				1080	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1081	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1082	* need of scale down could be that the clear rectangle is used to clear
				1083	* the MCS instead of the RT.
				1084	*
				1085	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1086	* 2x2 factor could come from that the hardware writes 128 bits (an
				1087	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1088	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1089	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1090	* pixel block in the RT.
				1091	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1092	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1093	case 2:
				1094	case 4:
				1095	downscale_x = 8;
				1096	downscale_y = 2;
				1097	mcs_cpp = 1;
				1098	break;
				1099	case 8:
				1100	downscale_x = 2;
				1101	downscale_y = 2;
				1102	mcs_cpp = 4;
				1103	break;
				1104	case 16:
				1105	downscale_x = 2;
				1106	downscale_y = 1;
				1107	mcs_cpp = 8;
				1108	break;
				1109	default:
				1110	assert(!"unsupported sample count");
				1111	return;
				1112	break;
				1113	}
				1114
				1115	/*
				1116	* It also appears that the 2x2 subspans generated by the scaled-down
				1117	* clear rectangle cannot be masked. The scale-down clear rectangle
				1118	* thus must be aligned to 2x2, and we need to pad.
				1119	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1120	mcs_width = u_align(info->extent.width, downscale_x * 2);
				1121	mcs_height = u_align(info->extent.height, downscale_y * 2);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1122	}
				1123	else {
				1124	/*
				1125	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1126	*
				1127	* " Pixels Lines
				1128	* TiledY RT CL
				1129	* bpp
				1130	* 32 8 4
				1131	* 64 4 4
				1132	* 128 2 4
				1133	*
				1134	* TiledX RT CL
				1135	* bpp
				1136	* 32 16 2
				1137	* 64 8 2
				1138	* 128 4 2"
				1139	*
				1140	* This table and the two following tables define the RT alignments, the
				1141	* clear rectangle alignments, and the clear rectangle scale factors.
				1142	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1143	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1144	* rectangle scale factors are 8x16 blocks.
				1145	*
				1146	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1147	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1148	* 8x16 blocks.
				1149	*
				1150	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1151	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1152	* which says that a Y-tile maps to 128x256 blocks (\see
				1153	* intel_get_non_msrt_mcs_alignment). It does not really change
				1154	* anything except for the size of the allocated MCS. Let's see if we
				1155	* hit out-of-bound access.
				1156	*/
				1157	switch (layout->tiling) {
				1158	case INTEL_TILING_X:
				1159	downscale_x = 64 / layout->block_size;
				1160	downscale_y = 2;
				1161	break;
				1162	case INTEL_TILING_Y:
				1163	downscale_x = 32 / layout->block_size;
				1164	downscale_y = 4;
				1165	break;
				1166	default:
				1167	assert(!"unsupported tiling mode");
				1168	return;
				1169	break;
				1170	}
				1171
				1172	downscale_x *= 8;
				1173	downscale_y *= 16;
				1174
				1175	/*
				1176	* From the Haswell PRM, volume 7, page 652:
				1177	*
				1178	* "Clear rectangle must be aligned to two times the number of
				1179	* pixels in the table shown below due to 16X16 hashing across the
				1180	* slice."
				1181	*
				1182	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1183	* 2x2, and we need to pad.
				1184	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1185	mcs_width = u_align(info->extent.width, downscale_x * 4) / downscale_x;
				1186	mcs_height = u_align(info->extent.height, downscale_y * 4) / downscale_y;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1187	mcs_cpp = 16; /* an OWord */
				1188	}
				1189
				1190	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1191	layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
				1192	layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1193	}
				1194
				1195	/**
				1196	* Initialize the layout. Callers should zero-initialize \p layout first.
				1197	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1198	void intel_layout_init(struct intel_layout *layout,
				1199	const struct intel_dev *dev,
				1200	const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1201	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1202	struct intel_layout_params params;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1203
				1204	memset(&params, 0, sizeof(params));
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1205	params.gpu = dev->gpu;
				1206	params.info = info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1207
				1208	/* note that there are dependencies between these functions */
				1209	layout_init_aux(layout, &params);
				1210	layout_init_format(layout, &params);
				1211	layout_init_arrangements(layout, &params);
				1212	layout_init_tiling(layout, &params);
				1213	layout_init_alignments(layout, &params);
				1214	layout_init_levels(layout, &params);
				1215	layout_init_layer_height(layout, &params);
				1216
				1217	layout_align(layout, &params);
				1218	layout_calculate_bo_size(layout, &params);
				1219
				1220	switch (layout->aux_type) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1221	case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1222	layout_calculate_hiz_size(layout, &params);
				1223	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1224	case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1225	layout_calculate_mcs_size(layout, &params);
				1226	break;
				1227	default:
				1228	break;
				1229	}
				1230	}
				1231
				1232	/**
				1233	* Update the tiling mode and bo stride (for imported resources).
				1234	*/
				1235	bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame^]	1236	intel_layout_update_for_imported_bo(struct intel_layout *layout,
				1237	enum intel_tiling_mode tiling,
				1238	unsigned bo_stride)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1239	{
				1240	if (!(layout->valid_tilings & (1 << tiling)))
				1241	return false;
				1242
				1243	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1244	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1245	return false;
				1246
				1247	layout->tiling = tiling;
				1248	layout->bo_stride = bo_stride;
				1249
				1250	return true;
				1251	}