Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: 1a2ee84928ef987126a2c04fae097f1d6a7604ca [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
				25	* Chia-I Wu <olv@lunarg.com>
				26	*/
				27
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	28	#include "dev.h"
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	29	#include "format.h"
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	30	#include "gpu.h"
				31	#include "layout.h"
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	32
				33	enum {
				34	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				35	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				36	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				37	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				38
				39	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				40	LAYOUT_TILING_X \|
				41	LAYOUT_TILING_Y \|
				42	LAYOUT_TILING_W)
				43	};
				44
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	45	struct intel_layout_params {
				46	const struct intel_gpu *gpu;
				47	const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	48
				49	bool compressed;
				50
				51	unsigned h0, h1;
				52	unsigned max_x, max_y;
				53	};
				54
				55	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	56	layout_get_slice_size(const struct intel_layout *layout,
				57	const struct intel_layout_params *params,
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	58	unsigned level, unsigned width, unsigned height)
				59	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	60	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	61	unsigned w, h;
				62
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	63	w = u_minify(info->extent.width, level);
				64	h = u_minify(info->extent.height, level);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	65
				66	/*
				67	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				68	*
				69	* "The dimensions of the mip maps are first determined by applying the
				70	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				71	* if necessary, they are padded out to compression block boundaries."
				72	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	73	w = u_align(w, layout->block_width);
				74	h = u_align(h, layout->block_height);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	75
				76	/*
				77	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				78	*
				79	* "If the surface is multisampled (4x), these values must be adjusted
				80	* as follows before proceeding:
				81	*
				82	* W_L = ceiling(W_L / 2) * 4
				83	* H_L = ceiling(H_L / 2) * 4"
				84	*
				85	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				86	*
				87	* "If the surface is multisampled and it is a depth or stencil surface
				88	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				89	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				90	* proceeding:
				91	*
				92	* #samples W_L = H_L =
				93	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				94	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				95	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				96	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				97	*
				98	* For interleaved samples (4x), where pixels
				99	*
				100	* (x, y ) (x+1, y )
				101	* (x, y+1) (x+1, y+1)
				102	*
				103	* would be is occupied by
				104	*
				105	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				106	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				107	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				108	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				109	*
				110	* Thus the need to
				111	*
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	112	* w = u_align(w, 2) * 2;
				113	* y = u_align(y, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	114	*/
				115	if (layout->interleaved_samples) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	116	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	117	case 0:
				118	case 1:
				119	break;
				120	case 2:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	121	w = u_align(w, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	122	break;
				123	case 4:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	124	w = u_align(w, 2) * 2;
				125	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	126	break;
				127	case 8:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	128	w = u_align(w, 2) * 4;
				129	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	130	break;
				131	case 16:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	132	w = u_align(w, 2) * 4;
				133	h = u_align(h, 2) * 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	134	break;
				135	default:
				136	assert(!"unsupported sample count");
				137	break;
				138	}
				139	}
				140
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	141	w = u_align(w, layout->align_i);
				142	h = u_align(h, layout->align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	143
				144	*width = w;
				145	*height = h;
				146	}
				147
				148	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	149	layout_get_num_layers(const struct intel_layout *layout,
				150	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	151	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	152	const XGL_IMAGE_CREATE_INFO *info = params->info;
				153	unsigned num_layers = info->arraySize;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	154
				155	/* samples of the same index are stored in a layer */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	156	if (info->samples > 1 && !layout->interleaved_samples)
				157	num_layers *= info->samples;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	158
				159	return num_layers;
				160	}
				161
				162	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	163	layout_init_layer_height(struct intel_layout *layout,
				164	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	165	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	166	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	167	unsigned num_layers;
				168
				169	num_layers = layout_get_num_layers(layout, params);
				170	if (num_layers <= 1)
				171	return;
				172
				173	if (!layout->full_layers) {
				174	layout->layer_height = params->h0;
				175	params->max_y += params->h0 * (num_layers - 1);
				176	return;
				177	}
				178
				179	/*
				180	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				181	*
				182	* "The following equation is used for surface formats other than
				183	* compressed textures:
				184	*
				185	* QPitch = (h0 + h1 + 11j)"
				186	*
				187	* "The equation for compressed textures (BC* and FXT1 surface formats)
				188	* follows:
				189	*
				190	* QPitch = (h0 + h1 + 11j) / 4"
				191	*
				192	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				193	* value calculated in the equation above, for every other odd Surface
				194	* Height starting from 1 i.e. 1,5,9,13"
				195	*
				196	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				197	*
				198	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				199	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				200	*
				201	* QPitch = (h0 + h1 + 12j)
				202	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				203	*
				204	* (There are many typos or missing words here...)"
				205	*
				206	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				207	* the base address. The PRM divides QPitch by 4 for compressed formats
				208	* because the block height for those formats are 4, and it wants QPitch to
				209	* mean the number of memory rows, as opposed to texel rows, between
				210	* slices. Since we use texel rows everywhere, we do not need to divide
				211	* QPitch by 4.
				212	*/
				213	layout->layer_height = params->h0 + params->h1 +
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	214	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	215
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	216	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
				217	info->extent.height % 4 == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	218	layout->layer_height += 4;
				219
				220	params->max_y += layout->layer_height * (num_layers - 1);
				221	}
				222
				223	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	224	layout_init_levels(struct intel_layout *layout,
				225	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	226	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	227	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	228	unsigned cur_x, cur_y;
				229	unsigned lv;
				230
				231	cur_x = 0;
				232	cur_y = 0;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	233	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	234	unsigned level_w, level_h;
				235
				236	layout_get_slice_size(layout, params, lv, &level_w, &level_h);
				237
				238	layout->levels[lv].x = cur_x;
				239	layout->levels[lv].y = cur_y;
				240	layout->levels[lv].slice_width = level_w;
				241	layout->levels[lv].slice_height = level_h;
				242
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	243	if (info->imageType == XGL_IMAGE_3D) {
				244	const unsigned num_slices = u_minify(info->extent.depth, lv);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	245	const unsigned num_slices_per_row = 1 << lv;
				246	const unsigned num_rows =
				247	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				248
				249	level_w *= num_slices_per_row;
				250	level_h *= num_rows;
				251
				252	cur_y += level_h;
				253	} else {
				254	/* MIPLAYOUT_BELOW */
				255	if (lv == 1)
				256	cur_x += level_w;
				257	else
				258	cur_y += level_h;
				259	}
				260
				261	if (params->max_x < layout->levels[lv].x + level_w)
				262	params->max_x = layout->levels[lv].x + level_w;
				263	if (params->max_y < layout->levels[lv].y + level_h)
				264	params->max_y = layout->levels[lv].y + level_h;
				265	}
				266
				267	params->h0 = layout->levels[0].slice_height;
				268	if (layout->full_layers) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	269	if (info->mipLevels > 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	270	params->h1 = layout->levels[1].slice_height;
				271	else
				272	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				273	}
				274	}
				275
				276	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	277	layout_init_alignments(struct intel_layout *layout,
				278	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	279	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	280	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	281
				282	/*
				283	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				284	*
				285	* "surface format align_i align_j
				286	* YUV 4:2:2 formats 4 *see below
				287	* BC1-5 4 4
				288	* FXT1 8 4
				289	* all other formats 4 *see below"
				290	*
				291	* "- align_j = 4 for any depth buffer
				292	* - align_j = 2 for separate stencil buffer
				293	* - align_j = 4 for any render target surface is multisampled (4x)
				294	* - align_j = 4 for any render target surface with Surface Vertical
				295	* Alignment = VALIGN_4
				296	* - align_j = 2 for any render target surface with Surface Vertical
				297	* Alignment = VALIGN_2
				298	* - align_j = 2 for all other render target surface
				299	* - align_j = 2 for any sampling engine surface with Surface Vertical
				300	* Alignment = VALIGN_2
				301	* - align_j = 4 for any sampling engine surface with Surface Vertical
				302	* Alignment = VALIGN_4"
				303	*
				304	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				305	*
				306	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				307	* the Surface Format is 96 bits per element (BPE)."
				308	*
				309	* They can be rephrased as
				310	*
				311	* align_i align_j
				312	* compressed formats block width block height
				313	* PIPE_FORMAT_S8_UINT 4 2
				314	* other depth/stencil formats 4 4
				315	* 4x multisampled 4 4
				316	* bpp 96 4 2
				317	* others 4 2 or 4
				318	*/
				319
				320	/*
				321	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				322	*
				323	* "surface defined by surface format align_i align_j
				324	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				325	* not D16_UNORM 4 4
				326	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				327	* SURFACE_STATE BC, ETC, EAC* 4 4
				328	* FXT1 8 4
				329	* all others (set by SURFACE_STATE)"
				330	*
				331	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				332	*
				333	* "- This field (Surface Vertical Aligment) is intended to be set to
				334	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				335	* multisampled (4x) render target, or for a multisampled (8x)
				336	* render target, since these surfaces support only alignment of 4.
				337	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				338	* memory.
				339	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				340	* surfaces.
				341	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				342	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				343	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				344	* must be set to VALIGN_4."
				345	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				346	*
				347	* "- This field (Surface Horizontal Aligment) is intended to be set to
				348	* HALIGN_8 only if the surface was rendered as a depth buffer with
				349	* Z16 format or a stencil buffer, since these surfaces support only
				350	* alignment of 8.
				351	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				352	* memory.
				353	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				354	* - This field must be set to HALIGN_8 if the Surface Format is
				355	* FXT1."
				356	*
				357	* They can be rephrased as
				358	*
				359	* align_i align_j
				360	* compressed formats block width block height
				361	* PIPE_FORMAT_Z16_UNORM 8 4
				362	* PIPE_FORMAT_S8_UINT 8 8
				363	* other depth/stencil formats 4 or 8 4
				364	* 2x or 4x multisampled 4 or 8 4
				365	* tiled Y 4 or 8 4 (if rt)
				366	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				367	* others 4 or 8 2 or 4
				368	*/
				369
				370	if (params->compressed) {
				371	/* this happens to be the case */
				372	layout->align_i = layout->block_width;
				373	layout->align_j = layout->block_height;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	374	} else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				375	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
				376	switch (layout->format.channelFormat) {
				377	case XGL_CH_FMT_R16:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	378	layout->align_i = 8;
				379	layout->align_j = 4;
				380	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	381	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	382	layout->align_i = 8;
				383	layout->align_j = 8;
				384	break;
				385	default:
				386	layout->align_i = 4;
				387	layout->align_j = 4;
				388	break;
				389	}
				390	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	391	switch (layout->format.channelFormat) {
				392	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	393	layout->align_i = 4;
				394	layout->align_j = 2;
				395	break;
				396	default:
				397	layout->align_i = 4;
				398	layout->align_j = 4;
				399	break;
				400	}
				401	}
				402	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	403	const bool valign_4 = (info->samples > 1) \|\|
				404	(intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	405	layout->tiling == INTEL_TILING_Y &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	406	(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	407
				408	if (valign_4)
				409	assert(layout->block_size != 12);
				410
				411	layout->align_i = 4;
				412	layout->align_j = (valign_4) ? 4 : 2;
				413	}
				414
				415	/*
				416	* the fact that align i and j are multiples of block width and height
				417	* respectively is what makes the size of the bo a multiple of the block
				418	* size, slices start at block boundaries, and many of the computations
				419	* work.
				420	*/
				421	assert(layout->align_i % layout->block_width == 0);
				422	assert(layout->align_j % layout->block_height == 0);
				423
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	424	/* make sure u_align() works */
				425	assert(u_is_pow2(layout->align_i) &&
				426	u_is_pow2(layout->align_j));
				427	assert(u_is_pow2(layout->block_width) &&
				428	u_is_pow2(layout->block_height));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	429	}
				430
				431	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	432	layout_get_valid_tilings(const struct intel_layout *layout,
				433	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	434	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	435	const XGL_IMAGE_CREATE_INFO *info = params->info;
				436	const XGL_FORMAT format = layout->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	437	unsigned valid_tilings = LAYOUT_TILING_ALL;
				438
				439	/*
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	440	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				441	*
				442	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				443	* Depth Buffer is not supported."
				444	*
				445	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				446	*
				447	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				448	*
				449	* "W-Major Tile Format is used for separate stencil."
				450	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	451	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				452	switch (format.channelFormat) {
				453	case XGL_CH_FMT_R8:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	454	valid_tilings &= LAYOUT_TILING_W;
				455	break;
				456	default:
				457	valid_tilings &= LAYOUT_TILING_Y;
				458	break;
				459	}
				460	}
				461
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	462	if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	463	/*
				464	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				465	*
				466	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				467	* either TileX or Linear."
				468	*/
				469	if (layout->block_size == 16)
				470	valid_tilings &= ~LAYOUT_TILING_Y;
				471
				472	/*
				473	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				474	*
				475	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				476	* for all tiled Y Render Target surfaces."
				477	*
				478	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				479	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	480	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	481	valid_tilings &= ~LAYOUT_TILING_Y;
				482	}
				483
				484	/* no conflicting binding flags */
				485	assert(valid_tilings);
				486
				487	return valid_tilings;
				488	}
				489
				490	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	491	layout_init_tiling(struct intel_layout *layout,
				492	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	493	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	494	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	495	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				496
				497	layout->valid_tilings = valid_tilings;
				498
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	499	if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT \| XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	500	/*
				501	* heuristically set a minimum width/height for enabling tiling
				502	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	503	if (info->extent.width < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	504	valid_tilings &= ~LAYOUT_TILING_X;
				505
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	506	if ((info->extent.width < 32 \|\| info->extent.height < 16) &&
				507	(info->extent.width < 16 \|\| info->extent.height < 32) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	508	(valid_tilings & ~LAYOUT_TILING_Y))
				509	valid_tilings &= ~LAYOUT_TILING_Y;
				510	} else {
				511	/* force linear if we are not sure where the texture is bound to */
				512	if (valid_tilings & LAYOUT_TILING_NONE)
				513	valid_tilings &= LAYOUT_TILING_NONE;
				514	}
				515
				516	/* prefer tiled over linear */
				517	if (valid_tilings & LAYOUT_TILING_Y)
				518	layout->tiling = INTEL_TILING_Y;
				519	else if (valid_tilings & LAYOUT_TILING_X)
				520	layout->tiling = INTEL_TILING_X;
				521	else /* linear or W-tiled, which has no hardware support */
				522	layout->tiling = INTEL_TILING_NONE;
				523	}
				524
				525	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	526	layout_init_arrangements_gen7(struct intel_layout *layout,
				527	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	528	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	529	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	530
				531	/*
				532	* It is not explicitly states, but render targets are expected to be
				533	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				534	* to be IMS (samples interleaved).
				535	*
				536	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				537	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	538	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	539	layout->interleaved_samples = true;
				540
				541	/*
				542	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				543	*
				544	* "note that the depth buffer and stencil buffer have an implied
				545	* value of ARYSPC_FULL"
				546	*/
				547	layout->full_layers = true;
				548	} else {
				549	layout->interleaved_samples = false;
				550
				551	/*
				552	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				553	*
				554	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				555	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				556	* Array Spacing) must be set to ARYSPC_LOD0."
				557	*
				558	* As multisampled resources are not mipmapped, we never use
				559	* ARYSPC_FULL for them.
				560	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	561	if (info->samples > 1)
				562	assert(info->mipLevels == 1);
				563	layout->full_layers = (info->mipLevels > 1);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	564	}
				565	}
				566
				567	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	568	layout_init_arrangements_gen6(struct intel_layout *layout,
				569	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	570	{
				571	/* GEN6 supports only interleaved samples */
				572	layout->interleaved_samples = true;
				573
				574	/*
				575	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				576	*
				577	* "The separate stencil buffer does not support mip mapping, thus the
				578	* storage for LODs other than LOD 0 is not needed. The following
				579	* QPitch equation applies only to the separate stencil buffer:
				580	*
				581	* QPitch = h_0"
				582	*
				583	* GEN6 does not support compact spacing otherwise.
				584	*/
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame^]	585	layout->full_layers =
				586	!intel_format_is_stencil(params->gpu, layout->format);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	587	}
				588
				589	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	590	layout_init_arrangements(struct intel_layout *layout,
				591	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	592	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	593	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	594	layout_init_arrangements_gen7(layout, params);
				595	else
				596	layout_init_arrangements_gen6(layout, params);
				597
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	598	layout->is_2d = (params->info->imageType != XGL_IMAGE_3D);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	599	}
				600
				601	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	602	layout_init_format(struct intel_layout *layout,
				603	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	604	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	605	const XGL_IMAGE_CREATE_INFO *info = params->info;
				606	XGL_FORMAT format = params->info->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	607	bool require_separate_stencil;
				608
				609	/*
				610	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				611	*
				612	* "This field (Separate Stencil Buffer Enable) must be set to the same
				613	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				614	*
				615	* GEN7+ requires separate stencil buffers.
				616	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	617	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				618	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	619	require_separate_stencil = true;
				620	else
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	621	require_separate_stencil =(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	622	}
				623
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	624	if (format.numericFormat == XGL_NUM_FMT_DS) {
				625	switch (format.channelFormat) {
				626	case XGL_CH_FMT_R32G8:
				627	if (require_separate_stencil) {
				628	format.channelFormat = XGL_CH_FMT_R32;
				629	layout->separate_stencil = true;
				630	}
				631	break;
				632	default:
				633	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	634	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	635	}
				636
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	637	layout->format = format;
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	638	layout->block_width = icd_format_get_block_width(format);
				639	layout->block_height = layout->block_width;
				640	layout->block_size = icd_format_get_size(format);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	641
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	642	params->compressed = icd_format_is_compressed(format);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	643	}
				644
				645	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	646	layout_want_mcs(struct intel_layout *layout,
				647	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	648	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	649	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	650	bool want_mcs = false;
				651
				652	/* MCS is for RT on GEN7+ */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	653	if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	654	return false;
				655
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	656	if (info->imageType != XGL_IMAGE_2D \|\|
				657	!(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	658	return false;
				659
				660	/*
				661	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				662	*
				663	* "For Render Target and Sampling Engine Surfaces:If the surface is
				664	* multisampled (Number of Multisamples any value other than
				665	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				666	*
				667	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				668	* are not written"
				669	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	670	if (info->samples > 1 && !layout->interleaved_samples &&
				671	!(info->format.numericFormat == XGL_NUM_FMT_UINT \|\|
				672	info->format.numericFormat == XGL_NUM_FMT_SINT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	673	want_mcs = true;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	674	} else if (info->samples <= 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	675	/*
				676	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				677	*
				678	* "When MCS is buffer is used for color clear of non-multisampler
				679	* render target, the following restrictions apply.
				680	* - Support is limited to tiled render targets.
				681	* - Support is for non-mip-mapped and non-array surface types
				682	* only.
				683	* - Clear is supported only on the full RT; i.e., no partial clear
				684	* or overlapping clears.
				685	* - MCS buffer for non-MSRT is supported only for RT formats
				686	* 32bpp, 64bpp and 128bpp.
				687	* ..."
				688	*/
				689	if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	690	info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	691	switch (layout->block_size) {
				692	case 4:
				693	case 8:
				694	case 16:
				695	want_mcs = true;
				696	break;
				697	default:
				698	break;
				699	}
				700	}
				701	}
				702
				703	return want_mcs;
				704	}
				705
				706	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	707	layout_want_hiz(const struct intel_layout *layout,
				708	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	709	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	710	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	711	bool want_hiz = false;
				712
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	713	if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	714	return false;
				715
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame^]	716	if (!intel_format_is_depth(params->gpu, info->format))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	717	return false;
				718
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	719	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	720	want_hiz = true;
				721	} else {
				722	/*
				723	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				724	*
				725	* "The hierarchical depth buffer does not support the LOD field, it
				726	* is assumed by hardware to be zero. A separate hierarachical
				727	* depth buffer is required for each LOD used, and the
				728	* corresponding buffer's state delivered to hardware each time a
				729	* new depth buffer state with modified LOD is delivered."
				730	*
				731	* But we have a stronger requirement. Because of layer offsetting
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	732	* (check out the callers of intel_layout_get_slice_tile_offset()), we
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	733	* already have to require the texture to be non-mipmapped and
				734	* non-array.
				735	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	736	if (info->mipLevels == 1 && info->arraySize == 1 &&
				737	info->extent.depth == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	738	want_hiz = true;
				739	}
				740
				741	return want_hiz;
				742	}
				743
				744	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	745	layout_init_aux(struct intel_layout *layout,
				746	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	747	{
				748	if (layout_want_hiz(layout, params))
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	749	layout->aux_type = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	750	else if (layout_want_mcs(layout, params))
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	751	layout->aux_type = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	752	}
				753
				754	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	755	layout_align(struct intel_layout layout, struct intel_layout_params params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	756	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	757	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	758	int align_w = 1, align_h = 1, pad_h = 0;
				759
				760	/*
				761	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				762	*
				763	* "To determine the necessary padding on the bottom and right side of
				764	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				765	* parameters for the surface format in use. The surface must then be
				766	* extended to the next multiple of the alignment unit size in each
				767	* dimension, and all texels contained in this extended surface must
				768	* have valid GTT entries."
				769	*
				770	* "For cube surfaces, an additional two rows of padding are required
				771	* at the bottom of the surface. This must be ensured regardless of
				772	* whether the surface is stored tiled or linear. This is due to the
				773	* potential rotation of cache line orientation from memory to cache."
				774	*
				775	* "For compressed textures (BC* and FXT1 surface formats), padding at
				776	* the bottom of the surface is to an even compressed row, which is
				777	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				778	* purposes, these surfaces behave as if j = 8 only for surface
				779	* padding purposes. The value of 4 for j still applies for mip level
				780	* alignment and QPitch calculation."
				781	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	782	if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
				783	if (align_w < layout->align_i)
				784	align_w = layout->align_i;
				785	if (align_h < layout->align_j)
				786	align_h = layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	787
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	788	/* in case it is used as a cube */
				789	if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	790	pad_h += 2;
				791
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	792	if (params->compressed && align_h < layout->align_j * 2)
				793	align_h = layout->align_j * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	794	}
				795
				796	/*
				797	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				798	*
				799	* "If the surface contains an odd number of rows of data, a final row
				800	* below the surface must be allocated."
				801	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	802	if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
				803	align_h = 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	804
				805	/*
				806	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
				807	* ilo_texture_can_enable_hiz(), we always return true for the first slice.
				808	* To avoid out-of-bound access, we have to pad.
				809	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	810	if (layout->aux_type == INTEL_LAYOUT_AUX_HIZ) {
				811	if (align_w < 8)
				812	align_w = 8;
				813	if (align_h < 4)
				814	align_h = 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	815	}
				816
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	817	params->max_x = u_align(params->max_x, align_w);
				818	params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	819	}
				820
				821	/* note that this may force the texture to be linear */
				822	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	823	layout_calculate_bo_size(struct intel_layout *layout,
				824	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	825	{
				826	assert(params->max_x % layout->block_width == 0);
				827	assert(params->max_y % layout->block_height == 0);
				828	assert(layout->layer_height % layout->block_height == 0);
				829
				830	layout->bo_stride =
				831	(params->max_x / layout->block_width) * layout->block_size;
				832	layout->bo_height = params->max_y / layout->block_height;
				833
				834	while (true) {
				835	unsigned w = layout->bo_stride, h = layout->bo_height;
				836	unsigned align_w, align_h;
				837
				838	/*
				839	* From the Haswell PRM, volume 5, page 163:
				840	*
				841	* "For linear surfaces, additional padding of 64 bytes is required
				842	* at the bottom of the surface. This is in addition to the padding
				843	* required above."
				844	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	845	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
				846	(params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	847	layout->tiling == INTEL_TILING_NONE) {
				848	layout->bo_height +=
				849	(64 + layout->bo_stride - 1) / layout->bo_stride;
				850	}
				851
				852	/*
				853	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				854	*
				855	* "- For linear render target surfaces, the pitch must be a
				856	* multiple of the element size for non-YUV surface formats.
				857	* Pitch must be a multiple of 2 * element size for YUV surface
				858	* formats.
				859	* - For other linear surfaces, the pitch can be any multiple of
				860	* bytes.
				861	* - For tiled surfaces, the pitch must be a multiple of the tile
				862	* width."
				863	*
				864	* Different requirements may exist when the bo is used in different
				865	* places, but our alignments here should be good enough that we do not
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	866	* need to check layout->info->usage.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	867	*/
				868	switch (layout->tiling) {
				869	case INTEL_TILING_X:
				870	align_w = 512;
				871	align_h = 8;
				872	break;
				873	case INTEL_TILING_Y:
				874	align_w = 128;
				875	align_h = 32;
				876	break;
				877	default:
Chia-I Wu	fb24026	2014-08-16 13:26:06 +0800	[diff] [blame^]	878	if (intel_format_is_stencil(params->gpu, layout->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	879	/*
				880	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				881	*
				882	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				883	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				884	* bytes."
				885	*
				886	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				887	* INTEL_TILING_W, we want to align to W tiles here.
				888	*/
				889	align_w = 64;
				890	align_h = 64;
				891	} else {
				892	/* some good enough values */
				893	align_w = 64;
				894	align_h = 2;
				895	}
				896	break;
				897	}
				898
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	899	w = u_align(w, align_w);
				900	h = u_align(h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	901
				902	/* make sure the bo is mappable */
				903	if (layout->tiling != INTEL_TILING_NONE) {
				904	/*
				905	* Usually only the first 256MB of the GTT is mappable.
				906	*
				907	* See also how intel_context::max_gtt_map_object_size is calculated.
				908	*/
				909	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				910
				911	/*
				912	* Be conservative. We may be able to switch from VALIGN_4 to
				913	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				914	*/
				915	if (mappable_gtt_size / w / 4 < h) {
				916	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				917	layout->tiling = INTEL_TILING_NONE;
				918	/* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	919	if (layout->aux_type == INTEL_LAYOUT_AUX_MCS &&
				920	params->info->samples <= 1)
				921	layout->aux_type = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	922
				923	continue;
				924	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	925	/* mapping will fail */
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	926	}
				927	}
				928	}
				929
				930	layout->bo_stride = w;
				931	layout->bo_height = h;
				932	break;
				933	}
				934	}
				935
				936	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	937	layout_calculate_hiz_size(struct intel_layout *layout,
				938	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	939	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	940	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	941	const int hz_align_j = 8;
				942	int hz_width, hz_height;
				943
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	944	assert(layout->aux_type == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	945
				946	/*
				947	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				948	* PRM, volume 2 part 1, page 312-313.
				949	*
				950	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				951	* memory row.
				952	*/
				953
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	954	hz_width = u_align(layout->levels[0].slice_width, 16);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	955
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	956	if (info->imageType == XGL_IMAGE_3D) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	957	unsigned lv;
				958
				959	hz_height = 0;
				960
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	961	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	962	const unsigned h =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	963	u_align(layout->levels[lv].slice_height, hz_align_j);
				964	hz_height += h * u_minify(info->extent.depth, lv);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	965	}
				966
				967	hz_height /= 2;
				968	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	969	const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	970	unsigned hz_qpitch = h0;
				971
				972	if (layout->full_layers) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	973	const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	974	const unsigned htail =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	975	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	976
				977	hz_qpitch += h1 + htail;
				978	}
				979
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	980	hz_height = hz_qpitch * info->arraySize / 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	981
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	982	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				983	hz_height = u_align(hz_height, 8);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	984	}
				985
				986	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	987	layout->aux_stride = u_align(hz_width, 128);
				988	layout->aux_height = u_align(hz_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	989	}
				990
				991	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	992	layout_calculate_mcs_size(struct intel_layout *layout,
				993	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	994	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	995	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	996	int mcs_width, mcs_height, mcs_cpp;
				997	int downscale_x, downscale_y;
				998
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	999	assert(layout->aux_type == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1000
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1001	if (info->samples > 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1002	/*
				1003	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1004	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1005	* need of scale down could be that the clear rectangle is used to clear
				1006	* the MCS instead of the RT.
				1007	*
				1008	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1009	* 2x2 factor could come from that the hardware writes 128 bits (an
				1010	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1011	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1012	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1013	* pixel block in the RT.
				1014	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1015	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1016	case 2:
				1017	case 4:
				1018	downscale_x = 8;
				1019	downscale_y = 2;
				1020	mcs_cpp = 1;
				1021	break;
				1022	case 8:
				1023	downscale_x = 2;
				1024	downscale_y = 2;
				1025	mcs_cpp = 4;
				1026	break;
				1027	case 16:
				1028	downscale_x = 2;
				1029	downscale_y = 1;
				1030	mcs_cpp = 8;
				1031	break;
				1032	default:
				1033	assert(!"unsupported sample count");
				1034	return;
				1035	break;
				1036	}
				1037
				1038	/*
				1039	* It also appears that the 2x2 subspans generated by the scaled-down
				1040	* clear rectangle cannot be masked. The scale-down clear rectangle
				1041	* thus must be aligned to 2x2, and we need to pad.
				1042	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1043	mcs_width = u_align(info->extent.width, downscale_x * 2);
				1044	mcs_height = u_align(info->extent.height, downscale_y * 2);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1045	}
				1046	else {
				1047	/*
				1048	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1049	*
				1050	* " Pixels Lines
				1051	* TiledY RT CL
				1052	* bpp
				1053	* 32 8 4
				1054	* 64 4 4
				1055	* 128 2 4
				1056	*
				1057	* TiledX RT CL
				1058	* bpp
				1059	* 32 16 2
				1060	* 64 8 2
				1061	* 128 4 2"
				1062	*
				1063	* This table and the two following tables define the RT alignments, the
				1064	* clear rectangle alignments, and the clear rectangle scale factors.
				1065	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1066	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1067	* rectangle scale factors are 8x16 blocks.
				1068	*
				1069	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1070	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1071	* 8x16 blocks.
				1072	*
				1073	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1074	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1075	* which says that a Y-tile maps to 128x256 blocks (\see
				1076	* intel_get_non_msrt_mcs_alignment). It does not really change
				1077	* anything except for the size of the allocated MCS. Let's see if we
				1078	* hit out-of-bound access.
				1079	*/
				1080	switch (layout->tiling) {
				1081	case INTEL_TILING_X:
				1082	downscale_x = 64 / layout->block_size;
				1083	downscale_y = 2;
				1084	break;
				1085	case INTEL_TILING_Y:
				1086	downscale_x = 32 / layout->block_size;
				1087	downscale_y = 4;
				1088	break;
				1089	default:
				1090	assert(!"unsupported tiling mode");
				1091	return;
				1092	break;
				1093	}
				1094
				1095	downscale_x *= 8;
				1096	downscale_y *= 16;
				1097
				1098	/*
				1099	* From the Haswell PRM, volume 7, page 652:
				1100	*
				1101	* "Clear rectangle must be aligned to two times the number of
				1102	* pixels in the table shown below due to 16X16 hashing across the
				1103	* slice."
				1104	*
				1105	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1106	* 2x2, and we need to pad.
				1107	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1108	mcs_width = u_align(info->extent.width, downscale_x * 4) / downscale_x;
				1109	mcs_height = u_align(info->extent.height, downscale_y * 4) / downscale_y;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1110	mcs_cpp = 16; /* an OWord */
				1111	}
				1112
				1113	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1114	layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
				1115	layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1116	}
				1117
				1118	/**
				1119	* Initialize the layout. Callers should zero-initialize \p layout first.
				1120	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1121	void intel_layout_init(struct intel_layout *layout,
				1122	const struct intel_dev *dev,
				1123	const XGL_IMAGE_CREATE_INFO *info)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1124	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1125	struct intel_layout_params params;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1126
				1127	memset(&params, 0, sizeof(params));
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1128	params.gpu = dev->gpu;
				1129	params.info = info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1130
				1131	/* note that there are dependencies between these functions */
				1132	layout_init_aux(layout, &params);
				1133	layout_init_format(layout, &params);
				1134	layout_init_arrangements(layout, &params);
				1135	layout_init_tiling(layout, &params);
				1136	layout_init_alignments(layout, &params);
				1137	layout_init_levels(layout, &params);
				1138	layout_init_layer_height(layout, &params);
				1139
				1140	layout_align(layout, &params);
				1141	layout_calculate_bo_size(layout, &params);
				1142
				1143	switch (layout->aux_type) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1144	case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1145	layout_calculate_hiz_size(layout, &params);
				1146	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1147	case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1148	layout_calculate_mcs_size(layout, &params);
				1149	break;
				1150	default:
				1151	break;
				1152	}
				1153	}
				1154
				1155	/**
				1156	* Update the tiling mode and bo stride (for imported resources).
				1157	*/
				1158	bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1159	intel_layout_update_for_imported_bo(struct intel_layout *layout,
				1160	enum intel_tiling_mode tiling,
				1161	unsigned bo_stride)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1162	{
				1163	if (!(layout->valid_tilings & (1 << tiling)))
				1164	return false;
				1165
				1166	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1167	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1168	return false;
				1169
				1170	layout->tiling = tiling;
				1171	layout->bo_stride = bo_stride;
				1172
				1173	return true;
				1174	}