Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: e68b07952eac8517265a03fe174bb71e20085678 [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1	/*
Chia-I Wu	44e4236	2014-09-02 08:32:09 +0800	[diff] [blame]	2	* XGL
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
Chia-I Wu	44e4236	2014-09-02 08:32:09 +0800	[diff] [blame]	25	* Chia-I Wu <olv@lunarg.com>
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	26	*/
				27
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	28	#include "dev.h"
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	29	#include "format.h"
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	30	#include "gpu.h"
				31	#include "layout.h"
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	32
				33	enum {
				34	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				35	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				36	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				37	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				38
				39	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				40	LAYOUT_TILING_X \|
				41	LAYOUT_TILING_Y \|
				42	LAYOUT_TILING_W)
				43	};
				44
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	45	struct intel_layout_params {
Chia-I Wu	c94f3e5	2014-10-07 14:45:05 +0800	[diff] [blame]	46	struct intel_dev *dev;
				47
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	48	const struct intel_gpu *gpu;
				49	const XGL_IMAGE_CREATE_INFO *info;
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame]	50	bool scanout;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	51
				52	bool compressed;
				53
				54	unsigned h0, h1;
				55	unsigned max_x, max_y;
				56	};
				57
				58	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	59	layout_get_slice_size(const struct intel_layout *layout,
				60	const struct intel_layout_params *params,
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	61	unsigned level, unsigned width, unsigned height)
				62	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	63	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	64	unsigned w, h;
				65
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	66	w = u_minify(layout->width0, level);
				67	h = u_minify(layout->height0, level);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	68
				69	/*
				70	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				71	*
				72	* "The dimensions of the mip maps are first determined by applying the
				73	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				74	* if necessary, they are padded out to compression block boundaries."
				75	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	76	w = u_align(w, layout->block_width);
				77	h = u_align(h, layout->block_height);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	78
				79	/*
				80	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				81	*
				82	* "If the surface is multisampled (4x), these values must be adjusted
				83	* as follows before proceeding:
				84	*
				85	* W_L = ceiling(W_L / 2) * 4
				86	* H_L = ceiling(H_L / 2) * 4"
				87	*
				88	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				89	*
				90	* "If the surface is multisampled and it is a depth or stencil surface
				91	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				92	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				93	* proceeding:
				94	*
				95	* #samples W_L = H_L =
				96	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				97	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				98	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				99	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				100	*
				101	* For interleaved samples (4x), where pixels
				102	*
				103	* (x, y ) (x+1, y )
				104	* (x, y+1) (x+1, y+1)
				105	*
				106	* would be is occupied by
				107	*
				108	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				109	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				110	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				111	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				112	*
				113	* Thus the need to
				114	*
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	115	* w = align(w, 2) * 2;
				116	* y = align(y, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	117	*/
				118	if (layout->interleaved_samples) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	119	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	120	case 0:
				121	case 1:
				122	break;
				123	case 2:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	124	w = u_align(w, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	125	break;
				126	case 4:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	127	w = u_align(w, 2) * 2;
				128	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	129	break;
				130	case 8:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	131	w = u_align(w, 2) * 4;
				132	h = u_align(h, 2) * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	133	break;
				134	case 16:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	135	w = u_align(w, 2) * 4;
				136	h = u_align(h, 2) * 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	137	break;
				138	default:
				139	assert(!"unsupported sample count");
				140	break;
				141	}
				142	}
				143
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	144	/*
				145	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				146	*
				147	* "For separate stencil buffer, the width must be mutiplied by 2 and
				148	* height divided by 2..."
				149	*
				150	* To make things easier (for transfer), we will just double the stencil
				151	* stride in 3DSTATE_STENCIL_BUFFER.
				152	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	153	w = u_align(w, layout->align_i);
				154	h = u_align(h, layout->align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	155
				156	*width = w;
				157	*height = h;
				158	}
				159
				160	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	161	layout_get_num_layers(const struct intel_layout *layout,
				162	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	163	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	164	const XGL_IMAGE_CREATE_INFO *info = params->info;
				165	unsigned num_layers = info->arraySize;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	166
				167	/* samples of the same index are stored in a layer */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	168	if (info->samples > 1 && !layout->interleaved_samples)
				169	num_layers *= info->samples;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	170
				171	return num_layers;
				172	}
				173
				174	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	175	layout_init_layer_height(struct intel_layout *layout,
				176	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	177	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	178	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	179	unsigned num_layers;
				180
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	181	if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
				182	return;
				183
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	184	num_layers = layout_get_num_layers(layout, params);
				185	if (num_layers <= 1)
				186	return;
				187
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	188	/*
				189	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				190	*
				191	* "The following equation is used for surface formats other than
				192	* compressed textures:
				193	*
				194	* QPitch = (h0 + h1 + 11j)"
				195	*
				196	* "The equation for compressed textures (BC* and FXT1 surface formats)
				197	* follows:
				198	*
				199	* QPitch = (h0 + h1 + 11j) / 4"
				200	*
				201	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				202	* value calculated in the equation above, for every other odd Surface
				203	* Height starting from 1 i.e. 1,5,9,13"
				204	*
				205	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				206	*
				207	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				208	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				209	*
				210	* QPitch = (h0 + h1 + 12j)
				211	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				212	*
				213	* (There are many typos or missing words here...)"
				214	*
				215	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				216	* the base address. The PRM divides QPitch by 4 for compressed formats
				217	* because the block height for those formats are 4, and it wants QPitch to
				218	* mean the number of memory rows, as opposed to texel rows, between
				219	* slices. Since we use texel rows everywhere, we do not need to divide
				220	* QPitch by 4.
				221	*/
				222	layout->layer_height = params->h0 + params->h1 +
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	223	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	224
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	225	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) && info->samples > 1 &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	226	layout->height0 % 4 == 1)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	227	layout->layer_height += 4;
				228
				229	params->max_y += layout->layer_height * (num_layers - 1);
				230	}
				231
				232	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	233	layout_init_lods(struct intel_layout *layout,
				234	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	235	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	236	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	237	unsigned cur_x, cur_y;
				238	unsigned lv;
				239
				240	cur_x = 0;
				241	cur_y = 0;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	242	for (lv = 0; lv < info->mipLevels; lv++) {
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	243	unsigned lod_w, lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	244
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	245	layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	246
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	247	layout->lods[lv].x = cur_x;
				248	layout->lods[lv].y = cur_y;
				249	layout->lods[lv].slice_width = lod_w;
				250	layout->lods[lv].slice_height = lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	251
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	252	switch (layout->walk) {
				253	case INTEL_LAYOUT_WALK_LOD:
				254	lod_h *= layout_get_num_layers(layout, params);
				255	if (lv == 1)
				256	cur_x += lod_w;
				257	else
				258	cur_y += lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	259
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	260	/* every LOD begins at tile boundaries */
				261	if (info->mipLevels > 1) {
Chia-I Wu	4806f2c	2015-02-19 13:54:35 -0700	[diff] [blame]	262	assert(layout->format == XGL_FMT_S8_UINT);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	263	cur_x = u_align(cur_x, 64);
				264	cur_y = u_align(cur_y, 64);
				265	}
				266	break;
				267	case INTEL_LAYOUT_WALK_LAYER:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	268	/* MIPLAYOUT_BELOW */
				269	if (lv == 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	270	cur_x += lod_w;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	271	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	272	cur_y += lod_h;
				273	break;
				274	case INTEL_LAYOUT_WALK_3D:
				275	{
				276	const unsigned num_slices = u_minify(info->extent.depth, lv);
				277	const unsigned num_slices_per_row = 1 << lv;
				278	const unsigned num_rows =
				279	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				280
				281	lod_w *= num_slices_per_row;
				282	lod_h *= num_rows;
				283
				284	cur_y += lod_h;
				285	}
				286	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	287	}
				288
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	289	if (params->max_x < layout->lods[lv].x + lod_w)
				290	params->max_x = layout->lods[lv].x + lod_w;
				291	if (params->max_y < layout->lods[lv].y + lod_h)
				292	params->max_y = layout->lods[lv].y + lod_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	293	}
				294
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	295	if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
				296	params->h0 = layout->lods[0].slice_height;
				297
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	298	if (info->mipLevels > 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	299	params->h1 = layout->lods[1].slice_height;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	300	else
				301	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				302	}
				303	}
				304
				305	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	306	layout_init_alignments(struct intel_layout *layout,
				307	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	308	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	309	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	310
				311	/*
				312	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				313	*
				314	* "surface format align_i align_j
				315	* YUV 4:2:2 formats 4 *see below
				316	* BC1-5 4 4
				317	* FXT1 8 4
				318	* all other formats 4 *see below"
				319	*
				320	* "- align_j = 4 for any depth buffer
				321	* - align_j = 2 for separate stencil buffer
				322	* - align_j = 4 for any render target surface is multisampled (4x)
				323	* - align_j = 4 for any render target surface with Surface Vertical
				324	* Alignment = VALIGN_4
				325	* - align_j = 2 for any render target surface with Surface Vertical
				326	* Alignment = VALIGN_2
				327	* - align_j = 2 for all other render target surface
				328	* - align_j = 2 for any sampling engine surface with Surface Vertical
				329	* Alignment = VALIGN_2
				330	* - align_j = 4 for any sampling engine surface with Surface Vertical
				331	* Alignment = VALIGN_4"
				332	*
				333	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				334	*
				335	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				336	* the Surface Format is 96 bits per element (BPE)."
				337	*
				338	* They can be rephrased as
				339	*
				340	* align_i align_j
				341	* compressed formats block width block height
				342	* PIPE_FORMAT_S8_UINT 4 2
				343	* other depth/stencil formats 4 4
				344	* 4x multisampled 4 4
				345	* bpp 96 4 2
				346	* others 4 2 or 4
				347	*/
				348
				349	/*
				350	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				351	*
				352	* "surface defined by surface format align_i align_j
				353	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				354	* not D16_UNORM 4 4
				355	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				356	* SURFACE_STATE BC, ETC, EAC* 4 4
				357	* FXT1 8 4
				358	* all others (set by SURFACE_STATE)"
				359	*
				360	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				361	*
				362	* "- This field (Surface Vertical Aligment) is intended to be set to
				363	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				364	* multisampled (4x) render target, or for a multisampled (8x)
				365	* render target, since these surfaces support only alignment of 4.
				366	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				367	* memory.
				368	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				369	* surfaces.
				370	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				371	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				372	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				373	* must be set to VALIGN_4."
				374	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				375	*
				376	* "- This field (Surface Horizontal Aligment) is intended to be set to
				377	* HALIGN_8 only if the surface was rendered as a depth buffer with
				378	* Z16 format or a stencil buffer, since these surfaces support only
				379	* alignment of 8.
				380	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				381	* memory.
				382	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				383	* - This field must be set to HALIGN_8 if the Surface Format is
				384	* FXT1."
				385	*
				386	* They can be rephrased as
				387	*
				388	* align_i align_j
				389	* compressed formats block width block height
				390	* PIPE_FORMAT_Z16_UNORM 8 4
				391	* PIPE_FORMAT_S8_UINT 8 8
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	392	* other depth/stencil formats 4 4
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	393	* 2x or 4x multisampled 4 or 8 4
				394	* tiled Y 4 or 8 4 (if rt)
				395	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				396	* others 4 or 8 2 or 4
				397	*/
				398
				399	if (params->compressed) {
				400	/* this happens to be the case */
				401	layout->align_i = layout->block_width;
				402	layout->align_j = layout->block_height;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	403	} else if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				404	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	405	switch (layout->format) {
				406	case XGL_FMT_D16_UNORM:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	407	layout->align_i = 8;
				408	layout->align_j = 4;
				409	break;
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	410	case XGL_FMT_S8_UINT:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	411	layout->align_i = 8;
				412	layout->align_j = 8;
				413	break;
				414	default:
				415	layout->align_i = 4;
				416	layout->align_j = 4;
				417	break;
				418	}
				419	} else {
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	420	switch (layout->format) {
				421	case XGL_FMT_S8_UINT:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	422	layout->align_i = 4;
				423	layout->align_j = 2;
				424	break;
				425	default:
				426	layout->align_i = 4;
				427	layout->align_j = 4;
				428	break;
				429	}
				430	}
				431	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	432	const bool valign_4 = (info->samples > 1) \|\|
				433	(intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	434	layout->tiling == INTEL_TILING_Y &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	435	(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	436
				437	if (valign_4)
				438	assert(layout->block_size != 12);
				439
				440	layout->align_i = 4;
				441	layout->align_j = (valign_4) ? 4 : 2;
				442	}
				443
				444	/*
				445	* the fact that align i and j are multiples of block width and height
				446	* respectively is what makes the size of the bo a multiple of the block
				447	* size, slices start at block boundaries, and many of the computations
				448	* work.
				449	*/
				450	assert(layout->align_i % layout->block_width == 0);
				451	assert(layout->align_j % layout->block_height == 0);
				452
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	453	/* make sure u_align() works */
				454	assert(u_is_pow2(layout->align_i) &&
				455	u_is_pow2(layout->align_j));
				456	assert(u_is_pow2(layout->block_width) &&
				457	u_is_pow2(layout->block_height));
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	458	}
				459
				460	static unsigned
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	461	layout_get_valid_tilings(const struct intel_layout *layout,
				462	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	463	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	464	const XGL_IMAGE_CREATE_INFO *info = params->info;
				465	const XGL_FORMAT format = layout->format;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	466	unsigned valid_tilings = LAYOUT_TILING_ALL;
				467
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame]	468	/*
				469	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				470	*
				471	* "Display/Overlay Y-Major not supported.
				472	* X-Major required for Async Flips"
				473	*/
				474	if (params->scanout)
				475	valid_tilings &= LAYOUT_TILING_X;
				476
Chia-I Wu	6ac9399	2014-08-30 18:23:28 +0800	[diff] [blame]	477	if (info->tiling == XGL_LINEAR_TILING)
				478	valid_tilings &= LAYOUT_TILING_NONE;
				479
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	480	/*
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	481	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				482	*
				483	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				484	* Depth Buffer is not supported."
				485	*
				486	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				487	*
				488	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				489	*
				490	* "W-Major Tile Format is used for separate stencil."
				491	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	492	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	493	switch (format) {
				494	case XGL_FMT_S8_UINT:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	495	valid_tilings &= LAYOUT_TILING_W;
				496	break;
				497	default:
				498	valid_tilings &= LAYOUT_TILING_Y;
				499	break;
				500	}
				501	}
				502
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	503	if (info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	504	/*
				505	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				506	*
				507	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				508	* either TileX or Linear."
				509	*/
				510	if (layout->block_size == 16)
				511	valid_tilings &= ~LAYOUT_TILING_Y;
				512
				513	/*
				514	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				515	*
				516	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				517	* for all tiled Y Render Target surfaces."
				518	*
				519	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				520	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	521	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) && layout->block_size == 12)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	522	valid_tilings &= ~LAYOUT_TILING_Y;
				523	}
				524
				525	/* no conflicting binding flags */
				526	assert(valid_tilings);
				527
				528	return valid_tilings;
				529	}
				530
				531	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	532	layout_init_tiling(struct intel_layout *layout,
				533	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	534	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	535	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	536	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				537
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	538	/* no hardware support for W-tile */
				539	if (valid_tilings & LAYOUT_TILING_W)
				540	valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) \| LAYOUT_TILING_NONE;
				541
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	542	layout->valid_tilings = valid_tilings;
				543
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	544	if (info->usage & (XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT \|
				545	XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	546	/*
				547	* heuristically set a minimum width/height for enabling tiling
				548	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	549	if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	550	valid_tilings &= ~LAYOUT_TILING_X;
				551
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	552	if ((layout->width0 < 32 \|\| layout->height0 < 16) &&
				553	(layout->width0 < 16 \|\| layout->height0 < 32) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	554	(valid_tilings & ~LAYOUT_TILING_Y))
				555	valid_tilings &= ~LAYOUT_TILING_Y;
				556	} else {
				557	/* force linear if we are not sure where the texture is bound to */
				558	if (valid_tilings & LAYOUT_TILING_NONE)
				559	valid_tilings &= LAYOUT_TILING_NONE;
				560	}
				561
				562	/* prefer tiled over linear */
				563	if (valid_tilings & LAYOUT_TILING_Y)
				564	layout->tiling = INTEL_TILING_Y;
				565	else if (valid_tilings & LAYOUT_TILING_X)
				566	layout->tiling = INTEL_TILING_X;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	567	else
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	568	layout->tiling = INTEL_TILING_NONE;
				569	}
				570
				571	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	572	layout_init_walk_gen7(struct intel_layout *layout,
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	573	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	574	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	575	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	576
				577	/*
				578	* It is not explicitly states, but render targets are expected to be
				579	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				580	* to be IMS (samples interleaved).
				581	*
				582	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				583	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	584	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	585	/*
				586	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				587	*
				588	* "note that the depth buffer and stencil buffer have an implied
				589	* value of ARYSPC_FULL"
				590	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	591	layout->walk = (info->imageType == XGL_IMAGE_3D) ?
				592	INTEL_LAYOUT_WALK_3D : INTEL_LAYOUT_WALK_LAYER;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	593
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	594	layout->interleaved_samples = true;
				595	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	596	/*
				597	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				598	*
				599	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				600	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				601	* Array Spacing) must be set to ARYSPC_LOD0."
				602	*
				603	* As multisampled resources are not mipmapped, we never use
				604	* ARYSPC_FULL for them.
				605	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	606	if (info->samples > 1)
				607	assert(info->mipLevels == 1);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	608
				609	layout->walk =
				610	(info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
				611	(info->mipLevels > 1) ? INTEL_LAYOUT_WALK_LAYER :
				612	INTEL_LAYOUT_WALK_LOD;
				613
				614	layout->interleaved_samples = false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	615	}
				616	}
				617
				618	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	619	layout_init_walk_gen6(struct intel_layout *layout,
				620	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	621	{
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	622	/*
				623	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				624	*
				625	* "The separate stencil buffer does not support mip mapping, thus the
				626	* storage for LODs other than LOD 0 is not needed. The following
				627	* QPitch equation applies only to the separate stencil buffer:
				628	*
				629	* QPitch = h_0"
				630	*
				631	* GEN6 does not support compact spacing otherwise.
				632	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	633	layout->walk =
				634	(params->info->imageType == XGL_IMAGE_3D) ? INTEL_LAYOUT_WALK_3D :
Chia-I Wu	4806f2c	2015-02-19 13:54:35 -0700	[diff] [blame]	635	(layout->format == XGL_FMT_S8_UINT) ? INTEL_LAYOUT_WALK_LOD :
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	636	INTEL_LAYOUT_WALK_LAYER;
				637
				638	/* GEN6 supports only interleaved samples */
				639	layout->interleaved_samples = true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	640	}
				641
				642	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	643	layout_init_walk(struct intel_layout *layout,
				644	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	645	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	646	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	647	layout_init_walk_gen7(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	648	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	649	layout_init_walk_gen6(layout, params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	650	}
				651
				652	static void
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	653	layout_init_size_and_format(struct intel_layout *layout,
				654	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	655	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	656	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	657	XGL_FORMAT format = info->format;
Chia-I Wu	9a056dd	2015-02-11 13:19:39 -0700	[diff] [blame]	658	bool require_separate_stencil = false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	659
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	660	layout->width0 = info->extent.width;
				661	layout->height0 = info->extent.height;
				662
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	663	/*
				664	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				665	*
				666	* "This field (Separate Stencil Buffer Enable) must be set to the same
				667	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				668	*
				669	* GEN7+ requires separate stencil buffers.
				670	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	671	if (info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT) {
				672	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	673	require_separate_stencil = true;
				674	else
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	675	require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	676	}
				677
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	678	if (icd_format_is_ds(format)) {
				679	switch (format) {
				680	case XGL_FMT_D32_SFLOAT_S8_UINT:
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	681	if (require_separate_stencil) {
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	682	format = XGL_FMT_D32_SFLOAT;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	683	layout->separate_stencil = true;
				684	}
				685	break;
				686	default:
				687	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	688	}
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	689	}
				690
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	691	layout->format = format;
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	692	layout->block_width = icd_format_get_block_width(format);
				693	layout->block_height = layout->block_width;
				694	layout->block_size = icd_format_get_size(format);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	695
Chia-I Wu	1bf06df	2014-08-16 12:33:13 +0800	[diff] [blame]	696	params->compressed = icd_format_is_compressed(format);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	697	}
				698
				699	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	700	layout_want_mcs(struct intel_layout *layout,
				701	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	702	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	703	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	704	bool want_mcs = false;
				705
				706	/* MCS is for RT on GEN7+ */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	707	if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	708	return false;
				709
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	710	if (info->imageType != XGL_IMAGE_2D \|\|
				711	!(info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	712	return false;
				713
				714	/*
				715	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				716	*
				717	* "For Render Target and Sampling Engine Surfaces:If the surface is
				718	* multisampled (Number of Multisamples any value other than
				719	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				720	*
				721	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				722	* are not written"
				723	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	724	if (info->samples > 1 && !layout->interleaved_samples &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	725	!icd_format_is_int(info->format)) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	726	want_mcs = true;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	727	} else if (info->samples <= 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	728	/*
				729	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				730	*
				731	* "When MCS is buffer is used for color clear of non-multisampler
				732	* render target, the following restrictions apply.
				733	* - Support is limited to tiled render targets.
				734	* - Support is for non-mip-mapped and non-array surface types
				735	* only.
				736	* - Clear is supported only on the full RT; i.e., no partial clear
				737	* or overlapping clears.
				738	* - MCS buffer for non-MSRT is supported only for RT formats
				739	* 32bpp, 64bpp and 128bpp.
				740	* ..."
				741	*/
				742	if (layout->tiling != INTEL_TILING_NONE &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	743	info->mipLevels == 1 && info->arraySize == 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	744	switch (layout->block_size) {
				745	case 4:
				746	case 8:
				747	case 16:
				748	want_mcs = true;
				749	break;
				750	default:
				751	break;
				752	}
				753	}
				754	}
				755
				756	return want_mcs;
				757	}
				758
				759	static bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	760	layout_want_hiz(const struct intel_layout *layout,
				761	const struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	762	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	763	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	764
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	765	if (!(info->usage & XGL_IMAGE_USAGE_DEPTH_STENCIL_BIT))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	766	return false;
				767
Chia-I Wu	9ec7e70	2015-02-19 13:18:42 -0700	[diff] [blame^]	768	if (!intel_format_has_depth(params->gpu, info->format))
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	769	return false;
				770
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	771	/*
				772	* As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled
				773	* for every level. This is generally fine except on GEN6, where HiZ and
				774	* separate stencil are enabled and disabled at the same time. When the
				775	* format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
				776	* can result in incompatible formats.
				777	*/
				778	if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	779	info->format == XGL_FMT_D32_SFLOAT_S8_UINT &&
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	780	info->mipLevels > 1)
				781	return false;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	782
Chia-I Wu	c94f3e5	2014-10-07 14:45:05 +0800	[diff] [blame]	783	if (true) {
				784	intel_dev_log(params->dev, XGL_DBG_MSG_PERF_WARNING,
				785	XGL_VALIDATION_LEVEL_0, XGL_NULL_HANDLE, 0, 0,
				786	"HiZ disabled");
				787	return false;
				788	}
				789
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	790	return true;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	791	}
				792
				793	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	794	layout_init_aux(struct intel_layout *layout,
				795	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	796	{
				797	if (layout_want_hiz(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	798	layout->aux = INTEL_LAYOUT_AUX_HIZ;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	799	else if (layout_want_mcs(layout, params))
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	800	layout->aux = INTEL_LAYOUT_AUX_MCS;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	801	}
				802
				803	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	804	layout_align(struct intel_layout layout, struct intel_layout_params params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	805	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	806	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	807	int align_w = 1, align_h = 1, pad_h = 0;
				808
				809	/*
				810	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				811	*
				812	* "To determine the necessary padding on the bottom and right side of
				813	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				814	* parameters for the surface format in use. The surface must then be
				815	* extended to the next multiple of the alignment unit size in each
				816	* dimension, and all texels contained in this extended surface must
				817	* have valid GTT entries."
				818	*
				819	* "For cube surfaces, an additional two rows of padding are required
				820	* at the bottom of the surface. This must be ensured regardless of
				821	* whether the surface is stored tiled or linear. This is due to the
				822	* potential rotation of cache line orientation from memory to cache."
				823	*
				824	* "For compressed textures (BC* and FXT1 surface formats), padding at
				825	* the bottom of the surface is to an even compressed row, which is
				826	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				827	* purposes, these surfaces behave as if j = 8 only for surface
				828	* padding purposes. The value of 4 for j still applies for mip level
				829	* alignment and QPitch calculation."
				830	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	831	if (info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) {
				832	if (align_w < layout->align_i)
				833	align_w = layout->align_i;
				834	if (align_h < layout->align_j)
				835	align_h = layout->align_j;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	836
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	837	/* in case it is used as a cube */
				838	if (info->imageType == XGL_IMAGE_2D)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	839	pad_h += 2;
				840
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	841	if (params->compressed && align_h < layout->align_j * 2)
				842	align_h = layout->align_j * 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	843	}
				844
				845	/*
				846	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				847	*
				848	* "If the surface contains an odd number of rows of data, a final row
				849	* below the surface must be allocated."
				850	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	851	if ((info->usage & XGL_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
				852	align_h = 2;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	853
				854	/*
				855	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	856	* intel_texture_can_enable_hiz(), we always return true for the first slice.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	857	* To avoid out-of-bound access, we have to pad.
				858	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	859	if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
				860	info->mipLevels == 1 &&
				861	info->arraySize == 1 &&
				862	info->extent.depth == 1) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	863	if (align_w < 8)
				864	align_w = 8;
				865	if (align_h < 4)
				866	align_h = 4;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	867	}
				868
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	869	params->max_x = u_align(params->max_x, align_w);
				870	params->max_y = u_align(params->max_y + pad_h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	871	}
				872
				873	/* note that this may force the texture to be linear */
				874	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	875	layout_calculate_bo_size(struct intel_layout *layout,
				876	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	877	{
				878	assert(params->max_x % layout->block_width == 0);
				879	assert(params->max_y % layout->block_height == 0);
				880	assert(layout->layer_height % layout->block_height == 0);
				881
				882	layout->bo_stride =
				883	(params->max_x / layout->block_width) * layout->block_size;
				884	layout->bo_height = params->max_y / layout->block_height;
				885
				886	while (true) {
				887	unsigned w = layout->bo_stride, h = layout->bo_height;
				888	unsigned align_w, align_h;
				889
				890	/*
				891	* From the Haswell PRM, volume 5, page 163:
				892	*
				893	* "For linear surfaces, additional padding of 64 bytes is required
				894	* at the bottom of the surface. This is in addition to the padding
				895	* required above."
				896	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	897	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
				898	(params->info->usage & XGL_IMAGE_USAGE_SHADER_ACCESS_READ_BIT) &&
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	899	layout->tiling == INTEL_TILING_NONE) {
				900	layout->bo_height +=
				901	(64 + layout->bo_stride - 1) / layout->bo_stride;
				902	}
				903
				904	/*
				905	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				906	*
				907	* "- For linear render target surfaces, the pitch must be a
				908	* multiple of the element size for non-YUV surface formats.
				909	* Pitch must be a multiple of 2 * element size for YUV surface
				910	* formats.
				911	* - For other linear surfaces, the pitch can be any multiple of
				912	* bytes.
				913	* - For tiled surfaces, the pitch must be a multiple of the tile
				914	* width."
				915	*
				916	* Different requirements may exist when the bo is used in different
				917	* places, but our alignments here should be good enough that we do not
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	918	* need to check layout->info->usage.
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	919	*/
				920	switch (layout->tiling) {
				921	case INTEL_TILING_X:
				922	align_w = 512;
				923	align_h = 8;
				924	break;
				925	case INTEL_TILING_Y:
				926	align_w = 128;
				927	align_h = 32;
				928	break;
				929	default:
Chia-I Wu	4806f2c	2015-02-19 13:54:35 -0700	[diff] [blame]	930	if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	931	/*
				932	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				933	*
				934	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				935	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				936	* bytes."
				937	*
				938	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				939	* INTEL_TILING_W, we want to align to W tiles here.
				940	*/
				941	align_w = 64;
				942	align_h = 64;
				943	} else {
				944	/* some good enough values */
				945	align_w = 64;
				946	align_h = 2;
				947	}
				948	break;
				949	}
				950
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	951	w = u_align(w, align_w);
				952	h = u_align(h, align_h);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	953
				954	/* make sure the bo is mappable */
				955	if (layout->tiling != INTEL_TILING_NONE) {
				956	/*
				957	* Usually only the first 256MB of the GTT is mappable.
				958	*
				959	* See also how intel_context::max_gtt_map_object_size is calculated.
				960	*/
				961	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				962
				963	/*
				964	* Be conservative. We may be able to switch from VALIGN_4 to
				965	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				966	*/
				967	if (mappable_gtt_size / w / 4 < h) {
				968	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				969	layout->tiling = INTEL_TILING_NONE;
				970	/* MCS support for non-MSRTs is limited to tiled RTs */
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	971	if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	972	params->info->samples <= 1)
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	973	layout->aux = INTEL_LAYOUT_AUX_NONE;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	974
				975	continue;
				976	} else {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	977	/* mapping will fail */
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	978	}
				979	}
				980	}
				981
				982	layout->bo_stride = w;
				983	layout->bo_height = h;
				984	break;
				985	}
				986	}
				987
				988	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	989	layout_calculate_hiz_size(struct intel_layout *layout,
				990	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	991	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	992	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	993	const unsigned hz_align_j = 8;
				994	enum intel_layout_walk_type hz_walk;
				995	unsigned hz_width, hz_height, lv;
				996	unsigned hz_clear_w, hz_clear_h;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	997
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	998	assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);
				999
				1000	assert(layout->walk == INTEL_LAYOUT_WALK_LAYER \|\|
				1001	layout->walk == INTEL_LAYOUT_WALK_3D);
				1002
				1003	/*
				1004	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				1005	*
				1006	* "The hierarchical depth buffer does not support the LOD field, it is
				1007	* assumed by hardware to be zero. A separate hierarachical depth
				1008	* buffer is required for each LOD used, and the corresponding
				1009	* buffer's state delivered to hardware each time a new depth buffer
				1010	* state with modified LOD is delivered."
				1011	*
				1012	* We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
				1013	*/
				1014	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1015	hz_walk = layout->walk;
				1016	else
				1017	hz_walk = INTEL_LAYOUT_WALK_LOD;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1018
				1019	/*
				1020	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				1021	* PRM, volume 2 part 1, page 312-313.
				1022	*
				1023	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				1024	* memory row.
				1025	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1026	switch (hz_walk) {
				1027	case INTEL_LAYOUT_WALK_LOD:
				1028	{
				1029	unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
				1030	unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
				1031	unsigned cur_tx, cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1032
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1033	/* figure out the tile offsets of LODs */
				1034	hz_width = 0;
				1035	hz_height = 0;
				1036	cur_tx = 0;
				1037	cur_ty = 0;
				1038	for (lv = 0; lv < info->mipLevels; lv++) {
				1039	unsigned tw, th;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1040
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1041	lod_tx[lv] = cur_tx;
				1042	lod_ty[lv] = cur_ty;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1043
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1044	tw = u_align(layout->lods[lv].slice_width, 16);
				1045	th = u_align(layout->lods[lv].slice_height, hz_align_j) *
				1046	info->arraySize / 2;
				1047	/* convert to Y-tiles */
				1048	tw = u_align(tw, 128) / 128;
				1049	th = u_align(th, 32) / 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1050
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1051	if (hz_width < cur_tx + tw)
				1052	hz_width = cur_tx + tw;
				1053	if (hz_height < cur_ty + th)
				1054	hz_height = cur_ty + th;
				1055
				1056	if (lv == 1)
				1057	cur_tx += tw;
				1058	else
				1059	cur_ty += th;
				1060	}
				1061
				1062	/* convert tile offsets to memory offsets */
				1063	for (lv = 0; lv < info->mipLevels; lv++) {
				1064	layout->aux_offsets[lv] =
				1065	(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
				1066	}
				1067	hz_width *= 128;
				1068	hz_height *= 32;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1069	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1070	break;
				1071	case INTEL_LAYOUT_WALK_LAYER:
				1072	{
				1073	const unsigned h0 = u_align(params->h0, hz_align_j);
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1074	const unsigned h1 = u_align(params->h1, hz_align_j);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1075	const unsigned htail =
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1076	((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1077	const unsigned hz_qpitch = h0 + h1 + htail;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1078
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1079	hz_width = u_align(layout->lods[0].slice_width, 16);
				1080
				1081	hz_height = hz_qpitch * info->arraySize / 2;
				1082	if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
				1083	hz_height = u_align(hz_height, 8);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1084	}
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1085	break;
				1086	case INTEL_LAYOUT_WALK_3D:
				1087	hz_width = u_align(layout->lods[0].slice_width, 16);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1088
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1089	hz_height = 0;
				1090	for (lv = 0; lv < info->mipLevels; lv++) {
				1091	const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
				1092	/* according to the formula, slices are packed together vertically */
				1093	hz_height += h * u_minify(info->extent.depth, lv);
				1094	}
				1095	hz_height /= 2;
				1096	break;
Chia-I Wu	9a056dd	2015-02-11 13:19:39 -0700	[diff] [blame]	1097	default:
				1098	assert(!"unknown layout walk");
				1099	hz_width = 0;
				1100	hz_height = 0;
				1101	break;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1102	}
				1103
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1104	/*
				1105	* In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
				1106	* Experiments on Haswell show that aligning the RECTLIST primitive and
				1107	* 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
				1108	* aligned.
				1109	*/
				1110	hz_clear_w = 8;
				1111	hz_clear_h = 4;
				1112	switch (info->samples) {
				1113	case 0:
				1114	case 1:
				1115	default:
				1116	break;
				1117	case 2:
				1118	hz_clear_w /= 2;
				1119	break;
				1120	case 4:
				1121	hz_clear_w /= 2;
				1122	hz_clear_h /= 2;
				1123	break;
				1124	case 8:
				1125	hz_clear_w /= 4;
				1126	hz_clear_h /= 2;
				1127	break;
				1128	case 16:
				1129	hz_clear_w /= 4;
				1130	hz_clear_h /= 4;
				1131	break;
				1132	}
				1133
				1134	for (lv = 0; lv < info->mipLevels; lv++) {
				1135	if (u_minify(layout->width0, lv) % hz_clear_w \|\|
				1136	u_minify(layout->height0, lv) % hz_clear_h)
				1137	break;
				1138	layout->aux_enables \|= 1 << lv;
				1139	}
				1140
				1141	/* we padded to allow this in layout_align() */
				1142	if (info->mipLevels == 1 && info->arraySize == 1 && info->extent.depth == 1)
				1143	layout->aux_enables \|= 0x1;
				1144
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1145	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1146	layout->aux_stride = u_align(hz_width, 128);
				1147	layout->aux_height = u_align(hz_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1148	}
				1149
				1150	static void
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1151	layout_calculate_mcs_size(struct intel_layout *layout,
				1152	struct intel_layout_params *params)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1153	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1154	const XGL_IMAGE_CREATE_INFO *info = params->info;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1155	int mcs_width, mcs_height, mcs_cpp;
				1156	int downscale_x, downscale_y;
				1157
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1158	assert(layout->aux == INTEL_LAYOUT_AUX_MCS);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1159
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1160	if (info->samples > 1) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1161	/*
				1162	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1163	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1164	* need of scale down could be that the clear rectangle is used to clear
				1165	* the MCS instead of the RT.
				1166	*
				1167	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1168	* 2x2 factor could come from that the hardware writes 128 bits (an
				1169	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1170	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1171	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1172	* pixel block in the RT.
				1173	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1174	switch (info->samples) {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1175	case 2:
				1176	case 4:
				1177	downscale_x = 8;
				1178	downscale_y = 2;
				1179	mcs_cpp = 1;
				1180	break;
				1181	case 8:
				1182	downscale_x = 2;
				1183	downscale_y = 2;
				1184	mcs_cpp = 4;
				1185	break;
				1186	case 16:
				1187	downscale_x = 2;
				1188	downscale_y = 1;
				1189	mcs_cpp = 8;
				1190	break;
				1191	default:
				1192	assert(!"unsupported sample count");
				1193	return;
				1194	break;
				1195	}
				1196
				1197	/*
				1198	* It also appears that the 2x2 subspans generated by the scaled-down
				1199	* clear rectangle cannot be masked. The scale-down clear rectangle
				1200	* thus must be aligned to 2x2, and we need to pad.
				1201	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1202	mcs_width = u_align(layout->width0, downscale_x * 2);
				1203	mcs_height = u_align(layout->height0, downscale_y * 2);
				1204	} else {
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1205	/*
				1206	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1207	*
				1208	* " Pixels Lines
				1209	* TiledY RT CL
				1210	* bpp
				1211	* 32 8 4
				1212	* 64 4 4
				1213	* 128 2 4
				1214	*
				1215	* TiledX RT CL
				1216	* bpp
				1217	* 32 16 2
				1218	* 64 8 2
				1219	* 128 4 2"
				1220	*
				1221	* This table and the two following tables define the RT alignments, the
				1222	* clear rectangle alignments, and the clear rectangle scale factors.
				1223	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1224	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1225	* rectangle scale factors are 8x16 blocks.
				1226	*
				1227	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1228	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1229	* 8x16 blocks.
				1230	*
				1231	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1232	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1233	* which says that a Y-tile maps to 128x256 blocks (\see
				1234	* intel_get_non_msrt_mcs_alignment). It does not really change
				1235	* anything except for the size of the allocated MCS. Let's see if we
				1236	* hit out-of-bound access.
				1237	*/
				1238	switch (layout->tiling) {
				1239	case INTEL_TILING_X:
				1240	downscale_x = 64 / layout->block_size;
				1241	downscale_y = 2;
				1242	break;
				1243	case INTEL_TILING_Y:
				1244	downscale_x = 32 / layout->block_size;
				1245	downscale_y = 4;
				1246	break;
				1247	default:
				1248	assert(!"unsupported tiling mode");
				1249	return;
				1250	break;
				1251	}
				1252
				1253	downscale_x *= 8;
				1254	downscale_y *= 16;
				1255
				1256	/*
				1257	* From the Haswell PRM, volume 7, page 652:
				1258	*
				1259	* "Clear rectangle must be aligned to two times the number of
				1260	* pixels in the table shown below due to 16X16 hashing across the
				1261	* slice."
				1262	*
				1263	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1264	* 2x2, and we need to pad.
				1265	*/
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1266	mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
				1267	mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1268	mcs_cpp = 16; /* an OWord */
				1269	}
				1270
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1271	layout->aux_enables = (1 << info->mipLevels) - 1;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1272	/* align to Y-tile */
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1273	layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
				1274	layout->aux_height = u_align(mcs_height, 32);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1275	}
				1276
				1277	/**
				1278	* Initialize the layout. Callers should zero-initialize \p layout first.
				1279	*/
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1280	void intel_layout_init(struct intel_layout *layout,
Chia-I Wu	c94f3e5	2014-10-07 14:45:05 +0800	[diff] [blame]	1281	struct intel_dev *dev,
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame]	1282	const XGL_IMAGE_CREATE_INFO *info,
				1283	bool scanout)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1284	{
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1285	struct intel_layout_params params;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1286
				1287	memset(&params, 0, sizeof(params));
Chia-I Wu	c94f3e5	2014-10-07 14:45:05 +0800	[diff] [blame]	1288	params.dev = dev;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1289	params.gpu = dev->gpu;
				1290	params.info = info;
Chia-I Wu	794d12a	2014-09-15 14:55:25 +0800	[diff] [blame]	1291	params.scanout = scanout;
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1292
				1293	/* note that there are dependencies between these functions */
				1294	layout_init_aux(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1295	layout_init_size_and_format(layout, &params);
				1296	layout_init_walk(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1297	layout_init_tiling(layout, &params);
				1298	layout_init_alignments(layout, &params);
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1299	layout_init_lods(layout, &params);
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1300	layout_init_layer_height(layout, &params);
				1301
				1302	layout_align(layout, &params);
				1303	layout_calculate_bo_size(layout, &params);
				1304
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1305	switch (layout->aux) {
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1306	case INTEL_LAYOUT_AUX_HIZ:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1307	layout_calculate_hiz_size(layout, &params);
				1308	break;
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1309	case INTEL_LAYOUT_AUX_MCS:
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1310	layout_calculate_mcs_size(layout, &params);
				1311	break;
				1312	default:
				1313	break;
				1314	}
				1315	}
				1316
				1317	/**
				1318	* Update the tiling mode and bo stride (for imported resources).
				1319	*/
				1320	bool
Chia-I Wu	8a8d8b6	2014-08-14 13:26:26 +0800	[diff] [blame]	1321	intel_layout_update_for_imported_bo(struct intel_layout *layout,
Chia-I Wu	457d0a6	2014-08-18 13:02:26 +0800	[diff] [blame]	1322	enum intel_tiling_mode tiling,
				1323	unsigned bo_stride)
Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame]	1324	{
				1325	if (!(layout->valid_tilings & (1 << tiling)))
				1326	return false;
				1327
				1328	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1329	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1330	return false;
				1331
				1332	layout->tiling = tiling;
				1333	layout->bo_stride = bo_stride;
				1334
				1335	return true;
				1336	}
Chia-I Wu	63a5397	2014-12-04 12:51:54 +0800	[diff] [blame]	1337
				1338	/**
				1339	* Return the offset (in bytes) to a slice within the bo.
				1340	*
				1341	* The returned offset is aligned to tile size. Since slices are not
				1342	* guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
				1343	* from the tile origin to the slice are also returned. X offset is always a
				1344	* multiple of 4 and Y offset is always a multiple of 2.
				1345	*/
				1346	unsigned
				1347	intel_layout_get_slice_tile_offset(const struct intel_layout *layout,
				1348	unsigned level, unsigned slice,
				1349	unsigned x_offset, unsigned y_offset)
				1350	{
				1351	unsigned tile_w, tile_h, tile_size, row_size;
				1352	unsigned tile_offset, x, y;
				1353
				1354	/* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
				1355
				1356	switch (layout->tiling) {
				1357	case INTEL_TILING_NONE:
				1358	/* W-tiled */
Jeremy Hayes	2b7e88a	2015-01-23 08:51:43 -0700	[diff] [blame]	1359	if (layout->format == XGL_FMT_S8_UINT) {
Chia-I Wu	63a5397	2014-12-04 12:51:54 +0800	[diff] [blame]	1360	tile_w = 64;
				1361	tile_h = 64;
				1362	}
				1363	else {
				1364	tile_w = 1;
				1365	tile_h = 1;
				1366	}
				1367	break;
				1368	case INTEL_TILING_X:
				1369	tile_w = 512;
				1370	tile_h = 8;
				1371	break;
				1372	case INTEL_TILING_Y:
				1373	tile_w = 128;
				1374	tile_h = 32;
				1375	break;
				1376	default:
				1377	assert(!"unknown tiling");
				1378	tile_w = 1;
				1379	tile_h = 1;
				1380	break;
				1381	}
				1382
				1383	tile_size = tile_w * tile_h;
				1384	row_size = layout->bo_stride * tile_h;
				1385
				1386	intel_layout_get_slice_pos(layout, level, slice, &x, &y);
				1387	/* in bytes */
				1388	intel_layout_pos_to_mem(layout, x, y, &x, &y);
				1389	tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
				1390
				1391	/*
				1392	* Since tex->bo_stride is a multiple of tile_w, slice_offset should be
				1393	* aligned at this point.
				1394	*/
				1395	assert(tile_offset % tile_size == 0);
				1396
				1397	/*
				1398	* because of the possible values of align_i and align_j in
				1399	* tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
				1400	* 4 and y_offset is guaranteed to be a multiple of 2.
				1401	*/
				1402	if (x_offset) {
				1403	/* in pixels */
				1404	x = (x % tile_w) / layout->block_size * layout->block_width;
				1405	assert(x % 4 == 0);
				1406
				1407	*x_offset = x;
				1408	}
				1409
				1410	if (y_offset) {
				1411	/* in pixels */
				1412	y = (y % tile_h) * layout->block_height;
				1413	assert(y % 2 == 0);
				1414
				1415	*y_offset = y;
				1416	}
				1417
				1418	return tile_offset;
				1419	}