Blame - icd/intel/layout.c - platform/external/vulkan-validation-layers

blob: c2f76161447442905112d3d8c4f5440cc6c1033e [file] [log] [blame]

Chia-I Wu	4bc4701	2014-08-14 13:03:25 +0800	[diff] [blame^]	1	/*
				2	* Mesa 3-D graphics library
				3	*
				4	* Copyright (C) 2014 LunarG, Inc.
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a
				7	* copy of this software and associated documentation files (the "Software"),
				8	* to deal in the Software without restriction, including without limitation
				9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				10	* and/or sell copies of the Software, and to permit persons to whom the
				11	* Software is furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included
				14	* in all copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				22	* DEALINGS IN THE SOFTWARE.
				23	*
				24	* Authors:
				25	* Chia-I Wu <olv@lunarg.com>
				26	*/
				27
				28	#include "ilo_layout.h"
				29
				30	enum {
				31	LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
				32	LAYOUT_TILING_X = 1 << INTEL_TILING_X,
				33	LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
				34	LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
				35
				36	LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE \|
				37	LAYOUT_TILING_X \|
				38	LAYOUT_TILING_Y \|
				39	LAYOUT_TILING_W)
				40	};
				41
				/* Scratch state passed between the layout_*() helpers in this file. */
				struct ilo_layout_params {
				   const struct ilo_dev_info *dev;
				   const struct pipe_resource *templ;

				   /* result of util_format_is_compressed() on the chosen layout format */
				   bool compressed;

				   /* padded slice heights of mip level 0 and mip level 1 */
				   unsigned h0;
				   unsigned h1;

				   /* running right/bottom extent of everything placed so far */
				   unsigned max_x;
				   unsigned max_y;
				};
				51
				52	static void
				53	layout_get_slice_size(const struct ilo_layout *layout,
				54	const struct ilo_layout_params *params,
				55	unsigned level, unsigned width, unsigned height)
				56	{
				57	const struct pipe_resource *templ = params->templ;
				58	unsigned w, h;
				59
				60	w = u_minify(templ->width0, level);
				61	h = u_minify(templ->height0, level);
				62
				63	/*
				64	* From the Sandy Bridge PRM, volume 1 part 1, page 114:
				65	*
				66	* "The dimensions of the mip maps are first determined by applying the
				67	* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
				68	* if necessary, they are padded out to compression block boundaries."
				69	*/
				70	w = align(w, layout->block_width);
				71	h = align(h, layout->block_height);
				72
				73	/*
				74	* From the Sandy Bridge PRM, volume 1 part 1, page 111:
				75	*
				76	* "If the surface is multisampled (4x), these values must be adjusted
				77	* as follows before proceeding:
				78	*
				79	* W_L = ceiling(W_L / 2) * 4
				80	* H_L = ceiling(H_L / 2) * 4"
				81	*
				82	* From the Ivy Bridge PRM, volume 1 part 1, page 108:
				83	*
				84	* "If the surface is multisampled and it is a depth or stencil surface
				85	* or Multisampled Surface StorageFormat in SURFACE_STATE is
				86	* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
				87	* proceeding:
				88	*
				89	* #samples W_L = H_L =
				90	* 2 ceiling(W_L / 2) * 4 HL [no adjustment]
				91	* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
				92	* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
				93	* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
				94	*
				95	* For interleaved samples (4x), where pixels
				96	*
				97	* (x, y ) (x+1, y )
				98	* (x, y+1) (x+1, y+1)
				99	*
				100	* would be is occupied by
				101	*
				102	* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
				103	* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
				104	* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
				105	* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
				106	*
				107	* Thus the need to
				108	*
				109	* w = align(w, 2) * 2;
				110	* y = align(y, 2) * 2;
				111	*/
				112	if (layout->interleaved_samples) {
				113	switch (templ->nr_samples) {
				114	case 0:
				115	case 1:
				116	break;
				117	case 2:
				118	w = align(w, 2) * 2;
				119	break;
				120	case 4:
				121	w = align(w, 2) * 2;
				122	h = align(h, 2) * 2;
				123	break;
				124	case 8:
				125	w = align(w, 2) * 4;
				126	h = align(h, 2) * 2;
				127	break;
				128	case 16:
				129	w = align(w, 2) * 4;
				130	h = align(h, 2) * 4;
				131	break;
				132	default:
				133	assert(!"unsupported sample count");
				134	break;
				135	}
				136	}
				137
				138	w = align(w, layout->align_i);
				139	h = align(h, layout->align_j);
				140
				141	*width = w;
				142	*height = h;
				143	}
				144
				145	static unsigned
				146	layout_get_num_layers(const struct ilo_layout *layout,
				147	const struct ilo_layout_params *params)
				148	{
				149	const struct pipe_resource *templ = params->templ;
				150	unsigned num_layers = templ->array_size;
				151
				152	/* samples of the same index are stored in a layer */
				153	if (templ->nr_samples > 1 && !layout->interleaved_samples)
				154	num_layers *= templ->nr_samples;
				155
				156	return num_layers;
				157	}
				158
				159	static void
				160	layout_init_layer_height(struct ilo_layout *layout,
				161	struct ilo_layout_params *params)
				162	{
				163	const struct pipe_resource *templ = params->templ;
				164	unsigned num_layers;
				165
				166	num_layers = layout_get_num_layers(layout, params);
				167	if (num_layers <= 1)
				168	return;
				169
				170	if (!layout->full_layers) {
				171	layout->layer_height = params->h0;
				172	params->max_y += params->h0 * (num_layers - 1);
				173	return;
				174	}
				175
				176	/*
				177	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				178	*
				179	* "The following equation is used for surface formats other than
				180	* compressed textures:
				181	*
				182	* QPitch = (h0 + h1 + 11j)"
				183	*
				184	* "The equation for compressed textures (BC* and FXT1 surface formats)
				185	* follows:
				186	*
				187	* QPitch = (h0 + h1 + 11j) / 4"
				188	*
				189	* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
				190	* value calculated in the equation above, for every other odd Surface
				191	* Height starting from 1 i.e. 1,5,9,13"
				192	*
				193	* From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
				194	*
				195	* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
				196	* buffer and stencil buffer have an implied value of ARYSPC_FULL):
				197	*
				198	* QPitch = (h0 + h1 + 12j)
				199	* QPitch = (h0 + h1 + 12j) / 4 (compressed)
				200	*
				201	* (There are many typos or missing words here...)"
				202	*
				203	* To access the N-th slice, an offset of (Stride * QPitch * N) is added to
				204	* the base address. The PRM divides QPitch by 4 for compressed formats
				205	* because the block height for those formats are 4, and it wants QPitch to
				206	* mean the number of memory rows, as opposed to texel rows, between
				207	* slices. Since we use texel rows everywhere, we do not need to divide
				208	* QPitch by 4.
				209	*/
				210	layout->layer_height = params->h0 + params->h1 +
				211	((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
				212
				213	if (params->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
				214	templ->height0 % 4 == 1)
				215	layout->layer_height += 4;
				216
				217	params->max_y += layout->layer_height * (num_layers - 1);
				218	}
				219
				220	static void
				221	layout_init_levels(struct ilo_layout *layout,
				222	struct ilo_layout_params *params)
				223	{
				224	const struct pipe_resource *templ = params->templ;
				225	unsigned cur_x, cur_y;
				226	unsigned lv;
				227
				228	cur_x = 0;
				229	cur_y = 0;
				230	for (lv = 0; lv <= templ->last_level; lv++) {
				231	unsigned level_w, level_h;
				232
				233	layout_get_slice_size(layout, params, lv, &level_w, &level_h);
				234
				235	layout->levels[lv].x = cur_x;
				236	layout->levels[lv].y = cur_y;
				237	layout->levels[lv].slice_width = level_w;
				238	layout->levels[lv].slice_height = level_h;
				239
				240	if (templ->target == PIPE_TEXTURE_3D) {
				241	const unsigned num_slices = u_minify(templ->depth0, lv);
				242	const unsigned num_slices_per_row = 1 << lv;
				243	const unsigned num_rows =
				244	(num_slices + num_slices_per_row - 1) / num_slices_per_row;
				245
				246	level_w *= num_slices_per_row;
				247	level_h *= num_rows;
				248
				249	cur_y += level_h;
				250	} else {
				251	/* MIPLAYOUT_BELOW */
				252	if (lv == 1)
				253	cur_x += level_w;
				254	else
				255	cur_y += level_h;
				256	}
				257
				258	if (params->max_x < layout->levels[lv].x + level_w)
				259	params->max_x = layout->levels[lv].x + level_w;
				260	if (params->max_y < layout->levels[lv].y + level_h)
				261	params->max_y = layout->levels[lv].y + level_h;
				262	}
				263
				264	params->h0 = layout->levels[0].slice_height;
				265	if (layout->full_layers) {
				266	if (templ->last_level > 0)
				267	params->h1 = layout->levels[1].slice_height;
				268	else
				269	layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
				270	}
				271	}
				272
				273	static void
				274	layout_init_alignments(struct ilo_layout *layout,
				275	struct ilo_layout_params *params)
				276	{
				277	const struct pipe_resource *templ = params->templ;
				278
				279	/*
				280	* From the Sandy Bridge PRM, volume 1 part 1, page 113:
				281	*
				282	* "surface format align_i align_j
				283	* YUV 4:2:2 formats 4 *see below
				284	* BC1-5 4 4
				285	* FXT1 8 4
				286	* all other formats 4 *see below"
				287	*
				288	* "- align_j = 4 for any depth buffer
				289	* - align_j = 2 for separate stencil buffer
				290	* - align_j = 4 for any render target surface is multisampled (4x)
				291	* - align_j = 4 for any render target surface with Surface Vertical
				292	* Alignment = VALIGN_4
				293	* - align_j = 2 for any render target surface with Surface Vertical
				294	* Alignment = VALIGN_2
				295	* - align_j = 2 for all other render target surface
				296	* - align_j = 2 for any sampling engine surface with Surface Vertical
				297	* Alignment = VALIGN_2
				298	* - align_j = 4 for any sampling engine surface with Surface Vertical
				299	* Alignment = VALIGN_4"
				300	*
				301	* From the Sandy Bridge PRM, volume 4 part 1, page 86:
				302	*
				303	* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
				304	* the Surface Format is 96 bits per element (BPE)."
				305	*
				306	* They can be rephrased as
				307	*
				308	* align_i align_j
				309	* compressed formats block width block height
				310	* PIPE_FORMAT_S8_UINT 4 2
				311	* other depth/stencil formats 4 4
				312	* 4x multisampled 4 4
				313	* bpp 96 4 2
				314	* others 4 2 or 4
				315	*/
				316
				317	/*
				318	* From the Ivy Bridge PRM, volume 1 part 1, page 110:
				319	*
				320	* "surface defined by surface format align_i align_j
				321	* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
				322	* not D16_UNORM 4 4
				323	* 3DSTATE_STENCIL_BUFFER N/A 8 8
				324	* SURFACE_STATE BC, ETC, EAC* 4 4
				325	* FXT1 8 4
				326	* all others (set by SURFACE_STATE)"
				327	*
				328	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				329	*
				330	* "- This field (Surface Vertical Aligment) is intended to be set to
				331	* VALIGN_4 if the surface was rendered as a depth buffer, for a
				332	* multisampled (4x) render target, or for a multisampled (8x)
				333	* render target, since these surfaces support only alignment of 4.
				334	* - Use of VALIGN_4 for other surfaces is supported, but uses more
				335	* memory.
				336	* - This field must be set to VALIGN_4 for all tiled Y Render Target
				337	* surfaces.
				338	* - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
				339	* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
				340	* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
				341	* must be set to VALIGN_4."
				342	* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				343	*
				344	* "- This field (Surface Horizontal Aligment) is intended to be set to
				345	* HALIGN_8 only if the surface was rendered as a depth buffer with
				346	* Z16 format or a stencil buffer, since these surfaces support only
				347	* alignment of 8.
				348	* - Use of HALIGN_8 for other surfaces is supported, but uses more
				349	* memory.
				350	* - This field must be set to HALIGN_4 if the Surface Format is BC*.
				351	* - This field must be set to HALIGN_8 if the Surface Format is
				352	* FXT1."
				353	*
				354	* They can be rephrased as
				355	*
				356	* align_i align_j
				357	* compressed formats block width block height
				358	* PIPE_FORMAT_Z16_UNORM 8 4
				359	* PIPE_FORMAT_S8_UINT 8 8
				360	* other depth/stencil formats 4 or 8 4
				361	* 2x or 4x multisampled 4 or 8 4
				362	* tiled Y 4 or 8 4 (if rt)
				363	* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
				364	* others 4 or 8 2 or 4
				365	*/
				366
				367	if (params->compressed) {
				368	/* this happens to be the case */
				369	layout->align_i = layout->block_width;
				370	layout->align_j = layout->block_height;
				371	} else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
				372	if (params->dev->gen >= ILO_GEN(7)) {
				373	switch (layout->format) {
				374	case PIPE_FORMAT_Z16_UNORM:
				375	layout->align_i = 8;
				376	layout->align_j = 4;
				377	break;
				378	case PIPE_FORMAT_S8_UINT:
				379	layout->align_i = 8;
				380	layout->align_j = 8;
				381	break;
				382	default:
				383	layout->align_i = 4;
				384	layout->align_j = 4;
				385	break;
				386	}
				387	} else {
				388	switch (layout->format) {
				389	case PIPE_FORMAT_S8_UINT:
				390	layout->align_i = 4;
				391	layout->align_j = 2;
				392	break;
				393	default:
				394	layout->align_i = 4;
				395	layout->align_j = 4;
				396	break;
				397	}
				398	}
				399	} else {
				400	const bool valign_4 = (templ->nr_samples > 1) \|\|
				401	(params->dev->gen >= ILO_GEN(7) &&
				402	layout->tiling == INTEL_TILING_Y &&
				403	(templ->bind & PIPE_BIND_RENDER_TARGET));
				404
				405	if (valign_4)
				406	assert(layout->block_size != 12);
				407
				408	layout->align_i = 4;
				409	layout->align_j = (valign_4) ? 4 : 2;
				410	}
				411
				412	/*
				413	* the fact that align i and j are multiples of block width and height
				414	* respectively is what makes the size of the bo a multiple of the block
				415	* size, slices start at block boundaries, and many of the computations
				416	* work.
				417	*/
				418	assert(layout->align_i % layout->block_width == 0);
				419	assert(layout->align_j % layout->block_height == 0);
				420
				421	/* make sure align() works */
				422	assert(util_is_power_of_two(layout->align_i) &&
				423	util_is_power_of_two(layout->align_j));
				424	assert(util_is_power_of_two(layout->block_width) &&
				425	util_is_power_of_two(layout->block_height));
				426	}
				427
				428	static unsigned
				429	layout_get_valid_tilings(const struct ilo_layout *layout,
				430	const struct ilo_layout_params *params)
				431	{
				432	const struct pipe_resource *templ = params->templ;
				433	const enum pipe_format format = layout->format;
				434	unsigned valid_tilings = LAYOUT_TILING_ALL;
				435
				436	/*
				437	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				438	*
				439	* "Display/Overlay Y-Major not supported.
				440	* X-Major required for Async Flips"
				441	*/
				442	if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
				443	valid_tilings &= LAYOUT_TILING_X;
				444
				445	/*
				446	* From the Sandy Bridge PRM, volume 3 part 2, page 158:
				447	*
				448	* "The cursor surface address must be 4K byte aligned. The cursor must
				449	* be in linear memory, it cannot be tiled."
				450	*/
				451	if (unlikely(templ->bind & (PIPE_BIND_CURSOR \| PIPE_BIND_LINEAR)))
				452	valid_tilings &= LAYOUT_TILING_NONE;
				453
				454	/*
				455	* From the Sandy Bridge PRM, volume 2 part 1, page 318:
				456	*
				457	* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
				458	* Depth Buffer is not supported."
				459	*
				460	* "The Depth Buffer, if tiled, must use Y-Major tiling."
				461	*
				462	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				463	*
				464	* "W-Major Tile Format is used for separate stencil."
				465	*/
				466	if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
				467	switch (format) {
				468	case PIPE_FORMAT_S8_UINT:
				469	valid_tilings &= LAYOUT_TILING_W;
				470	break;
				471	default:
				472	valid_tilings &= LAYOUT_TILING_Y;
				473	break;
				474	}
				475	}
				476
				477	if (templ->bind & PIPE_BIND_RENDER_TARGET) {
				478	/*
				479	* From the Sandy Bridge PRM, volume 1 part 2, page 32:
				480	*
				481	* "NOTE: 128BPE Format Color buffer ( render target ) MUST be
				482	* either TileX or Linear."
				483	*/
				484	if (layout->block_size == 16)
				485	valid_tilings &= ~LAYOUT_TILING_Y;
				486
				487	/*
				488	* From the Ivy Bridge PRM, volume 4 part 1, page 63:
				489	*
				490	* "This field (Surface Vertical Aligment) must be set to VALIGN_4
				491	* for all tiled Y Render Target surfaces."
				492	*
				493	* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
				494	*/
				495	if (params->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
				496	valid_tilings &= ~LAYOUT_TILING_Y;
				497	}
				498
				499	/* no conflicting binding flags */
				500	assert(valid_tilings);
				501
				502	return valid_tilings;
				503	}
				504
				505	static void
				506	layout_init_tiling(struct ilo_layout *layout,
				507	struct ilo_layout_params *params)
				508	{
				509	const struct pipe_resource *templ = params->templ;
				510	unsigned valid_tilings = layout_get_valid_tilings(layout, params);
				511
				512	layout->valid_tilings = valid_tilings;
				513
				514	if (templ->bind & (PIPE_BIND_RENDER_TARGET \| PIPE_BIND_SAMPLER_VIEW)) {
				515	/*
				516	* heuristically set a minimum width/height for enabling tiling
				517	*/
				518	if (templ->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
				519	valid_tilings &= ~LAYOUT_TILING_X;
				520
				521	if ((templ->width0 < 32 \|\| templ->height0 < 16) &&
				522	(templ->width0 < 16 \|\| templ->height0 < 32) &&
				523	(valid_tilings & ~LAYOUT_TILING_Y))
				524	valid_tilings &= ~LAYOUT_TILING_Y;
				525	} else {
				526	/* force linear if we are not sure where the texture is bound to */
				527	if (valid_tilings & LAYOUT_TILING_NONE)
				528	valid_tilings &= LAYOUT_TILING_NONE;
				529	}
				530
				531	/* prefer tiled over linear */
				532	if (valid_tilings & LAYOUT_TILING_Y)
				533	layout->tiling = INTEL_TILING_Y;
				534	else if (valid_tilings & LAYOUT_TILING_X)
				535	layout->tiling = INTEL_TILING_X;
				536	else /* linear or W-tiled, which has no hardware support */
				537	layout->tiling = INTEL_TILING_NONE;
				538	}
				539
				540	static void
				541	layout_init_arrangements_gen7(struct ilo_layout *layout,
				542	struct ilo_layout_params *params)
				543	{
				544	const struct pipe_resource *templ = params->templ;
				545
				546	/*
				547	* It is not explicitly states, but render targets are expected to be
				548	* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
				549	* to be IMS (samples interleaved).
				550	*
				551	* See "Multisampled Surface Storage Format" field of SURFACE_STATE.
				552	*/
				553	if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
				554	layout->interleaved_samples = true;
				555
				556	/*
				557	* From the Ivy Bridge PRM, volume 1 part 1, page 111:
				558	*
				559	* "note that the depth buffer and stencil buffer have an implied
				560	* value of ARYSPC_FULL"
				561	*/
				562	layout->full_layers = true;
				563	} else {
				564	layout->interleaved_samples = false;
				565
				566	/*
				567	* From the Ivy Bridge PRM, volume 4 part 1, page 66:
				568	*
				569	* "If Multisampled Surface Storage Format is MSFMT_MSS and Number
				570	* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
				571	* Array Spacing) must be set to ARYSPC_LOD0."
				572	*
				573	* As multisampled resources are not mipmapped, we never use
				574	* ARYSPC_FULL for them.
				575	*/
				576	if (templ->nr_samples > 1)
				577	assert(templ->last_level == 0);
				578	layout->full_layers = (templ->last_level > 0);
				579	}
				580	}
				581
				582	static void
				583	layout_init_arrangements_gen6(struct ilo_layout *layout,
				584	struct ilo_layout_params *params)
				585	{
				586	/* GEN6 supports only interleaved samples */
				587	layout->interleaved_samples = true;
				588
				589	/*
				590	* From the Sandy Bridge PRM, volume 1 part 1, page 115:
				591	*
				592	* "The separate stencil buffer does not support mip mapping, thus the
				593	* storage for LODs other than LOD 0 is not needed. The following
				594	* QPitch equation applies only to the separate stencil buffer:
				595	*
				596	* QPitch = h_0"
				597	*
				598	* GEN6 does not support compact spacing otherwise.
				599	*/
				600	layout->full_layers = (layout->format != PIPE_FORMAT_S8_UINT);
				601	}
				602
				603	static void
				604	layout_init_arrangements(struct ilo_layout *layout,
				605	struct ilo_layout_params *params)
				606	{
				607	if (params->dev->gen >= ILO_GEN(7))
				608	layout_init_arrangements_gen7(layout, params);
				609	else
				610	layout_init_arrangements_gen6(layout, params);
				611
				612	layout->is_2d = (params->templ->target != PIPE_TEXTURE_3D);
				613	}
				614
				615	static void
				616	layout_init_format(struct ilo_layout *layout,
				617	struct ilo_layout_params *params)
				618	{
				619	const struct pipe_resource *templ = params->templ;
				620	enum pipe_format format = templ->format;
				621	bool require_separate_stencil;
				622
				623	/*
				624	* From the Sandy Bridge PRM, volume 2 part 1, page 317:
				625	*
				626	* "This field (Separate Stencil Buffer Enable) must be set to the same
				627	* value (enabled or disabled) as Hierarchical Depth Buffer Enable."
				628	*
				629	* GEN7+ requires separate stencil buffers.
				630	*/
				631	if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
				632	if (params->dev->gen >= ILO_GEN(7))
				633	require_separate_stencil = true;
				634	else
				635	require_separate_stencil = (layout->aux_type == ILO_LAYOUT_AUX_HIZ);
				636	}
				637
				638	switch (format) {
				639	case PIPE_FORMAT_ETC1_RGB8:
				640	format = PIPE_FORMAT_R8G8B8X8_UNORM;
				641	break;
				642	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
				643	if (require_separate_stencil) {
				644	format = PIPE_FORMAT_Z24X8_UNORM;
				645	layout->separate_stencil = true;
				646	}
				647	break;
				648	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
				649	if (require_separate_stencil) {
				650	format = PIPE_FORMAT_Z32_FLOAT;
				651	layout->separate_stencil = true;
				652	}
				653	break;
				654	default:
				655	break;
				656	}
				657
				658	params->compressed = util_format_is_compressed(format);
				659
				660	layout->format = format;
				661	layout->block_width = util_format_get_blockwidth(format);
				662	layout->block_height = util_format_get_blockheight(format);
				663	layout->block_size = util_format_get_blocksize(format);
				664	}
				665
				666	static bool
				667	layout_want_mcs(struct ilo_layout *layout,
				668	struct ilo_layout_params *params)
				669	{
				670	const struct pipe_resource *templ = params->templ;
				671	bool want_mcs = false;
				672
				673	/* MCS is for RT on GEN7+ */
				674	if (params->dev->gen < ILO_GEN(7))
				675	return false;
				676
				677	if (templ->target != PIPE_TEXTURE_2D \|\|
				678	!(templ->bind & PIPE_BIND_RENDER_TARGET))
				679	return false;
				680
				681	/*
				682	* From the Ivy Bridge PRM, volume 4 part 1, page 77:
				683	*
				684	* "For Render Target and Sampling Engine Surfaces:If the surface is
				685	* multisampled (Number of Multisamples any value other than
				686	* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
				687	*
				688	* "This field must be set to 0 for all SINT MSRTs when all RT channels
				689	* are not written"
				690	*/
				691	if (templ->nr_samples > 1 && !layout->interleaved_samples &&
				692	!util_format_is_pure_sint(templ->format)) {
				693	want_mcs = true;
				694	} else if (templ->nr_samples <= 1) {
				695	/*
				696	* From the Ivy Bridge PRM, volume 2 part 1, page 326:
				697	*
				698	* "When MCS is buffer is used for color clear of non-multisampler
				699	* render target, the following restrictions apply.
				700	* - Support is limited to tiled render targets.
				701	* - Support is for non-mip-mapped and non-array surface types
				702	* only.
				703	* - Clear is supported only on the full RT; i.e., no partial clear
				704	* or overlapping clears.
				705	* - MCS buffer for non-MSRT is supported only for RT formats
				706	* 32bpp, 64bpp and 128bpp.
				707	* ..."
				708	*/
				709	if (layout->tiling != INTEL_TILING_NONE &&
				710	templ->last_level == 0 && templ->array_size == 1) {
				711	switch (layout->block_size) {
				712	case 4:
				713	case 8:
				714	case 16:
				715	want_mcs = true;
				716	break;
				717	default:
				718	break;
				719	}
				720	}
				721	}
				722
				723	return want_mcs;
				724	}
				725
				726	static bool
				727	layout_want_hiz(const struct ilo_layout *layout,
				728	const struct ilo_layout_params *params)
				729	{
				730	const struct pipe_resource *templ = params->templ;
				731	const struct util_format_description *desc =
				732	util_format_description(templ->format);
				733	bool want_hiz = false;
				734
				735	if (ilo_debug & ILO_DEBUG_NOHIZ)
				736	return false;
				737
				738	if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
				739	return false;
				740
				741	if (!util_format_has_depth(desc))
				742	return false;
				743
				744	/* no point in having HiZ */
				745	if (templ->usage == PIPE_USAGE_STAGING)
				746	return false;
				747
				748	if (params->dev->gen >= ILO_GEN(7)) {
				749	want_hiz = true;
				750	} else {
				751	/*
				752	* From the Sandy Bridge PRM, volume 2 part 1, page 312:
				753	*
				754	* "The hierarchical depth buffer does not support the LOD field, it
				755	* is assumed by hardware to be zero. A separate hierarachical
				756	* depth buffer is required for each LOD used, and the
				757	* corresponding buffer's state delivered to hardware each time a
				758	* new depth buffer state with modified LOD is delivered."
				759	*
				760	* But we have a stronger requirement. Because of layer offsetting
				761	* (check out the callers of ilo_layout_get_slice_tile_offset()), we
				762	* already have to require the texture to be non-mipmapped and
				763	* non-array.
				764	*/
				765	if (templ->last_level == 0 && templ->array_size == 1 &&
				766	templ->depth0 == 1)
				767	want_hiz = true;
				768	}
				769
				770	return want_hiz;
				771	}
				772
				773	static void
				774	layout_init_aux(struct ilo_layout *layout,
				775	struct ilo_layout_params *params)
				776	{
				777	if (layout_want_hiz(layout, params))
				778	layout->aux_type = ILO_LAYOUT_AUX_HIZ;
				779	else if (layout_want_mcs(layout, params))
				780	layout->aux_type = ILO_LAYOUT_AUX_MCS;
				781	}
				782
				783	static void
				784	layout_align(struct ilo_layout layout, struct ilo_layout_params params)
				785	{
				786	const struct pipe_resource *templ = params->templ;
				787	int align_w = 1, align_h = 1, pad_h = 0;
				788
				789	/*
				790	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				791	*
				792	* "To determine the necessary padding on the bottom and right side of
				793	* the surface, refer to the table in Section 7.18.3.4 for the i and j
				794	* parameters for the surface format in use. The surface must then be
				795	* extended to the next multiple of the alignment unit size in each
				796	* dimension, and all texels contained in this extended surface must
				797	* have valid GTT entries."
				798	*
				799	* "For cube surfaces, an additional two rows of padding are required
				800	* at the bottom of the surface. This must be ensured regardless of
				801	* whether the surface is stored tiled or linear. This is due to the
				802	* potential rotation of cache line orientation from memory to cache."
				803	*
				804	* "For compressed textures (BC* and FXT1 surface formats), padding at
				805	* the bottom of the surface is to an even compressed row, which is
				806	* equal to a multiple of 8 uncompressed texel rows. Thus, for padding
				807	* purposes, these surfaces behave as if j = 8 only for surface
				808	* padding purposes. The value of 4 for j still applies for mip level
				809	* alignment and QPitch calculation."
				810	*/
				811	if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
				812	align_w = MAX2(align_w, layout->align_i);
				813	align_h = MAX2(align_h, layout->align_j);
				814
				815	if (templ->target == PIPE_TEXTURE_CUBE)
				816	pad_h += 2;
				817
				818	if (params->compressed)
				819	align_h = MAX2(align_h, layout->align_j * 2);
				820	}
				821
				822	/*
				823	* From the Sandy Bridge PRM, volume 1 part 1, page 118:
				824	*
				825	* "If the surface contains an odd number of rows of data, a final row
				826	* below the surface must be allocated."
				827	*/
				828	if (templ->bind & PIPE_BIND_RENDER_TARGET)
				829	align_h = MAX2(align_h, 2);
				830
				831	/*
				832	* Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
				833	* ilo_texture_can_enable_hiz(), we always return true for the first slice.
				834	* To avoid out-of-bound access, we have to pad.
				835	*/
				836	if (layout->aux_type == ILO_LAYOUT_AUX_HIZ) {
				837	align_w = MAX2(align_w, 8);
				838	align_h = MAX2(align_h, 4);
				839	}
				840
				841	params->max_x = align(params->max_x, align_w);
				842	params->max_y = align(params->max_y + pad_h, align_h);
				843	}
				844
				845	/* note that this may force the texture to be linear */
				846	static void
				847	layout_calculate_bo_size(struct ilo_layout *layout,
				848	struct ilo_layout_params *params)
				849	{
				850	assert(params->max_x % layout->block_width == 0);
				851	assert(params->max_y % layout->block_height == 0);
				852	assert(layout->layer_height % layout->block_height == 0);
				853
				854	layout->bo_stride =
				855	(params->max_x / layout->block_width) * layout->block_size;
				856	layout->bo_height = params->max_y / layout->block_height;
				857
				858	while (true) {
				859	unsigned w = layout->bo_stride, h = layout->bo_height;
				860	unsigned align_w, align_h;
				861
				862	/*
				863	* From the Haswell PRM, volume 5, page 163:
				864	*
				865	* "For linear surfaces, additional padding of 64 bytes is required
				866	* at the bottom of the surface. This is in addition to the padding
				867	* required above."
				868	*/
				869	if (params->dev->gen >= ILO_GEN(7.5) &&
				870	(params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
				871	layout->tiling == INTEL_TILING_NONE) {
				872	layout->bo_height +=
				873	(64 + layout->bo_stride - 1) / layout->bo_stride;
				874	}
				875
				876	/*
				877	* From the Sandy Bridge PRM, volume 4 part 1, page 81:
				878	*
				879	* "- For linear render target surfaces, the pitch must be a
				880	* multiple of the element size for non-YUV surface formats.
				881	* Pitch must be a multiple of 2 * element size for YUV surface
				882	* formats.
				883	* - For other linear surfaces, the pitch can be any multiple of
				884	* bytes.
				885	* - For tiled surfaces, the pitch must be a multiple of the tile
				886	* width."
				887	*
				888	* Different requirements may exist when the bo is used in different
				889	* places, but our alignments here should be good enough that we do not
				890	* need to check layout->templ->bind.
				891	*/
				892	switch (layout->tiling) {
				893	case INTEL_TILING_X:
				894	align_w = 512;
				895	align_h = 8;
				896	break;
				897	case INTEL_TILING_Y:
				898	align_w = 128;
				899	align_h = 32;
				900	break;
				901	default:
				902	if (layout->format == PIPE_FORMAT_S8_UINT) {
				903	/*
				904	* From the Sandy Bridge PRM, volume 1 part 2, page 22:
				905	*
				906	* "A 4KB tile is subdivided into 8-high by 8-wide array of
				907	* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
				908	* bytes."
				909	*
				910	* Since we asked for INTEL_TILING_NONE instead of the non-existent
				911	* INTEL_TILING_W, we want to align to W tiles here.
				912	*/
				913	align_w = 64;
				914	align_h = 64;
				915	} else {
				916	/* some good enough values */
				917	align_w = 64;
				918	align_h = 2;
				919	}
				920	break;
				921	}
				922
				923	w = align(w, align_w);
				924	h = align(h, align_h);
				925
				926	/* make sure the bo is mappable */
				927	if (layout->tiling != INTEL_TILING_NONE) {
				928	/*
				929	* Usually only the first 256MB of the GTT is mappable.
				930	*
				931	* See also how intel_context::max_gtt_map_object_size is calculated.
				932	*/
				933	const size_t mappable_gtt_size = 256 * 1024 * 1024;
				934
				935	/*
				936	* Be conservative. We may be able to switch from VALIGN_4 to
				937	* VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
				938	*/
				939	if (mappable_gtt_size / w / 4 < h) {
				940	if (layout->valid_tilings & LAYOUT_TILING_NONE) {
				941	layout->tiling = INTEL_TILING_NONE;
				942	/* MCS support for non-MSRTs is limited to tiled RTs */
				943	if (layout->aux_type == ILO_LAYOUT_AUX_MCS &&
				944	params->templ->nr_samples <= 1)
				945	layout->aux_type = ILO_LAYOUT_AUX_NONE;
				946
				947	continue;
				948	} else {
				949	ilo_warn("cannot force texture to be linear\n");
				950	}
				951	}
				952	}
				953
				954	layout->bo_stride = w;
				955	layout->bo_height = h;
				956	break;
				957	}
				958	}
				959
				960	static void
				961	layout_calculate_hiz_size(struct ilo_layout *layout,
				962	struct ilo_layout_params *params)
				963	{
				964	const struct pipe_resource *templ = params->templ;
				965	const int hz_align_j = 8;
				966	int hz_width, hz_height;
				967
				968	assert(layout->aux_type == ILO_LAYOUT_AUX_HIZ);
				969
				970	/*
				971	* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
				972	* PRM, volume 2 part 1, page 312-313.
				973	*
				974	* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
				975	* memory row.
				976	*/
				977
				978	hz_width = align(layout->levels[0].slice_width, 16);
				979
				980	if (templ->target == PIPE_TEXTURE_3D) {
				981	unsigned lv;
				982
				983	hz_height = 0;
				984
				985	for (lv = 0; lv <= templ->last_level; lv++) {
				986	const unsigned h =
				987	align(layout->levels[lv].slice_height, hz_align_j);
				988	hz_height += h * u_minify(templ->depth0, lv);
				989	}
				990
				991	hz_height /= 2;
				992	} else {
				993	const unsigned h0 = align(params->h0, hz_align_j);
				994	unsigned hz_qpitch = h0;
				995
				996	if (layout->full_layers) {
				997	const unsigned h1 = align(params->h1, hz_align_j);
				998	const unsigned htail =
				999	((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
				1000
				1001	hz_qpitch += h1 + htail;
				1002	}
				1003
				1004	hz_height = hz_qpitch * templ->array_size / 2;
				1005
				1006	if (params->dev->gen >= ILO_GEN(7))
				1007	hz_height = align(hz_height, 8);
				1008	}
				1009
				1010	/* align to Y-tile */
				1011	layout->aux_stride = align(hz_width, 128);
				1012	layout->aux_height = align(hz_height, 32);
				1013	}
				1014
				1015	static void
				1016	layout_calculate_mcs_size(struct ilo_layout *layout,
				1017	struct ilo_layout_params *params)
				1018	{
				1019	const struct pipe_resource *templ = params->templ;
				1020	int mcs_width, mcs_height, mcs_cpp;
				1021	int downscale_x, downscale_y;
				1022
				1023	assert(layout->aux_type == ILO_LAYOUT_AUX_MCS);
				1024
				1025	if (templ->nr_samples > 1) {
				1026	/*
				1027	* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
				1028	* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
				1029	* need of scale down could be that the clear rectangle is used to clear
				1030	* the MCS instead of the RT.
				1031	*
				1032	* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
				1033	* 2x2 factor could come from that the hardware writes 128 bits (an
				1034	* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
				1035	* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
				1036	* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
				1037	* pixel block in the RT.
				1038	*/
				1039	switch (templ->nr_samples) {
				1040	case 2:
				1041	case 4:
				1042	downscale_x = 8;
				1043	downscale_y = 2;
				1044	mcs_cpp = 1;
				1045	break;
				1046	case 8:
				1047	downscale_x = 2;
				1048	downscale_y = 2;
				1049	mcs_cpp = 4;
				1050	break;
				1051	case 16:
				1052	downscale_x = 2;
				1053	downscale_y = 1;
				1054	mcs_cpp = 8;
				1055	break;
				1056	default:
				1057	assert(!"unsupported sample count");
				1058	return;
				1059	break;
				1060	}
				1061
				1062	/*
				1063	* It also appears that the 2x2 subspans generated by the scaled-down
				1064	* clear rectangle cannot be masked. The scale-down clear rectangle
				1065	* thus must be aligned to 2x2, and we need to pad.
				1066	*/
				1067	mcs_width = align(templ->width0, downscale_x * 2);
				1068	mcs_height = align(templ->height0, downscale_y * 2);
				1069	}
				1070	else {
				1071	/*
				1072	* From the Ivy Bridge PRM, volume 2 part 1, page 327:
				1073	*
				1074	* " Pixels Lines
				1075	* TiledY RT CL
				1076	* bpp
				1077	* 32 8 4
				1078	* 64 4 4
				1079	* 128 2 4
				1080	*
				1081	* TiledX RT CL
				1082	* bpp
				1083	* 32 16 2
				1084	* 64 8 2
				1085	* 128 4 2"
				1086	*
				1087	* This table and the two following tables define the RT alignments, the
				1088	* clear rectangle alignments, and the clear rectangle scale factors.
				1089	* Viewing the RT alignments as the sizes of 128-byte blocks, we can see
				1090	* that the clear rectangle alignments are 16x32 blocks, and the clear
				1091	* rectangle scale factors are 8x16 blocks.
				1092	*
				1093	* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
				1094	* RT. Similar to the MSAA cases, we can argue that an OWord maps to
				1095	* 8x16 blocks.
				1096	*
				1097	* One problem with this reasoning is that a Y-tile in MCS has 8x32
				1098	* OWords and maps to 64x512 128-byte blocks. This differs from i965,
				1099	* which says that a Y-tile maps to 128x256 blocks (\see
				1100	* intel_get_non_msrt_mcs_alignment). It does not really change
				1101	* anything except for the size of the allocated MCS. Let's see if we
				1102	* hit out-of-bound access.
				1103	*/
				1104	switch (layout->tiling) {
				1105	case INTEL_TILING_X:
				1106	downscale_x = 64 / layout->block_size;
				1107	downscale_y = 2;
				1108	break;
				1109	case INTEL_TILING_Y:
				1110	downscale_x = 32 / layout->block_size;
				1111	downscale_y = 4;
				1112	break;
				1113	default:
				1114	assert(!"unsupported tiling mode");
				1115	return;
				1116	break;
				1117	}
				1118
				1119	downscale_x *= 8;
				1120	downscale_y *= 16;
				1121
				1122	/*
				1123	* From the Haswell PRM, volume 7, page 652:
				1124	*
				1125	* "Clear rectangle must be aligned to two times the number of
				1126	* pixels in the table shown below due to 16X16 hashing across the
				1127	* slice."
				1128	*
				1129	* The scaled-down clear rectangle must be aligned to 4x4 instead of
				1130	* 2x2, and we need to pad.
				1131	*/
				1132	mcs_width = align(templ->width0, downscale_x * 4) / downscale_x;
				1133	mcs_height = align(templ->height0, downscale_y * 4) / downscale_y;
				1134	mcs_cpp = 16; /* an OWord */
				1135	}
				1136
				1137	/* align to Y-tile */
				1138	layout->aux_stride = align(mcs_width * mcs_cpp, 128);
				1139	layout->aux_height = align(mcs_height, 32);
				1140	}
				1141
				1142	/**
				1143	* The texutre is for transfer only. We can define our own layout to save
				1144	* space.
				1145	*/
				1146	static void
				1147	layout_init_for_transfer(struct ilo_layout *layout,
				1148	const struct ilo_dev_info *dev,
				1149	const struct pipe_resource *templ)
				1150	{
				1151	const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
				1152	templ->depth0 : templ->array_size;
				1153	unsigned layer_width, layer_height;
				1154
				1155	assert(templ->last_level == 0);
				1156	assert(templ->nr_samples <= 1);
				1157
				1158	layout->block_width = util_format_get_blockwidth(templ->format);
				1159	layout->block_height = util_format_get_blockheight(templ->format);
				1160	layout->block_size = util_format_get_blocksize(templ->format);
				1161
				1162	layout->valid_tilings = LAYOUT_TILING_NONE;
				1163	layout->tiling = INTEL_TILING_NONE;
				1164
				1165	layout->align_i = layout->block_width;
				1166	layout->align_j = layout->block_height;
				1167
				1168	assert(util_is_power_of_two(layout->block_width) &&
				1169	util_is_power_of_two(layout->block_height));
				1170
				1171	/* use packed layout */
				1172	layer_width = align(templ->width0, layout->align_i);
				1173	layer_height = align(templ->height0, layout->align_j);
				1174
				1175	layout->levels[0].slice_width = layer_width;
				1176	layout->levels[0].slice_height = layer_height;
				1177	layout->layer_height = layer_height;
				1178
				1179	layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
				1180	layout->bo_stride = align(layout->bo_stride, 64);
				1181
				1182	layout->bo_height = (layer_height / layout->block_height) * num_layers;
				1183	}
				1184
				1185	/**
				1186	* Initialize the layout. Callers should zero-initialize \p layout first.
				1187	*/
				1188	void ilo_layout_init(struct ilo_layout *layout,
				1189	const struct ilo_dev_info *dev,
				1190	const struct pipe_resource *templ)
				1191	{
				1192	struct ilo_layout_params params;
				1193	bool transfer_only;
				1194
				1195	/* use transfer layout when the texture is never bound to GPU */
				1196	transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE \|
				1197	PIPE_BIND_TRANSFER_READ));
				1198	if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
				1199	layout_init_for_transfer(layout, dev, templ);
				1200	return;
				1201	}
				1202
				1203	memset(&params, 0, sizeof(params));
				1204	params.dev = dev;
				1205	params.templ = templ;
				1206
				1207	/* note that there are dependencies between these functions */
				1208	layout_init_aux(layout, &params);
				1209	layout_init_format(layout, &params);
				1210	layout_init_arrangements(layout, &params);
				1211	layout_init_tiling(layout, &params);
				1212	layout_init_alignments(layout, &params);
				1213	layout_init_levels(layout, &params);
				1214	layout_init_layer_height(layout, &params);
				1215
				1216	layout_align(layout, &params);
				1217	layout_calculate_bo_size(layout, &params);
				1218
				1219	switch (layout->aux_type) {
				1220	case ILO_LAYOUT_AUX_HIZ:
				1221	layout_calculate_hiz_size(layout, &params);
				1222	break;
				1223	case ILO_LAYOUT_AUX_MCS:
				1224	layout_calculate_mcs_size(layout, &params);
				1225	break;
				1226	default:
				1227	break;
				1228	}
				1229	}
				1230
				1231	/**
				1232	* Update the tiling mode and bo stride (for imported resources).
				1233	*/
				1234	bool
				1235	ilo_layout_update_for_imported_bo(struct ilo_layout *layout,
				1236	enum intel_tiling_mode tiling,
				1237	unsigned bo_stride)
				1238	{
				1239	if (!(layout->valid_tilings & (1 << tiling)))
				1240	return false;
				1241
				1242	if ((tiling == INTEL_TILING_X && bo_stride % 512) \|\|
				1243	(tiling == INTEL_TILING_Y && bo_stride % 128))
				1244	return false;
				1245
				1246	layout->tiling = tiling;
				1247	layout->bo_stride = bo_stride;
				1248
				1249	return true;
				1250	}
				1251
				1252	/**
				1253	* Return the offset (in bytes) to a slice within the bo.
				1254	*
				1255	* The returned offset is aligned to tile size. Since slices are not
				1256	* guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
				1257	* from the tile origin to the slice are also returned. X offset is always a
				1258	* multiple of 4 and Y offset is always a multiple of 2.
				1259	*/
				1260	unsigned
				1261	ilo_layout_get_slice_tile_offset(const struct ilo_layout *layout,
				1262	unsigned level, unsigned slice,
				1263	unsigned x_offset, unsigned y_offset)
				1264	{
				1265	unsigned tile_w, tile_h, tile_size, row_size;
				1266	unsigned tile_offset, x, y;
				1267
				1268	/* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
				1269
				1270	switch (layout->tiling) {
				1271	case INTEL_TILING_NONE:
				1272	/* W-tiled */
				1273	if (layout->format == PIPE_FORMAT_S8_UINT) {
				1274	tile_w = 64;
				1275	tile_h = 64;
				1276	}
				1277	else {
				1278	tile_w = 1;
				1279	tile_h = 1;
				1280	}
				1281	break;
				1282	case INTEL_TILING_X:
				1283	tile_w = 512;
				1284	tile_h = 8;
				1285	break;
				1286	case INTEL_TILING_Y:
				1287	tile_w = 128;
				1288	tile_h = 32;
				1289	break;
				1290	default:
				1291	assert(!"unknown tiling");
				1292	tile_w = 1;
				1293	tile_h = 1;
				1294	break;
				1295	}
				1296
				1297	tile_size = tile_w * tile_h;
				1298	row_size = layout->bo_stride * tile_h;
				1299
				1300	ilo_layout_get_slice_pos(layout, level, slice, &x, &y);
				1301	/* in bytes */
				1302	ilo_layout_pos_to_mem(layout, x, y, &x, &y);
				1303	tile_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
				1304
				1305	/*
				1306	* Since tex->bo_stride is a multiple of tile_w, slice_offset should be
				1307	* aligned at this point.
				1308	*/
				1309	assert(tile_offset % tile_size == 0);
				1310
				1311	/*
				1312	* because of the possible values of align_i and align_j in
				1313	* tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
				1314	* 4 and y_offset is guaranteed to be a multiple of 2.
				1315	*/
				1316	if (x_offset) {
				1317	/* in pixels */
				1318	x = (x % tile_w) / layout->block_size * layout->block_width;
				1319	assert(x % 4 == 0);
				1320
				1321	*x_offset = x;
				1322	}
				1323
				1324	if (y_offset) {
				1325	/* in pixels */
				1326	y = (y % tile_h) * layout->block_height;
				1327	assert(y % 2 == 0);
				1328
				1329	*y_offset = y;
				1330	}
				1331
				1332	return tile_offset;
				1333	}