Blame - drivers/gpu/drm/i915/i915_gem_fence.c - kernel/msm-5.4

blob: 6f7249b00d4c4a83666517fc8222c26e782536a9 [file] [log] [blame]

Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	1	/*
				2	* Copyright © 2008-2015 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*/
				23
				24	#include <drm/drmP.h>
				25	#include <drm/i915_drm.h>
				26	#include "i915_drv.h"
				27
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	28	/**
				29	* DOC: fence register handling
				30	*
				31	* Important to avoid confusions: "fences" in the i915 driver are not execution
				32	* fences used to track command completion but hardware detiler objects which
				33	* wrap a given range of the global GTT. Each platform has only a fairly limited
				34	* set of these objects.
				35	*
				36	* Fences are used to detile GTT memory mappings. They're also connected to the
Masanari Iida	34fd3e1	2016-01-05 12:29:17 +0900	[diff] [blame^]	37	* hardware frontbuffer render tracking and hence interact with frontbuffer
				38	* compression. Furthermore on older platforms fences are required for tiled
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	39	* objects used by the display engine. They can also be used by the render
				40	* engine - they're required for blitter commands and are optional for render
				41	* commands. But on gen4+ both display (with the exception of fbc) and rendering
				42	* have their own tiling state bits and don't need fences.
				43	*
				44	* Also note that fences only support X and Y tiling and hence can't be used for
				45	* the fancier new tiling formats like W, Ys and Yf.
				46	*
				47	* Finally note that because fences are such a restricted resource they're
				48	* dynamically associated with objects. Furthermore fence state is committed to
Masanari Iida	34fd3e1	2016-01-05 12:29:17 +0900	[diff] [blame^]	49	* the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
				50	* explicitly call i915_gem_object_get_fence() to synchronize fencing status
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	51	* for cpu access. Also note that some code wants an unfenced view, for those
				52	* cases the fence can be removed forcefully with i915_gem_object_put_fence().
				53	*
				54	* Internally these functions will synchronize with userspace access by removing
				55	* the CPU ptes of GTT mmaps (not the GTT ptes themselves) as needed.
				56	*/
				57
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	58	static void i965_write_fence_reg(struct drm_device *dev, int reg,
				59	struct drm_i915_gem_object *obj)
				60	{
				61	struct drm_i915_private *dev_priv = dev->dev_private;
				62	int fence_reg;
				63	int fence_pitch_shift;
				64
				65	if (INTEL_INFO(dev)->gen >= 6) {
				66	fence_reg = FENCE_REG_SANDYBRIDGE_0;
				67	fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
				68	} else {
				69	fence_reg = FENCE_REG_965_0;
				70	fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
				71	}
				72
				73	fence_reg += reg * 8;
				74
				75	/* To w/a incoherency with non-atomic 64-bit register updates,
				76	* we split the 64-bit update into two 32-bit writes. In order
				77	* for a partial fence not to be evaluated between writes, we
				78	* precede the update with write to turn off the fence register,
				79	* and only enable the fence as the last step.
				80	*
				81	* For extra levels of paranoia, we make sure each step lands
				82	* before applying the next step.
				83	*/
				84	I915_WRITE(fence_reg, 0);
				85	POSTING_READ(fence_reg);
				86
				87	if (obj) {
				88	u32 size = i915_gem_obj_ggtt_size(obj);
				89	uint64_t val;
				90
				91	/* Adjust fence size to match tiled area */
				92	if (obj->tiling_mode != I915_TILING_NONE) {
				93	uint32_t row_size = obj->stride *
				94	(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
				95	size = (size / row_size) * row_size;
				96	}
				97
				98	val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
				99	0xfffff000) << 32;
				100	val \|= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
				101	val \|= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
				102	if (obj->tiling_mode == I915_TILING_Y)
				103	val \|= 1 << I965_FENCE_TILING_Y_SHIFT;
				104	val \|= I965_FENCE_REG_VALID;
				105
				106	I915_WRITE(fence_reg + 4, val >> 32);
				107	POSTING_READ(fence_reg + 4);
				108
				109	I915_WRITE(fence_reg + 0, val);
				110	POSTING_READ(fence_reg);
				111	} else {
				112	I915_WRITE(fence_reg + 4, 0);
				113	POSTING_READ(fence_reg + 4);
				114	}
				115	}
				116
				117	static void i915_write_fence_reg(struct drm_device *dev, int reg,
				118	struct drm_i915_gem_object *obj)
				119	{
				120	struct drm_i915_private *dev_priv = dev->dev_private;
				121	u32 val;
				122
				123	if (obj) {
				124	u32 size = i915_gem_obj_ggtt_size(obj);
				125	int pitch_val;
				126	int tile_width;
				127
				128	WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) \|\|
				129	(size & -size) != size \|\|
				130	(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
				131	"object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
				132	i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
				133
				134	if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
				135	tile_width = 128;
				136	else
				137	tile_width = 512;
				138
				139	/* Note: pitch better be a power of two tile widths */
				140	pitch_val = obj->stride / tile_width;
				141	pitch_val = ffs(pitch_val) - 1;
				142
				143	val = i915_gem_obj_ggtt_offset(obj);
				144	if (obj->tiling_mode == I915_TILING_Y)
				145	val \|= 1 << I830_FENCE_TILING_Y_SHIFT;
				146	val \|= I915_FENCE_SIZE_BITS(size);
				147	val \|= pitch_val << I830_FENCE_PITCH_SHIFT;
				148	val \|= I830_FENCE_REG_VALID;
				149	} else
				150	val = 0;
				151
				152	if (reg < 8)
				153	reg = FENCE_REG_830_0 + reg * 4;
				154	else
				155	reg = FENCE_REG_945_8 + (reg - 8) * 4;
				156
				157	I915_WRITE(reg, val);
				158	POSTING_READ(reg);
				159	}
				160
				161	static void i830_write_fence_reg(struct drm_device *dev, int reg,
				162	struct drm_i915_gem_object *obj)
				163	{
				164	struct drm_i915_private *dev_priv = dev->dev_private;
				165	uint32_t val;
				166
				167	if (obj) {
				168	u32 size = i915_gem_obj_ggtt_size(obj);
				169	uint32_t pitch_val;
				170
				171	WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) \|\|
				172	(size & -size) != size \|\|
				173	(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
				174	"object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
				175	i915_gem_obj_ggtt_offset(obj), size);
				176
				177	pitch_val = obj->stride / 128;
				178	pitch_val = ffs(pitch_val) - 1;
				179
				180	val = i915_gem_obj_ggtt_offset(obj);
				181	if (obj->tiling_mode == I915_TILING_Y)
				182	val \|= 1 << I830_FENCE_TILING_Y_SHIFT;
				183	val \|= I830_FENCE_SIZE_BITS(size);
				184	val \|= pitch_val << I830_FENCE_PITCH_SHIFT;
				185	val \|= I830_FENCE_REG_VALID;
				186	} else
				187	val = 0;
				188
				189	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
				190	POSTING_READ(FENCE_REG_830_0 + reg * 4);
				191	}
				192
				193	inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
				194	{
				195	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
				196	}
				197
				198	static void i915_gem_write_fence(struct drm_device *dev, int reg,
				199	struct drm_i915_gem_object *obj)
				200	{
				201	struct drm_i915_private *dev_priv = dev->dev_private;
				202
				203	/* Ensure that all CPU reads are completed before installing a fence
				204	* and all writes before removing the fence.
				205	*/
				206	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
				207	mb();
				208
				209	WARN(obj && (!obj->stride \|\| !obj->tiling_mode),
				210	"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
				211	obj->stride, obj->tiling_mode);
				212
				213	if (IS_GEN2(dev))
				214	i830_write_fence_reg(dev, reg, obj);
				215	else if (IS_GEN3(dev))
				216	i915_write_fence_reg(dev, reg, obj);
				217	else if (INTEL_INFO(dev)->gen >= 4)
				218	i965_write_fence_reg(dev, reg, obj);
				219
				220	/* And similarly be paranoid that no direct access to this region
				221	* is reordered to before the fence is installed.
				222	*/
				223	if (i915_gem_object_needs_mb(obj))
				224	mb();
				225	}
				226
				227	static inline int fence_number(struct drm_i915_private *dev_priv,
				228	struct drm_i915_fence_reg *fence)
				229	{
				230	return fence - dev_priv->fence_regs;
				231	}
				232
				233	static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
				234	struct drm_i915_fence_reg *fence,
				235	bool enable)
				236	{
				237	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
				238	int reg = fence_number(dev_priv, fence);
				239
				240	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
				241
				242	if (enable) {
				243	obj->fence_reg = reg;
				244	fence->obj = obj;
				245	list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
				246	} else {
				247	obj->fence_reg = I915_FENCE_REG_NONE;
				248	fence->obj = NULL;
				249	list_del_init(&fence->lru_list);
				250	}
				251	obj->fence_dirty = false;
				252	}
				253
				254	static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
				255	{
				256	if (obj->tiling_mode)
				257	i915_gem_release_mmap(obj);
				258
				259	/* As we do not have an associated fence register, we will force
				260	* a tiling change if we ever need to acquire one.
				261	*/
				262	obj->fence_dirty = false;
				263	obj->fence_reg = I915_FENCE_REG_NONE;
				264	}
				265
				266	static int
				267	i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
				268	{
				269	if (obj->last_fenced_req) {
				270	int ret = i915_wait_request(obj->last_fenced_req);
				271	if (ret)
				272	return ret;
				273
				274	i915_gem_request_assign(&obj->last_fenced_req, NULL);
				275	}
				276
				277	return 0;
				278	}
				279
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	280	/**
				281	* i915_gem_object_put_fence - force-remove fence for an object
				282	* @obj: object to map through a fence reg
				283	*
				284	* This function force-removes any fence from the given object, which is useful
				285	* if the kernel wants to do untiled GTT access.
				286	*
				287	* Returns:
				288	*
				289	* 0 on success, negative error code on failure.
				290	*/
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	291	int
				292	i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
				293	{
				294	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
				295	struct drm_i915_fence_reg *fence;
				296	int ret;
				297
				298	ret = i915_gem_object_wait_fence(obj);
				299	if (ret)
				300	return ret;
				301
				302	if (obj->fence_reg == I915_FENCE_REG_NONE)
				303	return 0;
				304
				305	fence = &dev_priv->fence_regs[obj->fence_reg];
				306
				307	if (WARN_ON(fence->pin_count))
				308	return -EBUSY;
				309
				310	i915_gem_object_fence_lost(obj);
				311	i915_gem_object_update_fence(obj, fence, false);
				312
				313	return 0;
				314	}
				315
				316	static struct drm_i915_fence_reg *
				317	i915_find_fence_reg(struct drm_device *dev)
				318	{
				319	struct drm_i915_private *dev_priv = dev->dev_private;
				320	struct drm_i915_fence_reg reg, avail;
				321	int i;
				322
				323	/* First try to find a free reg */
				324	avail = NULL;
				325	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
				326	reg = &dev_priv->fence_regs[i];
				327	if (!reg->obj)
				328	return reg;
				329
				330	if (!reg->pin_count)
				331	avail = reg;
				332	}
				333
				334	if (avail == NULL)
				335	goto deadlock;
				336
				337	/* None available, try to steal one or wait for a user to finish */
				338	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
				339	if (reg->pin_count)
				340	continue;
				341
				342	return reg;
				343	}
				344
				345	deadlock:
				346	/* Wait for completion of pending flips which consume fences */
				347	if (intel_has_pending_fb_unpin(dev))
				348	return ERR_PTR(-EAGAIN);
				349
				350	return ERR_PTR(-EDEADLK);
				351	}
				352
				353	/**
				354	* i915_gem_object_get_fence - set up fencing for an object
				355	* @obj: object to map through a fence reg
				356	*
				357	* When mapping objects through the GTT, userspace wants to be able to write
				358	* to them without having to worry about swizzling if the object is tiled.
				359	* This function walks the fence regs looking for a free one for @obj,
				360	* stealing one if it can't find any.
				361	*
				362	* It then sets up the reg based on the object's properties: address, pitch
				363	* and tiling format.
				364	*
				365	* For an untiled surface, this removes any existing fence.
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	366	*
				367	* Returns:
				368	*
				369	* 0 on success, negative error code on failure.
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	370	*/
				371	int
				372	i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
				373	{
				374	struct drm_device *dev = obj->base.dev;
				375	struct drm_i915_private *dev_priv = dev->dev_private;
				376	bool enable = obj->tiling_mode != I915_TILING_NONE;
				377	struct drm_i915_fence_reg *reg;
				378	int ret;
				379
				380	/* Have we updated the tiling parameters upon the object and so
				381	* will need to serialise the write to the associated fence register?
				382	*/
				383	if (obj->fence_dirty) {
				384	ret = i915_gem_object_wait_fence(obj);
				385	if (ret)
				386	return ret;
				387	}
				388
				389	/* Just update our place in the LRU if our fence is getting reused. */
				390	if (obj->fence_reg != I915_FENCE_REG_NONE) {
				391	reg = &dev_priv->fence_regs[obj->fence_reg];
				392	if (!obj->fence_dirty) {
				393	list_move_tail(&reg->lru_list,
				394	&dev_priv->mm.fence_list);
				395	return 0;
				396	}
				397	} else if (enable) {
				398	if (WARN_ON(!obj->map_and_fenceable))
				399	return -EINVAL;
				400
				401	reg = i915_find_fence_reg(dev);
				402	if (IS_ERR(reg))
				403	return PTR_ERR(reg);
				404
				405	if (reg->obj) {
				406	struct drm_i915_gem_object *old = reg->obj;
				407
				408	ret = i915_gem_object_wait_fence(old);
				409	if (ret)
				410	return ret;
				411
				412	i915_gem_object_fence_lost(old);
				413	}
				414	} else
				415	return 0;
				416
				417	i915_gem_object_update_fence(obj, reg, enable);
				418
				419	return 0;
				420	}
				421
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	422	/**
				423	* i915_gem_object_pin_fence - pin fencing state
				424	* @obj: object to pin fencing for
				425	*
				426	* This pins the fencing state (whether tiled or untiled) to make sure the
				427	* object is ready to be used as a scanout target. Fencing status must be
				428	* synchronize first by calling i915_gem_object_get_fence():
				429	*
				430	* The resulting fence pin reference must be released again with
				431	* i915_gem_object_unpin_fence().
				432	*
				433	* Returns:
				434	*
				435	* True if the object has a fence, false otherwise.
				436	*/
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	437	bool
				438	i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
				439	{
				440	if (obj->fence_reg != I915_FENCE_REG_NONE) {
				441	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
				442	struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
				443
				444	WARN_ON(!ggtt_vma \|\|
				445	dev_priv->fence_regs[obj->fence_reg].pin_count >
				446	ggtt_vma->pin_count);
				447	dev_priv->fence_regs[obj->fence_reg].pin_count++;
				448	return true;
				449	} else
				450	return false;
				451	}
				452
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	453	/**
				454	* i915_gem_object_unpin_fence - unpin fencing state
				455	* @obj: object to unpin fencing for
				456	*
				457	* This releases the fence pin reference acquired through
				458	* i915_gem_object_pin_fence. It will handle both objects with and without an
				459	* attached fence correctly, callers do not need to distinguish this.
				460	*/
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	461	void
				462	i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
				463	{
				464	if (obj->fence_reg != I915_FENCE_REG_NONE) {
				465	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
				466	WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
				467	dev_priv->fence_regs[obj->fence_reg].pin_count--;
				468	}
				469	}
				470
Daniel Vetter	a794f62	2015-07-24 17:40:12 +0200	[diff] [blame]	471	/**
				472	* i915_gem_restore_fences - restore fence state
				473	* @dev: DRM device
				474	*
				475	* Restore the hw fence state to match the software tracking again, to be called
				476	* after a gpu reset and on resume.
				477	*/
Daniel Vetter	41a36b7	2015-07-24 13:55:11 +0200	[diff] [blame]	478	void i915_gem_restore_fences(struct drm_device *dev)
				479	{
				480	struct drm_i915_private *dev_priv = dev->dev_private;
				481	int i;
				482
				483	for (i = 0; i < dev_priv->num_fence_regs; i++) {
				484	struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
				485
				486	/*
				487	* Commit delayed tiling changes if we have an object still
				488	* attached to the fence, otherwise just clear the fence.
				489	*/
				490	if (reg->obj) {
				491	i915_gem_object_update_fence(reg->obj, reg,
				492	reg->obj->tiling_mode);
				493	} else {
				494	i915_gem_write_fence(dev, i, NULL);
				495	}
				496	}
				497	}
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	498
				499	/**
Daniel Vetter	3271dca	2015-07-24 17:40:15 +0200	[diff] [blame]	500	* DOC: tiling swizzling details
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	501	*
				502	* The idea behind tiling is to increase cache hit rates by rearranging
				503	* pixel data so that a group of pixel accesses are in the same cacheline.
				504	* Performance improvements from doing this on the back/depth buffer are on
				505	* the order of 30%.
				506	*
				507	* Intel architectures make this somewhat more complicated, though, by
				508	* adjustments made to addressing of data when the memory is in interleaved
				509	* mode (matched pairs of DIMMS) to improve memory bandwidth.
				510	* For interleaved memory, the CPU sends every sequential 64 bytes
				511	* to an alternate memory channel so it can get the bandwidth from both.
				512	*
				513	* The GPU also rearranges its accesses for increased bandwidth to interleaved
				514	* memory, and it matches what the CPU does for non-tiled. However, when tiled
				515	* it does it a little differently, since one walks addresses not just in the
				516	* X direction but also Y. So, along with alternating channels when bit
				517	* 6 of the address flips, it also alternates when other bits flip -- Bits 9
				518	* (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
				519	* are common to both the 915 and 965-class hardware.
				520	*
				521	* The CPU also sometimes XORs in higher bits as well, to improve
				522	* bandwidth doing strided access like we do so frequently in graphics. This
				523	* is called "Channel XOR Randomization" in the MCH documentation. The result
				524	* is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
				525	* decode.
				526	*
				527	* All of this bit 6 XORing has an effect on our memory management,
				528	* as we need to make sure that the 3d driver can correctly address object
				529	* contents.
				530	*
				531	* If we don't have interleaved memory, all tiling is safe and no swizzling is
				532	* required.
				533	*
				534	* When bit 17 is XORed in, we simply refuse to tile at all. Bit
Masanari Iida	34fd3e1	2016-01-05 12:29:17 +0900	[diff] [blame^]	535	* 17 is not just a page offset, so as we page an object out and back in,
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	536	* individual pages in it will have different bit 17 addresses, resulting in
				537	* each 64 bytes being swapped with its neighbor!
				538	*
				539	* Otherwise, if interleaved, we have to tell the 3d driver what the address
				540	* swizzling it needs to do is, since it's writing with the CPU to the pages
				541	* (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
				542	* pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
				543	* required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
				544	* to match what the GPU expects.
				545	*/
				546
				547	/**
Daniel Vetter	3271dca	2015-07-24 17:40:15 +0200	[diff] [blame]	548	* i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
				549	* @dev: DRM device
				550	*
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	551	* Detects bit 6 swizzling of address lookup between IGD access and CPU
				552	* access through main memory.
				553	*/
				554	void
				555	i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
				556	{
				557	struct drm_i915_private *dev_priv = dev->dev_private;
				558	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
				559	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
				560
				561	if (INTEL_INFO(dev)->gen >= 8 \|\| IS_VALLEYVIEW(dev)) {
				562	/*
				563	* On BDW+, swizzling is not used. We leave the CPU memory
				564	* controller in charge of optimizing memory accesses without
				565	* the extra address manipulation GPU side.
				566	*
				567	* VLV and CHV don't have GPU swizzling.
				568	*/
				569	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				570	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				571	} else if (INTEL_INFO(dev)->gen >= 6) {
				572	if (dev_priv->preserve_bios_swizzle) {
				573	if (I915_READ(DISP_ARB_CTL) &
				574	DISP_TILE_SURFACE_SWIZZLING) {
				575	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				576	swizzle_y = I915_BIT_6_SWIZZLE_9;
				577	} else {
				578	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				579	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				580	}
				581	} else {
				582	uint32_t dimm_c0, dimm_c1;
				583	dimm_c0 = I915_READ(MAD_DIMM_C0);
				584	dimm_c1 = I915_READ(MAD_DIMM_C1);
				585	dimm_c0 &= MAD_DIMM_A_SIZE_MASK \| MAD_DIMM_B_SIZE_MASK;
				586	dimm_c1 &= MAD_DIMM_A_SIZE_MASK \| MAD_DIMM_B_SIZE_MASK;
				587	/* Enable swizzling when the channels are populated
				588	* with identically sized dimms. We don't need to check
				589	* the 3rd channel because no cpu with gpu attached
				590	* ships in that configuration. Also, swizzling only
				591	* makes sense for 2 channels anyway. */
				592	if (dimm_c0 == dimm_c1) {
				593	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				594	swizzle_y = I915_BIT_6_SWIZZLE_9;
				595	} else {
				596	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				597	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				598	}
				599	}
				600	} else if (IS_GEN5(dev)) {
				601	/* On Ironlake whatever DRAM config, GPU always do
				602	* same swizzling setup.
				603	*/
				604	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				605	swizzle_y = I915_BIT_6_SWIZZLE_9;
				606	} else if (IS_GEN2(dev)) {
				607	/* As far as we know, the 865 doesn't have these bit 6
				608	* swizzling issues.
				609	*/
				610	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				611	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				612	} else if (IS_MOBILE(dev) \|\| (IS_GEN3(dev) && !IS_G33(dev))) {
				613	uint32_t dcc;
				614
				615	/* On 9xx chipsets, channel interleave by the CPU is
				616	* determined by DCC. For single-channel, neither the CPU
				617	* nor the GPU do swizzling. For dual channel interleaved,
				618	* the GPU's interleave is bit 9 and 10 for X tiled, and bit
				619	* 9 for Y tiled. The CPU's interleave is independent, and
				620	* can be based on either bit 11 (haven't seen this yet) or
				621	* bit 17 (common).
				622	*/
				623	dcc = I915_READ(DCC);
				624	switch (dcc & DCC_ADDRESSING_MODE_MASK) {
				625	case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
				626	case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
				627	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				628	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				629	break;
				630	case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
				631	if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				632	/* This is the base swizzling by the GPU for
				633	* tiled buffers.
				634	*/
				635	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				636	swizzle_y = I915_BIT_6_SWIZZLE_9;
				637	} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				638	/* Bit 11 swizzling by the CPU in addition. */
				639	swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				640	swizzle_y = I915_BIT_6_SWIZZLE_9_11;
				641	} else {
				642	/* Bit 17 swizzling by the CPU in addition. */
				643	swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				644	swizzle_y = I915_BIT_6_SWIZZLE_9_17;
				645	}
				646	break;
				647	}
				648
				649	/* check for L-shaped memory aka modified enhanced addressing */
				650	if (IS_GEN4(dev)) {
				651	uint32_t ddc2 = I915_READ(DCC2);
				652
				653	if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
				654	dev_priv->quirks \|= QUIRK_PIN_SWIZZLED_PAGES;
				655	}
				656
				657	if (dcc == 0xffffffff) {
				658	DRM_ERROR("Couldn't read from MCHBAR. "
				659	"Disabling tiling.\n");
				660	swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
				661	swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
				662	}
				663	} else {
				664	/* The 965, G33, and newer, have a very flexible memory
				665	* configuration. It will enable dual-channel mode
				666	* (interleaving) on as much memory as it can, and the GPU
				667	* will additionally sometimes enable different bit 6
				668	* swizzling for tiled objects from the CPU.
				669	*
				670	* Here's what I found on the G965:
				671	* slot fill memory size swizzling
				672	* 0A 0B 1A 1B 1-ch 2-ch
				673	* 512 0 0 0 512 0 O
				674	* 512 0 512 0 16 1008 X
				675	* 512 0 0 512 16 1008 X
				676	* 0 512 0 512 16 1008 X
				677	* 1024 1024 1024 0 2048 1024 O
				678	*
				679	* We could probably detect this based on either the DRB
				680	* matching, which was the case for the swizzling required in
				681	* the table above, or from the 1-ch value being less than
				682	* the minimum size of a rank.
				683	*/
				684	if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
				685	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				686	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				687	} else {
				688	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				689	swizzle_y = I915_BIT_6_SWIZZLE_9;
				690	}
				691	}
				692
				693	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
				694	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
				695	}
				696
Daniel Vetter	3271dca	2015-07-24 17:40:15 +0200	[diff] [blame]	697	/*
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	698	* Swap every 64 bytes of this page around, to account for it having a new
				699	* bit 17 of its physical address and therefore being interpreted differently
				700	* by the GPU.
				701	*/
				702	static void
				703	i915_gem_swizzle_page(struct page *page)
				704	{
				705	char temp[64];
				706	char *vaddr;
				707	int i;
				708
				709	vaddr = kmap(page);
				710
				711	for (i = 0; i < PAGE_SIZE; i += 128) {
				712	memcpy(temp, &vaddr[i], 64);
				713	memcpy(&vaddr[i], &vaddr[i + 64], 64);
				714	memcpy(&vaddr[i + 64], temp, 64);
				715	}
				716
				717	kunmap(page);
				718	}
				719
Daniel Vetter	3271dca	2015-07-24 17:40:15 +0200	[diff] [blame]	720	/**
				721	* i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
				722	* @obj: i915 GEM buffer object
				723	*
				724	* This function fixes up the swizzling in case any page frame number for this
				725	* object has changed in bit 17 since that state has been saved with
				726	* i915_gem_object_save_bit_17_swizzle().
				727	*
				728	* This is called when pinning backing storage again, since the kernel is free
				729	* to move unpinned backing storage around (either by directly moving pages or
				730	* by swapping them out and back in again).
				731	*/
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	732	void
				733	i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
				734	{
				735	struct sg_page_iter sg_iter;
				736	int i;
				737
				738	if (obj->bit_17 == NULL)
				739	return;
				740
				741	i = 0;
				742	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
				743	struct page *page = sg_page_iter_page(&sg_iter);
				744	char new_bit_17 = page_to_phys(page) >> 17;
				745	if ((new_bit_17 & 0x1) !=
				746	(test_bit(i, obj->bit_17) != 0)) {
				747	i915_gem_swizzle_page(page);
				748	set_page_dirty(page);
				749	}
				750	i++;
				751	}
				752	}
				753
Daniel Vetter	3271dca	2015-07-24 17:40:15 +0200	[diff] [blame]	754	/**
				755	* i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
				756	* @obj: i915 GEM buffer object
				757	*
				758	* This function saves the bit 17 of each page frame number so that swizzling
				759	* can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
				760	* be called before the backing storage can be unpinned.
				761	*/
Daniel Vetter	7f96eca	2015-07-24 17:40:14 +0200	[diff] [blame]	762	void
				763	i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
				764	{
				765	struct sg_page_iter sg_iter;
				766	int page_count = obj->base.size >> PAGE_SHIFT;
				767	int i;
				768
				769	if (obj->bit_17 == NULL) {
				770	obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
				771	sizeof(long), GFP_KERNEL);
				772	if (obj->bit_17 == NULL) {
				773	DRM_ERROR("Failed to allocate memory for bit 17 "
				774	"record\n");
				775	return;
				776	}
				777	}
				778
				779	i = 0;
				780	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
				781	if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
				782	__set_bit(i, obj->bit_17);
				783	else
				784	__clear_bit(i, obj->bit_17);
				785	i++;
				786	}
				787	}