/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_gem_object_get_fence() to synchronize fencing status
 * for cpu access. Also note that some code wants an unfenced view, for those
 * cases the fence can be removed forcefully with i915_gem_object_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */
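
/*
 * Illustrative sketch, not driver code: the typical flow for kernel CPU
 * access through a GTT mmap of a tiled object, assuming @obj is already
 * bound into the mappable aperture (only functions from this file are used):
 *
 *      ret = i915_gem_object_get_fence(obj);
 *      if (ret)
 *              return ret;
 *
 *      if (i915_gem_object_pin_fence(obj)) {
 *              ... fenced (detiled) access through the GTT mmap ...
 *              i915_gem_object_unpin_fence(obj);
 *      }
 *
 * Code that instead needs a linear, unfenced view would call
 * i915_gem_object_put_fence(obj) first and handle the tiled layout itself.
 */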

static void i965_write_fence_reg(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int fence_reg_lo, fence_reg_hi;
        int fence_pitch_shift;

        if (INTEL_INFO(dev)->gen >= 6) {
                fence_reg_lo = FENCE_REG_GEN6_LO(reg);
                fence_reg_hi = FENCE_REG_GEN6_HI(reg);
                fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
        } else {
                fence_reg_lo = FENCE_REG_965_LO(reg);
                fence_reg_hi = FENCE_REG_965_HI(reg);
                fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
        }

        /* To work around incoherency with non-atomic 64-bit register updates,
         * we split the 64-bit update into two 32-bit writes. In order
         * for a partial fence not to be evaluated between writes, we
         * precede the update with a write to turn off the fence register,
         * and only enable the fence as the last step.
         *
         * For extra levels of paranoia, we make sure each step lands
         * before applying the next step.
         */
        I915_WRITE(fence_reg_lo, 0);
        POSTING_READ(fence_reg_lo);

        if (obj) {
                u32 size = i915_gem_obj_ggtt_size(obj);
                uint64_t val;

                /* Adjust fence size to match tiled area */
                if (obj->tiling_mode != I915_TILING_NONE) {
                        uint32_t row_size = obj->stride *
                                (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
                        size = (size / row_size) * row_size;
                }

                val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
                                 0xfffff000) << 32;
                val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
                val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
                if (obj->tiling_mode == I915_TILING_Y)
                        val |= 1 << I965_FENCE_TILING_Y_SHIFT;
                val |= I965_FENCE_REG_VALID;

                I915_WRITE(fence_reg_hi, val >> 32);
                POSTING_READ(fence_reg_hi);

                I915_WRITE(fence_reg_lo, val);
                POSTING_READ(fence_reg_lo);
        } else {
                I915_WRITE(fence_reg_hi, 0);
                POSTING_READ(fence_reg_hi);
        }
}

static void i915_write_fence_reg(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 val;

        if (obj) {
                u32 size = i915_gem_obj_ggtt_size(obj);
                int pitch_val;
                int tile_width;

                WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
                     (size & -size) != size ||
                     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
                     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
                     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);

                if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
                        tile_width = 128;
                else
                        tile_width = 512;

                /* Note: pitch better be a power of two tile widths */
                pitch_val = obj->stride / tile_width;
                pitch_val = ffs(pitch_val) - 1;

                val = i915_gem_obj_ggtt_offset(obj);
                if (obj->tiling_mode == I915_TILING_Y)
                        val |= 1 << I830_FENCE_TILING_Y_SHIFT;
                val |= I915_FENCE_SIZE_BITS(size);
                val |= pitch_val << I830_FENCE_PITCH_SHIFT;
                val |= I830_FENCE_REG_VALID;
        } else
                val = 0;

        I915_WRITE(FENCE_REG(reg), val);
        POSTING_READ(FENCE_REG(reg));
}

static void i830_write_fence_reg(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t val;

        if (obj) {
                u32 size = i915_gem_obj_ggtt_size(obj);
                uint32_t pitch_val;

                WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
                     (size & -size) != size ||
                     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
                     "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
                     i915_gem_obj_ggtt_offset(obj), size);

                pitch_val = obj->stride / 128;
                pitch_val = ffs(pitch_val) - 1;

                val = i915_gem_obj_ggtt_offset(obj);
                if (obj->tiling_mode == I915_TILING_Y)
                        val |= 1 << I830_FENCE_TILING_Y_SHIFT;
                val |= I830_FENCE_SIZE_BITS(size);
                val |= pitch_val << I830_FENCE_PITCH_SHIFT;
                val |= I830_FENCE_REG_VALID;
        } else
                val = 0;

        I915_WRITE(FENCE_REG(reg), val);
        POSTING_READ(FENCE_REG(reg));
}

static inline bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
        return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
}

static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* Ensure that all CPU reads are completed before installing a fence
         * and all writes before removing the fence.
         */
        if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
                mb();

        WARN(obj && (!obj->stride || !obj->tiling_mode),
             "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
             obj->stride, obj->tiling_mode);

        if (IS_GEN2(dev))
                i830_write_fence_reg(dev, reg, obj);
        else if (IS_GEN3(dev))
                i915_write_fence_reg(dev, reg, obj);
        else if (INTEL_INFO(dev)->gen >= 4)
                i965_write_fence_reg(dev, reg, obj);

        /* And similarly be paranoid that no direct access to this region
         * is reordered to before the fence is installed.
         */
        if (i915_gem_object_needs_mb(obj))
                mb();
}

static inline int fence_number(struct drm_i915_private *dev_priv,
                               struct drm_i915_fence_reg *fence)
{
        return fence - dev_priv->fence_regs;
}

static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
                                         bool enable)
{
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        int reg = fence_number(dev_priv, fence);

        i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);

        if (enable) {
                obj->fence_reg = reg;
                fence->obj = obj;
                list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
        } else {
                obj->fence_reg = I915_FENCE_REG_NONE;
                fence->obj = NULL;
                list_del_init(&fence->lru_list);
        }
        obj->fence_dirty = false;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
        if (obj->tiling_mode)
                i915_gem_release_mmap(obj);

        /* As we do not have an associated fence register, we will force
         * a tiling change if we ever need to acquire one.
         */
        obj->fence_dirty = false;
        obj->fence_reg = I915_FENCE_REG_NONE;
}

static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
        if (obj->last_fenced_req) {
                int ret = i915_wait_request(obj->last_fenced_req);
                if (ret)
                        return ret;

                i915_gem_request_assign(&obj->last_fenced_req, NULL);
        }

        return 0;
}

/**
 * i915_gem_object_put_fence - force-remove fence for an object
 * @obj: object to remove the fence from
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        struct drm_i915_fence_reg *fence;
        int ret;

        ret = i915_gem_object_wait_fence(obj);
        if (ret)
                return ret;

        if (obj->fence_reg == I915_FENCE_REG_NONE)
                return 0;

        fence = &dev_priv->fence_regs[obj->fence_reg];

        if (WARN_ON(fence->pin_count))
                return -EBUSY;

        i915_gem_object_fence_lost(obj);
        i915_gem_object_update_fence(obj, fence, false);

        return 0;
}
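
/*
 * Illustrative sketch, not driver code: dropping the fence before doing
 * untiled GTT access, assuming no other user currently has the fence pinned
 * (otherwise this returns -EBUSY, per the WARN_ON above):
 *
 *      ret = i915_gem_object_put_fence(obj);
 *      if (ret)
 *              return ret;
 *      ... access obj through the GTT without detiling ...
 */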

static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_fence_reg *reg, *avail;
        int i;

        /* First try to find a free reg */
        avail = NULL;
        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                reg = &dev_priv->fence_regs[i];
                if (!reg->obj)
                        return reg;

                if (!reg->pin_count)
                        avail = reg;
        }

        if (avail == NULL)
                goto deadlock;

        /* None available, try to steal one or wait for a user to finish */
        list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
                if (reg->pin_count)
                        continue;

                return reg;
        }

deadlock:
        /* Wait for completion of pending flips which consume fences */
        if (intel_has_pending_fb_unpin(dev))
                return ERR_PTR(-EAGAIN);

        return ERR_PTR(-EDEADLK);
}

/**
 * i915_gem_object_get_fence - set up fencing for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool enable = obj->tiling_mode != I915_TILING_NONE;
        struct drm_i915_fence_reg *reg;
        int ret;

        /* Have we updated the tiling parameters upon the object and so
         * will need to serialise the write to the associated fence register?
         */
        if (obj->fence_dirty) {
                ret = i915_gem_object_wait_fence(obj);
                if (ret)
                        return ret;
        }

        /* Just update our place in the LRU if our fence is getting reused. */
        if (obj->fence_reg != I915_FENCE_REG_NONE) {
                reg = &dev_priv->fence_regs[obj->fence_reg];
                if (!obj->fence_dirty) {
                        list_move_tail(&reg->lru_list,
                                       &dev_priv->mm.fence_list);
                        return 0;
                }
        } else if (enable) {
                if (WARN_ON(!obj->map_and_fenceable))
                        return -EINVAL;

                reg = i915_find_fence_reg(dev);
                if (IS_ERR(reg))
                        return PTR_ERR(reg);

                if (reg->obj) {
                        struct drm_i915_gem_object *old = reg->obj;

                        ret = i915_gem_object_wait_fence(old);
                        if (ret)
                                return ret;

                        i915_gem_object_fence_lost(old);
                }
        } else
                return 0;

        i915_gem_object_update_fence(obj, reg, enable);

        return 0;
}

/**
 * i915_gem_object_pin_fence - pin fencing state
 * @obj: object to pin fencing for
 *
 * This pins the fencing state (whether tiled or untiled) to make sure the
 * object is ready to be used as a scanout target. Fencing status must be
 * synchronized first by calling i915_gem_object_get_fence().
 *
 * The resulting fence pin reference must be released again with
 * i915_gem_object_unpin_fence().
 *
 * Returns:
 *
 * True if the object has a fence, false otherwise.
 */
bool
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
{
        if (obj->fence_reg != I915_FENCE_REG_NONE) {
                struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
                struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);

                WARN_ON(!ggtt_vma ||
                        dev_priv->fence_regs[obj->fence_reg].pin_count >
                        ggtt_vma->pin_count);
                dev_priv->fence_regs[obj->fence_reg].pin_count++;
                return true;
        } else
                return false;
}

/**
 * i915_gem_object_unpin_fence - unpin fencing state
 * @obj: object to unpin fencing for
 *
 * This releases the fence pin reference acquired through
 * i915_gem_object_pin_fence. It will handle both objects with and without an
 * attached fence correctly; callers do not need to distinguish this.
 */
void
i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
{
        if (obj->fence_reg != I915_FENCE_REG_NONE) {
                struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
                WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
                dev_priv->fence_regs[obj->fence_reg].pin_count--;
        }
}

/**
 * i915_gem_restore_fences - restore fence state
 * @dev: DRM device
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume.
 */
void i915_gem_restore_fences(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int i;

        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

                /*
                 * Commit delayed tiling changes if we have an object still
                 * attached to the fence, otherwise just clear the fence.
                 */
                if (reg->obj) {
                        i915_gem_object_update_fence(reg->obj, reg,
                                                     reg->obj->tiling_mode);
                } else {
                        i915_gem_write_fence(dev, i, NULL);
                }
        }
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what address
 * swizzling it needs to do, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
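
/*
 * Illustrative sketch, not driver code: resolving I915_BIT_6_SWIZZLE_9_10 by
 * hand. Bits 9 and 10 of the address are folded into bit 6, so the CPU can
 * reproduce the GPU's view of a tiled page like this (hypothetical helper):
 *
 *      static u32 swizzle_addr_9_10(u32 addr)
 *      {
 *              u32 bit6 = ((addr >> 9) ^ (addr >> 10)) & 1;
 *
 *              return addr ^ (bit6 << 6);
 *      }
 *
 * I915_BIT_6_SWIZZLE_9 folds in only bit 9; the _9_10_11 and _9_10_17
 * variants additionally fold in bit 11 or bit 17 respectively.
 */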

/**
 * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @dev: DRM device
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
        uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

        if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
                /*
                 * On BDW+, swizzling is not used. We leave the CPU memory
                 * controller in charge of optimizing memory accesses without
                 * the extra address manipulation GPU side.
                 *
                 * VLV and CHV don't have GPU swizzling.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        } else if (INTEL_INFO(dev)->gen >= 6) {
                if (dev_priv->preserve_bios_swizzle) {
                        if (I915_READ(DISP_ARB_CTL) &
                            DISP_TILE_SURFACE_SWIZZLING) {
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else {
                                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        }
                } else {
                        uint32_t dimm_c0, dimm_c1;
                        dimm_c0 = I915_READ(MAD_DIMM_C0);
                        dimm_c1 = I915_READ(MAD_DIMM_C1);
                        dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
                        dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
                        /* Enable swizzling when the channels are populated
                         * with identically sized dimms. We don't need to check
                         * the 3rd channel because no cpu with gpu attached
                         * ships in that configuration. Also, swizzling only
                         * makes sense for 2 channels anyway. */
                        if (dimm_c0 == dimm_c1) {
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else {
                                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        }
                }
        } else if (IS_GEN5(dev)) {
                /* On Ironlake the GPU uses the same swizzling setup
                 * regardless of the DRAM configuration.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                swizzle_y = I915_BIT_6_SWIZZLE_9;
        } else if (IS_GEN2(dev)) {
                /* As far as we know, the 865 doesn't have these bit 6
                 * swizzling issues.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
                uint32_t dcc;

                /* On 9xx chipsets, channel interleave by the CPU is
                 * determined by DCC. For single-channel, neither the CPU
                 * nor the GPU do swizzling. For dual channel interleaved,
                 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
                 * 9 for Y tiled. The CPU's interleave is independent, and
                 * can be based on either bit 11 (haven't seen this yet) or
                 * bit 17 (common).
                 */
                dcc = I915_READ(DCC);
                switch (dcc & DCC_ADDRESSING_MODE_MASK) {
                case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
                        swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                        swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        break;
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
                        if (dcc & DCC_CHANNEL_XOR_DISABLE) {
                                /* This is the base swizzling by the GPU for
                                 * tiled buffers.
                                 */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
                                /* Bit 11 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
                                swizzle_y = I915_BIT_6_SWIZZLE_9_11;
                        } else {
                                /* Bit 17 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
                                swizzle_y = I915_BIT_6_SWIZZLE_9_17;
                        }
                        break;
                }

                /* check for L-shaped memory aka modified enhanced addressing */
                if (IS_GEN4(dev) &&
                    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                        swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                }

                if (dcc == 0xffffffff) {
                        DRM_ERROR("Couldn't read from MCHBAR. "
                                  "Disabling tiling.\n");
                        swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                        swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                }
        } else {
                /* The 965, G33, and newer, have a very flexible memory
                 * configuration. It will enable dual-channel mode
                 * (interleaving) on as much memory as it can, and the GPU
                 * will additionally sometimes enable different bit 6
                 * swizzling for tiled objects from the CPU.
                 *
                 * Here's what I found on the G965:
                 *
                 *        slot fill            memory size   swizzling
                 *    0A   0B   1A   1B        1-ch   2-ch
                 *    512  0    0    0         512    0       O
                 *    512  0    512  0         16     1008    X
                 *    512  0    0    512       16     1008    X
                 *    0    512  0    512       16     1008    X
                 *    1024 1024 1024 0         2048   1024    O
                 *
                 * We could probably detect this based on either the DRB
                 * matching, which was the case for the swizzling required in
                 * the table above, or from the 1-ch value being less than
                 * the minimum size of a rank.
                 *
                 * Reports indicate that the swizzling actually
                 * varies depending upon page placement inside the
                 * channels, i.e. we see swizzled pages where the
                 * banks of memory are paired and unswizzled on the
                 * uneven portion, so leave that as unknown.
                 */
                if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                        swizzle_y = I915_BIT_6_SWIZZLE_9;
                }
        }

        if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
            swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
                /* Userspace likes to explode if it sees unknown swizzling,
                 * so lie. We will finish the lie when reporting through
                 * the get-tiling-ioctl by reporting the physical swizzle
                 * mode as unknown instead.
                 *
                 * As we don't strictly know what the swizzling is, it may be
                 * bit17 dependent, and so we need to also prevent the pages
                 * from being moved.
                 */
                dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        }

        dev_priv->mm.bit_6_swizzle_x = swizzle_x;
        dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void
i915_gem_swizzle_page(struct page *page)
{
        char temp[64];
        char *vaddr;
        int i;

        vaddr = kmap(page);

        for (i = 0; i < PAGE_SIZE; i += 128) {
                memcpy(temp, &vaddr[i], 64);
                memcpy(&vaddr[i], &vaddr[i + 64], 64);
                memcpy(&vaddr[i + 64], temp, 64);
        }

        kunmap(page);
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
{
        struct sg_page_iter sg_iter;
        int i;

        if (obj->bit_17 == NULL)
                return;

        i = 0;
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
                struct page *page = sg_page_iter_page(&sg_iter);
                char new_bit_17 = page_to_phys(page) >> 17;
                if ((new_bit_17 & 0x1) !=
                    (test_bit(i, obj->bit_17) != 0)) {
                        i915_gem_swizzle_page(page);
                        set_page_dirty(page);
                }
                i++;
        }
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 *
 * This function saves the bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
 * be called before the backing storage can be unpinned.
 */
void
i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
{
        struct sg_page_iter sg_iter;
        int page_count = obj->base.size >> PAGE_SHIFT;
        int i;

        if (obj->bit_17 == NULL) {
                obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
                                      sizeof(long), GFP_KERNEL);
                if (obj->bit_17 == NULL) {
                        DRM_ERROR("Failed to allocate memory for bit 17 "
                                  "record\n");
                        return;
                }
        }

        i = 0;
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
                if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
                        __set_bit(i, obj->bit_17);
                else
                        __clear_bit(i, obj->bit_17);
                i++;
        }
}
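
/*
 * Illustrative sketch, not actual call sites: the save/do pair brackets any
 * point where unpinned backing pages may move, per the kernel-doc above:
 *
 *      i915_gem_object_save_bit_17_swizzle(obj);
 *      ... backing storage unpinned; pages may migrate or be swapped
 *          out and back in, changing bit 17 of their physical address ...
 *      i915_gem_object_do_bit_17_swizzle(obj);
 *
 * so that any page whose physical address flipped bit 17 is swizzled back
 * into the layout the GPU expects.
 */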