Blame - drivers/gpu/drm/i915/i915_gem_tiling.c - kernel/msm-4.9

blob: 0c1b3a0834e100b8227ad010c54bcf6d4e62480d [file] [log] [blame]

Eric Anholt	673a394	2008-07-30 12:06:12 -0700	[diff] [blame^]	1	/*
				2	* Copyright © 2008 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	* Authors:
				24	* Eric Anholt <eric@anholt.net>
				25	*
				26	*/
				27
				28	#include "drmP.h"
				29	#include "drm.h"
				30	#include "i915_drm.h"
				31	#include "i915_drv.h"
				32
				33	/** @file i915_gem_tiling.c
				34	*
				35	* Support for managing tiling state of buffer objects.
				36	*
				37	* The idea behind tiling is to increase cache hit rates by rearranging
				38	* pixel data so that a group of pixel accesses are in the same cacheline.
				39	* Performance improvements from doing this on the back/depth buffer are on
				40	* the order of 30%.
				41	*
				42	* Intel architectures make this somewhat more complicated, though, by
				43	* adjustments made to addressing of data when the memory is in interleaved
				44	* mode (matched pairs of DIMMS) to improve memory bandwidth.
				45	* For interleaved memory, the CPU sends every sequential 64 bytes
				46	* to an alternate memory channel so it can get the bandwidth from both.
				47	*
				48	* The GPU also rearranges its accesses for increased bandwidth to interleaved
				49	* memory, and it matches what the CPU does for non-tiled. However, when tiled
				50	* it does it a little differently, since one walks addresses not just in the
				51	* X direction but also Y. So, along with alternating channels when bit
				52	* 6 of the address flips, it also alternates when other bits flip -- Bits 9
				53	* (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
				54	* are common to both the 915 and 965-class hardware.
				55	*
				56	* The CPU also sometimes XORs in higher bits as well, to improve
				57	* bandwidth doing strided access like we do so frequently in graphics. This
				58	* is called "Channel XOR Randomization" in the MCH documentation. The result
				59	* is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
				60	* decode.
				61	*
				62	* All of this bit 6 XORing has an effect on our memory management,
				63	* as we need to make sure that the 3d driver can correctly address object
				64	* contents.
				65	*
				66	* If we don't have interleaved memory, all tiling is safe and no swizzling is
				67	* required.
				68	*
				69	* When bit 17 is XORed in, we simply refuse to tile at all. Bit
				70	* 17 is not just a page offset, so as we page an object out and back in,
				71	* individual pages in it will have different bit 17 addresses, resulting in
				72	* each 64 bytes being swapped with its neighbor!
				73	*
				74	* Otherwise, if interleaved, we have to tell the 3d driver what the address
				75	* swizzling it needs to do is, since it's writing with the CPU to the pages
				76	* (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
				77	* pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
				78	* required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
				79	* to match what the GPU expects.
				80	*/
				81
				82	/**
				83	* Detects bit 6 swizzling of address lookup between IGD access and CPU
				84	* access through main memory.
				85	*/
				86	void
				87	i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
				88	{
				89	drm_i915_private_t *dev_priv = dev->dev_private;
				90	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
				91	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
				92
				93	if (!IS_I9XX(dev)) {
				94	/* As far as we know, the 865 doesn't have these bit 6
				95	* swizzling issues.
				96	*/
				97	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				98	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				99	} else if (!IS_I965G(dev) \|\| IS_I965GM(dev)) {
				100	uint32_t dcc;
				101
				102	/* On 915-945 and GM965, channel interleave by the CPU is
				103	* determined by DCC. The CPU will alternate based on bit 6
				104	* in interleaved mode, and the GPU will then also alternate
				105	* on bit 6, 9, and 10 for X, but the CPU may also optionally
				106	* alternate based on bit 17 (XOR not disabled and XOR
				107	* bit == 17).
				108	*/
				109	dcc = I915_READ(DCC);
				110	switch (dcc & DCC_ADDRESSING_MODE_MASK) {
				111	case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
				112	case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
				113	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				114	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				115	break;
				116	case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
				117	if (IS_I915G(dev) \|\| IS_I915GM(dev) \|\|
				118	dcc & DCC_CHANNEL_XOR_DISABLE) {
				119	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				120	swizzle_y = I915_BIT_6_SWIZZLE_9;
				121	} else if (IS_I965GM(dev)) {
				122	/* GM965 only does bit 11-based channel
				123	* randomization
				124	*/
				125	swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				126	swizzle_y = I915_BIT_6_SWIZZLE_9_11;
				127	} else {
				128	/* Bit 17 or perhaps other swizzling */
				129	swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
				130	swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
				131	}
				132	break;
				133	}
				134	if (dcc == 0xffffffff) {
				135	DRM_ERROR("Couldn't read from MCHBAR. "
				136	"Disabling tiling.\n");
				137	swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
				138	swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
				139	}
				140	} else {
				141	/* The 965, G33, and newer, have a very flexible memory
				142	* configuration. It will enable dual-channel mode
				143	* (interleaving) on as much memory as it can, and the GPU
				144	* will additionally sometimes enable different bit 6
				145	* swizzling for tiled objects from the CPU.
				146	*
				147	* Here's what I found on the G965:
				148	* slot fill memory size swizzling
				149	* 0A 0B 1A 1B 1-ch 2-ch
				150	* 512 0 0 0 512 0 O
				151	* 512 0 512 0 16 1008 X
				152	* 512 0 0 512 16 1008 X
				153	* 0 512 0 512 16 1008 X
				154	* 1024 1024 1024 0 2048 1024 O
				155	*
				156	* We could probably detect this based on either the DRB
				157	* matching, which was the case for the swizzling required in
				158	* the table above, or from the 1-ch value being less than
				159	* the minimum size of a rank.
				160	*/
				161	if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
				162	swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				163	swizzle_y = I915_BIT_6_SWIZZLE_NONE;
				164	} else {
				165	swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				166	swizzle_y = I915_BIT_6_SWIZZLE_9;
				167	}
				168	}
				169
				170	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
				171	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
				172	}
				173
				174	/**
				175	* Sets the tiling mode of an object, returning the required swizzling of
				176	* bit 6 of addresses in the object.
				177	*/
				178	int
				179	i915_gem_set_tiling(struct drm_device dev, void data,
				180	struct drm_file *file_priv)
				181	{
				182	struct drm_i915_gem_set_tiling *args = data;
				183	drm_i915_private_t *dev_priv = dev->dev_private;
				184	struct drm_gem_object *obj;
				185	struct drm_i915_gem_object *obj_priv;
				186
				187	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
				188	if (obj == NULL)
				189	return -EINVAL;
				190	obj_priv = obj->driver_private;
				191
				192	mutex_lock(&dev->struct_mutex);
				193
				194	if (args->tiling_mode == I915_TILING_NONE) {
				195	obj_priv->tiling_mode = I915_TILING_NONE;
				196	args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
				197	} else {
				198	if (args->tiling_mode == I915_TILING_X)
				199	args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
				200	else
				201	args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
				202	/* If we can't handle the swizzling, make it untiled. */
				203	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
				204	args->tiling_mode = I915_TILING_NONE;
				205	args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
				206	}
				207	}
				208	obj_priv->tiling_mode = args->tiling_mode;
				209
				210	mutex_unlock(&dev->struct_mutex);
				211
				212	drm_gem_object_unreference(obj);
				213
				214	return 0;
				215	}
				216
				217	/**
				218	* Returns the current tiling mode and required bit 6 swizzling for the object.
				219	*/
				220	int
				221	i915_gem_get_tiling(struct drm_device dev, void data,
				222	struct drm_file *file_priv)
				223	{
				224	struct drm_i915_gem_get_tiling *args = data;
				225	drm_i915_private_t *dev_priv = dev->dev_private;
				226	struct drm_gem_object *obj;
				227	struct drm_i915_gem_object *obj_priv;
				228
				229	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
				230	if (obj == NULL)
				231	return -EINVAL;
				232	obj_priv = obj->driver_private;
				233
				234	mutex_lock(&dev->struct_mutex);
				235
				236	args->tiling_mode = obj_priv->tiling_mode;
				237	switch (obj_priv->tiling_mode) {
				238	case I915_TILING_X:
				239	args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
				240	break;
				241	case I915_TILING_Y:
				242	args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
				243	break;
				244	case I915_TILING_NONE:
				245	args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
				246	break;
				247	default:
				248	DRM_ERROR("unknown tiling mode\n");
				249	}
				250
				251	mutex_unlock(&dev->struct_mutex);
				252
				253	drm_gem_object_unreference(obj);
				254
				255	return 0;
				256	}