/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_gem_object_get_fence() to synchronize fencing status
 * for cpu access. Also note that some code wants an unfenced view; for those
 * cases the fence can be removed forcefully with i915_gem_object_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */

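/*
 * A minimal usage sketch (an assumption-laden illustration, not a verbatim
 * snippet from any caller): with dev->struct_mutex held and the object
 * already bound into the mappable part of the global GTT, fenced CPU access
 * through the GTT mmap typically looks like
 *
 *	ret = i915_gem_object_get_fence(obj);
 *	if (ret)
 *		return ret;
 *
 *	if (i915_gem_object_pin_fence(obj)) {
 *		... access the object through its detiling GTT mmap ...
 *		i915_gem_object_unpin_fence(obj);
 *	}
 */
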
static void i965_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	i915_reg_t fence_reg_lo, fence_reg_hi;
	int fence_pitch_shift;

	if (INTEL_INFO(dev)->gen >= 6) {
		fence_reg_lo = FENCE_REG_GEN6_LO(reg);
		fence_reg_hi = FENCE_REG_GEN6_HI(reg);
		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
	} else {
		fence_reg_lo = FENCE_REG_965_LO(reg);
		fence_reg_hi = FENCE_REG_965_HI(reg);
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
	}

	/* To w/a incoherency with non-atomic 64-bit register updates,
	 * we split the 64-bit update into two 32-bit writes. In order
	 * for a partial fence not to be evaluated between writes, we
	 * precede the update with a write to turn off the fence register,
	 * and only enable the fence as the last step.
	 *
	 * For extra levels of paranoia, we make sure each step lands
	 * before applying the next step.
	 */
	I915_WRITE(fence_reg_lo, 0);
	POSTING_READ(fence_reg_lo);

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		unsigned int tiling = i915_gem_object_get_tiling(obj);
		unsigned int stride = i915_gem_object_get_stride(obj);
		uint64_t val;

		/* Adjust fence size to match tiled area */
		if (tiling != I915_TILING_NONE) {
			uint32_t row_size = stride *
				(tiling == I915_TILING_Y ? 32 : 8);
			size = (size / row_size) * row_size;
		}

		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
				 0xfffff000) << 32;
		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
		val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
		if (tiling == I915_TILING_Y)
			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;

		I915_WRITE(fence_reg_hi, val >> 32);
		POSTING_READ(fence_reg_hi);

		I915_WRITE(fence_reg_lo, val);
		POSTING_READ(fence_reg_lo);
	} else {
		I915_WRITE(fence_reg_hi, 0);
		POSTING_READ(fence_reg_hi);
	}
}

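/*
 * For reference, a sketch of the 64-bit value assembled above (derived from
 * the code itself, not from a register spec): bits 63:44 hold bits 31:12 of
 * the last page covered by the fence, bits 31:12 hold the start of the
 * fenced GTT range, the pitch in units of 128 bytes minus one sits at
 * fence_pitch_shift, I965_FENCE_TILING_Y_SHIFT selects Y-major tiling and
 * I965_FENCE_REG_VALID arms the fence.
 */
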
static void i915_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	u32 val;

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		unsigned int tiling = i915_gem_object_get_tiling(obj);
		unsigned int stride = i915_gem_object_get_stride(obj);
		int pitch_val;
		int tile_width;

		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
		     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);

		if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
			tile_width = 128;
		else
			tile_width = 512;

		/* Note: pitch better be a power of two tile widths */
		pitch_val = stride / tile_width;
		pitch_val = ffs(pitch_val) - 1;

		val = i915_gem_obj_ggtt_offset(obj);
		if (tiling == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I915_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE(FENCE_REG(reg), val);
	POSTING_READ(FENCE_REG(reg));
}

static void i830_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t val;

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		unsigned int tiling = i915_gem_object_get_tiling(obj);
		unsigned int stride = i915_gem_object_get_stride(obj);
		uint32_t pitch_val;

		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
		     "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
		     i915_gem_obj_ggtt_offset(obj), size);

		pitch_val = stride / 128;
		pitch_val = ffs(pitch_val) - 1;

		val = i915_gem_obj_ggtt_offset(obj);
		if (tiling == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I830_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE(FENCE_REG(reg), val);
	POSTING_READ(FENCE_REG(reg));
}

inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
}

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	/* Ensure that all CPU reads are completed before installing a fence
	 * and all writes before removing the fence.
	 */
	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
		mb();

	WARN(obj &&
	     (!i915_gem_object_get_stride(obj) ||
	      !i915_gem_object_get_tiling(obj)),
	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
	     i915_gem_object_get_stride(obj),
	     i915_gem_object_get_tiling(obj));

	if (IS_GEN2(dev))
		i830_write_fence_reg(dev, reg, obj);
	else if (IS_GEN3(dev))
		i915_write_fence_reg(dev, reg, obj);
	else if (INTEL_INFO(dev)->gen >= 4)
		i965_write_fence_reg(dev, reg, obj);

	/* And similarly be paranoid that no direct access to this region
	 * is reordered to before the fence is installed.
	 */
	if (i915_gem_object_needs_mb(obj))
		mb();
}

static inline int fence_number(struct drm_i915_private *dev_priv,
			       struct drm_i915_fence_reg *fence)
{
	return fence - dev_priv->fence_regs;
}

static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	int reg = fence_number(dev_priv, fence);

	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);

	if (enable) {
		obj->fence_reg = reg;
		fence->obj = obj;
		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
	} else {
		obj->fence_reg = I915_FENCE_REG_NONE;
		fence->obj = NULL;
		list_del_init(&fence->lru_list);
	}
	obj->fence_dirty = false;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_tiled(obj))
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
	return i915_gem_active_retire(&obj->last_fence,
				      &obj->base.dev->struct_mutex);
}

/**
 * i915_gem_object_put_fence - force-remove fence for an object
 * @obj: object to map through a fence reg
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct drm_i915_fence_reg *fence;
	int ret;

	ret = i915_gem_object_wait_fence(obj);
	if (ret)
		return ret;

	if (obj->fence_reg == I915_FENCE_REG_NONE)
		return 0;

	fence = &dev_priv->fence_regs[obj->fence_reg];

	if (WARN_ON(fence->pin_count))
		return -EBUSY;

	i915_gem_object_fence_lost(obj);
	i915_gem_object_update_fence(obj, fence, false);

	return 0;
}

static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_fence_reg *reg, *avail;
	int i;

	/* First try to find a free reg */
	avail = NULL;
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return reg;

		if (!reg->pin_count)
			avail = reg;
	}

	if (avail == NULL)
		goto deadlock;

	/* None available, try to steal one or wait for a user to finish */
	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
		if (reg->pin_count)
			continue;

		return reg;
	}

deadlock:
	/* Wait for completion of pending flips which consume fences */
	if (intel_has_pending_fb_unpin(dev))
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-EDEADLK);
}

/**
 * i915_gem_object_get_fence - set up fencing for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	bool enable = i915_gem_object_is_tiled(obj);
	struct drm_i915_fence_reg *reg;
	int ret;

	/* Have we updated the tiling parameters upon the object and so
	 * will need to serialise the write to the associated fence register?
	 */
	if (obj->fence_dirty) {
		ret = i915_gem_object_wait_fence(obj);
		if (ret)
			return ret;
	}

	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
			return 0;
		}
	} else if (enable) {
		if (WARN_ON(!obj->map_and_fenceable))
			return -EINVAL;

		reg = i915_find_fence_reg(dev);
		if (IS_ERR(reg))
			return PTR_ERR(reg);

		if (reg->obj) {
			struct drm_i915_gem_object *old = reg->obj;

			ret = i915_gem_object_wait_fence(old);
			if (ret)
				return ret;

			i915_gem_object_fence_lost(old);
		}
	} else
		return 0;

	i915_gem_object_update_fence(obj, reg, enable);

	return 0;
}

/**
 * i915_gem_object_pin_fence - pin fencing state
 * @obj: object to pin fencing for
 *
 * This pins the fencing state (whether tiled or untiled) to make sure the
 * object is ready to be used as a scanout target. Fencing status must be
 * synchronized first by calling i915_gem_object_get_fence().
 *
 * The resulting fence pin reference must be released again with
 * i915_gem_object_unpin_fence().
 *
 * Returns:
 *
 * True if the object has a fence, false otherwise.
 */
bool
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
{
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);

		WARN_ON(!ggtt_vma ||
			dev_priv->fence_regs[obj->fence_reg].pin_count >
			i915_vma_pin_count(ggtt_vma));
		dev_priv->fence_regs[obj->fence_reg].pin_count++;
		return true;
	} else
		return false;
}

/**
 * i915_gem_object_unpin_fence - unpin fencing state
 * @obj: object to unpin fencing for
 *
 * This releases the fence pin reference acquired through
 * i915_gem_object_pin_fence. It will handle both objects with and without an
 * attached fence correctly; callers do not need to distinguish the two cases.
 */
void
i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
{
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
		dev_priv->fence_regs[obj->fence_reg].pin_count--;
	}
}

/**
 * i915_gem_restore_fences - restore fence state
 * @dev: DRM device
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume.
 */
void i915_gem_restore_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
		if (reg->obj) {
			i915_gem_object_update_fence(reg->obj, reg,
						     i915_gem_object_get_tiling(reg->obj));
		} else {
			i915_gem_write_fence(dev, i, NULL);
		}
	}
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what the address
 * swizzling it needs to do is, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */

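/*
 * Purely as an illustration of the paragraphs above (this formula is not
 * used by the code in this file): with I915_BIT_6_SWIZZLE_9_10 the CPU has
 * to flip bit 6 of an offset into a tiled buffer whenever bits 9 and 10 of
 * that offset differ, i.e.
 *
 *	swizzled = offset ^ ((((offset >> 9) ^ (offset >> 10)) & 1) << 6);
 *
 * I915_BIT_6_SWIZZLE_9 drops the bit-10 term, and the _11/_17 variants XOR
 * the corresponding extra bit into bit 6 as well.
 */
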
/**
 * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @dev: DRM device
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

	if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
		/*
		 * On BDW+, swizzling is not used. We leave the CPU memory
		 * controller in charge of optimizing memory accesses without
		 * the extra address manipulation GPU side.
		 *
		 * VLV and CHV don't have GPU swizzling.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (INTEL_INFO(dev)->gen >= 6) {
		if (dev_priv->preserve_bios_swizzle) {
			if (I915_READ(DISP_ARB_CTL) &
			    DISP_TILE_SURFACE_SWIZZLING) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		} else {
			uint32_t dimm_c0, dimm_c1;
			dimm_c0 = I915_READ(MAD_DIMM_C0);
			dimm_c1 = I915_READ(MAD_DIMM_C1);
			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			/* Enable swizzling when the channels are populated
			 * with identically sized dimms. We don't need to check
			 * the 3rd channel because no cpu with gpu attached
			 * ships in that configuration. Also, swizzling only
			 * makes sense for 2 channels anyway. */
			if (dimm_c0 == dimm_c1) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		}
	} else if (IS_GEN5(dev)) {
		/* On Ironlake, whatever the DRAM config, the GPU always does
		 * the same swizzling setup.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
		swizzle_y = I915_BIT_6_SWIZZLE_9;
	} else if (IS_GEN2(dev)) {
		/* As far as we know, the 865 doesn't have these bit 6
		 * swizzling issues.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
		uint32_t dcc;

		/* On 9xx chipsets, channel interleave by the CPU is
		 * determined by DCC. For single-channel, neither the CPU
		 * nor the GPU do swizzling. For dual channel interleaved,
		 * the GPU's interleave is on bits 9 and 10 for X tiled, and
		 * bit 9 for Y tiled. The CPU's interleave is independent, and
		 * can be based on either bit 11 (haven't seen this yet) or
		 * bit 17 (common).
		 */
		dcc = I915_READ(DCC);
		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			break;
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				/* This is the base swizzling by the GPU for
				 * tiled buffers.
				 */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				/* Bit 11 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
			} else {
				/* Bit 17 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
			}
			break;
		}

		/* check for L-shaped memory aka modified enhanced addressing */
		if (IS_GEN4(dev) &&
		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}

		if (dcc == 0xffffffff) {
			DRM_ERROR("Couldn't read from MCHBAR. "
				  "Disabling tiling.\n");
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}
	} else {
		/* The 965, G33, and newer have a very flexible memory
		 * configuration. It will enable dual-channel mode
		 * (interleaving) on as much memory as it can, and the GPU
		 * will additionally sometimes enable different bit 6
		 * swizzling for tiled objects from the CPU.
		 *
		 * Here's what I found on the G965:
		 *    slot fill         memory size  swizzling
		 * 0A   0B   1A   1B    1-ch   2-ch
		 * 512  0    0    0     512    0     O
		 * 512  0    512  0     16     1008  X
		 * 512  0    0    512   16     1008  X
		 * 0    512  0    512   16     1008  X
		 * 1024 1024 1024 0     2048   1024  O
		 *
		 * We could probably detect this based on either the DRB
		 * matching, which was the case for the swizzling required in
		 * the table above, or from the 1-ch value being less than
		 * the minimum size of a rank.
		 *
		 * Reports indicate that the swizzling actually
		 * varies depending upon page placement inside the
		 * channels, i.e. we see swizzled pages where the
		 * banks of memory are paired and unswizzled on the
		 * uneven portion, so leave that as unknown.
		 */
		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
			swizzle_y = I915_BIT_6_SWIZZLE_9;
		}
	}

	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
		/* Userspace likes to explode if it sees unknown swizzling,
		 * so lie. We will finish the lie when reporting through
		 * the get-tiling-ioctl by reporting the physical swizzle
		 * mode as unknown instead.
		 *
		 * As we don't strictly know what the swizzling is, it may be
		 * bit17 dependent, and so we need to also prevent the pages
		 * from being moved.
		 */
		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	}

	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
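/*
 * (Sketch of the reasoning, following the DOC comment above: bit 17 only
 * changes between whole physical pages, and the CPU folds it into bit 6 of
 * its channel selection. Within a 4K page, flipping bit 6 exchanges each
 * 64-byte block with its neighbour in the same 128-byte pair, which is
 * exactly the swap performed below.)
 */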
static void
i915_gem_swizzle_page(struct page *page)
{
	char temp[64];
	char *vaddr;
	int i;

	vaddr = kmap(page);

	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(temp, &vaddr[i], 64);
		memcpy(&vaddr[i], &vaddr[i + 64], 64);
		memcpy(&vaddr[i + 64], temp, 64);
	}

	kunmap(page);
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
{
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL)
		return;

	i = 0;
	for_each_sgt_page(page, sgt_iter, obj->pages) {
		char new_bit_17 = page_to_phys(page) >> 17;
		if ((new_bit_17 & 0x1) !=
		    (test_bit(i, obj->bit_17) != 0)) {
			i915_gem_swizzle_page(page);
			set_page_dirty(page);
		}
		i++;
	}
}

Daniel Vetter3271dca2015-07-24 17:40:15 +0200769/**
770 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
771 * @obj: i915 GEM buffer object
772 *
773 * This function saves the bit 17 of each page frame number so that swizzling
774 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
775 * be called before the backing storage can be unpinned.
776 */
Daniel Vetter7f96eca2015-07-24 17:40:14 +0200777void
778i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
779{
Dave Gordon85d12252016-05-20 11:54:06 +0100780 struct sgt_iter sgt_iter;
781 struct page *page;
Daniel Vetter7f96eca2015-07-24 17:40:14 +0200782 int page_count = obj->base.size >> PAGE_SHIFT;
783 int i;
784
785 if (obj->bit_17 == NULL) {
786 obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
787 sizeof(long), GFP_KERNEL);
788 if (obj->bit_17 == NULL) {
789 DRM_ERROR("Failed to allocate memory for bit 17 "
790 "record\n");
791 return;
792 }
793 }
794
795 i = 0;
Dave Gordon85d12252016-05-20 11:54:06 +0100796
797 for_each_sgt_page(page, sgt_iter, obj->pages) {
798 if (page_to_phys(page) & (1 << 17))
Daniel Vetter7f96eca2015-07-24 17:40:14 +0200799 __set_bit(i, obj->bit_17);
800 else
801 __clear_bit(i, obj->bit_17);
802 i++;
803 }
804}
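
/*
 * A minimal sketch of how the two helpers above pair up around a backing
 * storage move (assuming a platform whose swizzling is bit-17 dependent and
 * a caller that otherwise manages obj->pages itself):
 *
 *	i915_gem_object_save_bit_17_swizzle(obj);
 *	... the pages may now be swapped out and faulted back in ...
 *	i915_gem_object_do_bit_17_swizzle(obj);
 *
 * save records bit 17 of each page's physical address; do re-swizzles any
 * page whose bit 17 changed across the move.
 */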