drm/i915: Treat ringbuffer writes as write to normal memory

Ringbuffers are now being written to either through LLC or WC paths, so
treating them as simply iomem is no longer adequate. However, for the
older !llc hardware, the hardware is documentated as treating the TAIL
register update as serialising, so we can relax the barriers when filling
the rings (but even if it were not, it is still an uncached register write
and so serialising anyway.).

For simplicity, let's ignore the iomem annotation.

v2: Remove iomem from ringbuffer->virtual_address
v3: And for good measure add iomem elsewhere to keep sparse happy

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> #v2
Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-8-git-send-email-chris@chris-wilson.co.uk
Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-7-git-send-email-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10f1e32..30da543 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3681,7 +3681,7 @@
 
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
 	if (WARN_ON(!vma->obj->map_and_fenceable))
-		return ERR_PTR(-ENODEV);
+		return IO_ERR_PTR(-ENODEV);
 
 	GEM_BUG_ON(!vma->is_ggtt);
 	GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
@@ -3692,7 +3692,7 @@
 					vma->node.start,
 					vma->node.size);
 		if (ptr == NULL)
-			return ERR_PTR(-ENOMEM);
+			return IO_ERR_PTR(-ENOMEM);
 
 		vma->iomap = ptr;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aa5f31d..c4a6579 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -580,6 +580,7 @@
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
+#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
 
 /**
  * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c8ed367..093bfe1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2860,6 +2860,7 @@
 semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	void __iomem *vaddr;
 	u32 cmd, ipehr, head;
 	u64 offset = 0;
 	int i, backwards;
@@ -2898,6 +2899,7 @@
 	 */
 	head = I915_READ_HEAD(engine) & HEAD_ADDR;
 	backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
+	vaddr = (void __iomem *)engine->buffer->virtual_start;
 
 	for (i = backwards; i; --i) {
 		/*
@@ -2908,7 +2910,7 @@
 		head &= engine->buffer->size - 1;
 
 		/* This here seems to blow up */
-		cmd = ioread32(engine->buffer->virtual_start + head);
+		cmd = ioread32(vaddr + head);
 		if (cmd == ipehr)
 			break;
 
@@ -2918,11 +2920,11 @@
 	if (!i)
 		return NULL;
 
-	*seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1;
+	*seqno = ioread32(vaddr + head + 4) + 1;
 	if (INTEL_GEN(dev_priv) >= 8) {
-		offset = ioread32(engine->buffer->virtual_start + head + 12);
+		offset = ioread32(vaddr + head + 12);
 		offset <<= 32;
-		offset = ioread32(engine->buffer->virtual_start + head + 8);
+		offset |= ioread32(vaddr + head + 8);
 	}
 	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index b1074a2..6344999 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -189,7 +189,7 @@
 	struct i915_vma *vma;
 	struct drm_i915_gem_object *obj;
 	bool prealloc = false;
-	void *vaddr;
+	void __iomem *vaddr;
 	int ret;
 
 	if (intel_fb &&
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 938e3ee..aa3ac02 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -81,8 +81,9 @@
  */
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {
-	ringbuf->tail &= ringbuf->size - 1;
+	__intel_ringbuffer_advance(ringbuf);
 }
+
 /**
  * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
  * @ringbuf: Ringbuffer to write to.
@@ -91,8 +92,7 @@
 static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
 					   u32 data)
 {
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-	ringbuf->tail += 4;
+	__intel_ringbuffer_emit(ringbuf, data);
 }
 static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
 					       i915_reg_t reg)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9c0a0b4..afed24a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2021,7 +2021,8 @@
 		/* Access through the GTT requires the device to be awake. */
 		assert_rpm_wakelock_held(dev_priv);
 
-		addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
+		addr = (void __force *)
+			i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
 		if (IS_ERR(addr)) {
 			ret = PTR_ERR(addr);
 			goto err_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5cbafc0..d1b2d9b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -84,7 +84,7 @@
 
 struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
-	void __iomem *virtual_start;
+	void *virtual_start;
 	struct i915_vma *vma;
 
 	struct intel_engine_cs *engine;
@@ -453,23 +453,35 @@
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
-static inline void intel_ring_emit(struct intel_engine_cs *engine,
-				   u32 data)
+
+static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb,
+					   u32 data)
 {
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-	ringbuf->tail += 4;
+	*(uint32_t *)(rb->virtual_start + rb->tail) = data;
+	rb->tail += 4;
 }
+
+static inline void __intel_ringbuffer_advance(struct intel_ringbuffer *rb)
+{
+	rb->tail &= rb->size - 1;
+}
+
+static inline void intel_ring_emit(struct intel_engine_cs *engine, u32 data)
+{
+	__intel_ringbuffer_emit(engine->buffer, data);
+}
+
 static inline void intel_ring_emit_reg(struct intel_engine_cs *engine,
 				       i915_reg_t reg)
 {
 	intel_ring_emit(engine, i915_mmio_reg_offset(reg));
 }
+
 static inline void intel_ring_advance(struct intel_engine_cs *engine)
 {
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	ringbuf->tail &= ringbuf->size - 1;
+	__intel_ringbuffer_advance(engine->buffer);
 }
+
 int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ringbuffer *ringbuf);