drm/nouveau/fence: convert to exec engine, and improve channel sync

Now have a somewhat simpler semaphore sync implementation for nv17:nv84,
and have switched to using semaphores as fences on nv84+, making use of
the hardware's >= acquire operation.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index ba3e57c..ce222eb 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -18,6 +18,7 @@
              nv50_fb.o nvc0_fb.o \
              nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o nvc0_fifo.o \
              nve0_fifo.o \
+             nv04_fence.o nv10_fence.o nv84_fence.o nvc0_fence.o \
              nv04_software.o nv50_software.o nvc0_software.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
              nv40_graph.o nv50_graph.o nvc0_graph.o nve0_graph.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 694f632..d25dc24 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -119,6 +119,7 @@
 		      struct drm_file *file_priv,
 		      uint32_t vram_handle, uint32_t gart_handle)
 {
+	struct nouveau_exec_engine *fence = nv_engine(dev, NVOBJ_ENGINE_FENCE);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv);
@@ -157,8 +158,6 @@
 	}
 
 	NV_DEBUG(dev, "initialising channel %d\n", chan->id);
-	INIT_LIST_HEAD(&chan->fence.pending);
-	spin_lock_init(&chan->fence.lock);
 
 	/* setup channel's memory and vm */
 	ret = nouveau_gpuobj_channel_init(chan, vram_handle, gart_handle);
@@ -188,7 +187,7 @@
 	chan->user_put = 0x40;
 	chan->user_get = 0x44;
 	if (dev_priv->card_type >= NV_50)
-                chan->user_get_hi = 0x60;
+		chan->user_get_hi = 0x60;
 
 	/* disable the fifo caches */
 	pfifo->reassign(dev, false);
@@ -211,7 +210,6 @@
 
 	for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
 		OUT_RING  (chan, 0x00000000);
-	FIRE_RING(chan);
 
 	ret = nouveau_gpuobj_gr_new(chan, NvSw, nouveau_software_class(dev));
 	if (ret) {
@@ -219,7 +217,21 @@
 		return ret;
 	}
 
-	ret = nouveau_fence_channel_init(chan);
+	if (dev_priv->card_type < NV_C0) {
+		ret = RING_SPACE(chan, 2);
+		if (ret) {
+			nouveau_channel_put(&chan);
+			return ret;
+		}
+
+		BEGIN_NV04(chan, NvSubSw, NV01_SUBCHAN_OBJECT, 1);
+		OUT_RING  (chan, NvSw);
+		FIRE_RING (chan);
+	}
+
+	FIRE_RING(chan);
+
+	ret = fence->context_new(chan, NVOBJ_ENGINE_FENCE);
 	if (ret) {
 		nouveau_channel_put(&chan);
 		return ret;
@@ -291,12 +303,6 @@
 	/* give it chance to idle */
 	nouveau_channel_idle(chan);
 
-	/* ensure all outstanding fences are signaled.  they should be if the
-	 * above attempts at idling were OK, but if we failed this'll tell TTM
-	 * we're done with the buffers.
-	 */
-	nouveau_fence_channel_fini(chan);
-
 	/* boot it off the hardware */
 	pfifo->reassign(dev, false);
 
@@ -305,6 +311,9 @@
 	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
 		if (chan->engctx[i])
 			dev_priv->eng[i]->context_del(chan, i);
+		/*XXX: clean this up later, order is important */
+		if (i == NVOBJ_ENGINE_FENCE)
+			pfifo->destroy_context(chan);
 	}
 
 	pfifo->reassign(dev, true);
@@ -367,18 +376,14 @@
 	struct nouveau_fence *fence = NULL;
 	int ret;
 
-	nouveau_fence_update(chan);
-
-	if (chan->fence.sequence != chan->fence.sequence_ack) {
-		ret = nouveau_fence_new(chan, &fence);
-		if (!ret) {
-			ret = nouveau_fence_wait(fence, false, false);
-			nouveau_fence_unref(&fence);
-		}
-
-		if (ret)
-			NV_ERROR(dev, "Failed to idle channel %d.\n", chan->id);
+	ret = nouveau_fence_new(chan, &fence);
+	if (!ret) {
+		ret = nouveau_fence_wait(fence, false, false);
+		nouveau_fence_unref(&fence);
 	}
+
+	if (ret)
+		NV_ERROR(dev, "Failed to idle channel %d.\n", chan->id);
 }
 
 /* cleans up all the fifos from file_priv */
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index fa2ec49..188c92b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -67,8 +67,6 @@
 			   nvchan_rd32(chan, 0x8c));
 	}
 
-	seq_printf(m, "last fence    : %d\n", chan->fence.sequence);
-	seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index f9cdc92..69688ef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -461,7 +461,7 @@
 		OUT_RING  (chan, 0x00000000);
 	} else {
 		BEGIN_NVC0(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
-		OUT_RING  (chan, ++chan->fence.sequence);
+		OUT_RING  (chan, 0);
 		BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_PAGE_FLIP, 0x0000);
 	}
 	FIRE_RING (chan);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 43a46f1..79eecf5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -165,6 +165,7 @@
 #define NVOBJ_ENGINE_PPP	NVOBJ_ENGINE_MPEG
 #define NVOBJ_ENGINE_BSP	6
 #define NVOBJ_ENGINE_VP		7
+#define NVOBJ_ENGINE_FENCE	14
 #define NVOBJ_ENGINE_DISPLAY	15
 #define NVOBJ_ENGINE_NR		16
 
@@ -234,17 +235,6 @@
 	uint32_t user_get_hi;
 	uint32_t user_put;
 
-	/* Fencing */
-	struct {
-		/* lock protects the pending list only */
-		spinlock_t lock;
-		struct list_head pending;
-		uint32_t sequence;
-		uint32_t sequence_ack;
-		atomic_t last_sequence_irq;
-		struct nouveau_vma vma;
-	} fence;
-
 	/* DMA push buffer */
 	struct nouveau_gpuobj *pushbuf;
 	struct nouveau_bo     *pushbuf_bo;
@@ -1443,13 +1433,6 @@
 			       struct nouveau_vma *);
 extern void nouveau_bo_vma_del(struct nouveau_bo *, struct nouveau_vma *);
 
-/* nouveau_fence.c */
-int  nouveau_fence_init(struct drm_device *);
-void nouveau_fence_fini(struct drm_device *);
-int  nouveau_fence_channel_init(struct nouveau_channel *);
-void nouveau_fence_channel_fini(struct nouveau_channel *);
-void nouveau_fence_work(struct nouveau_fence *fence,
-			void (*work)(void *priv, bool signalled), void *priv);
 /* nouveau_gem.c */
 extern int nouveau_gem_new(struct drm_device *, int size, int align,
 			   uint32_t domain, uint32_t tile_mode,
@@ -1746,6 +1729,7 @@
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL                 0x00000001
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG                    0x00000002
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL                0x00000004
+#define NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD                         0x00001000
 #define NV84_SUBCHAN_NOTIFY_INTR                                     0x00000020
 #define NV84_SUBCHAN_WRCACHE_FLUSH                                   0x00000024
 #define NV10_SUBCHAN_REF_CNT                                         0x00000050
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 2c10d54..4ba41a4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -36,85 +36,71 @@
 #include "nouveau_software.h"
 #include "nouveau_dma.h"
 
-#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
-#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17)
+void
+nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
+{	/* Flush every fence still pending on fctx; called by the per-chipset context_del hooks. */
+	struct nouveau_fence *fence, *fnext;
+	spin_lock(&fctx->lock);
+	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+		if (fence->work)
+			fence->work(fence->priv, false); /* false: fence is being dropped, not signalled */
+		fence->channel = NULL; /* nouveau_fence_sync() treats a NULL channel as nothing to wait for */
+		list_del(&fence->head);
+		nouveau_fence_unref(&fence); /* drops the reference taken in nouveau_fence_emit() */
+	}
+	spin_unlock(&fctx->lock);
+}
+
+void
+nouveau_fence_context_new(struct nouveau_fence_chan *fctx)
+{	/* Initialise the common per-channel fence state: an empty pending list and its lock. */
+	INIT_LIST_HEAD(&fctx->pending);
+	spin_lock_init(&fctx->lock);
+}
 
 void
 nouveau_fence_update(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
-	struct nouveau_fence *tmp, *fence;
-	uint32_t sequence;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	struct nouveau_fence *fence, *fnext;
 
-	spin_lock(&chan->fence.lock);
-
-	/* Fetch the last sequence if the channel is still up and running */
-	if (likely(!list_empty(&chan->fence.pending))) {
-		if (USE_REFCNT(dev))
-			sequence = nvchan_rd32(chan, 0x48);
-		else
-			sequence = atomic_read(&chan->fence.last_sequence_irq);
-
-		if (chan->fence.sequence_ack == sequence)
-			goto out;
-		chan->fence.sequence_ack = sequence;
-	}
-
-	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, head) {
-		if (fence->sequence > chan->fence.sequence_ack)
+	spin_lock(&fctx->lock); /* retire fences the channel has already passed */
+	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+		if (priv->read(chan) < fence->sequence) /* list is in emit order, so stop at the first unreached fence */
 			break;
 
-		fence->channel = NULL;
-		list_del(&fence->head);
 		if (fence->work)
 			fence->work(fence->priv, true);
-
+		fence->channel = NULL; /* now cleared after the work callback has run */
+		list_del(&fence->head);
 		nouveau_fence_unref(&fence);
 	}
-
-out:
-	spin_unlock(&chan->fence.lock);
+	spin_unlock(&fctx->lock);
 }
 
 int
 nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
 	int ret;
 
-	ret = RING_SPACE(chan, 2);
-	if (ret)
-		return ret;
+	fence->channel  = chan; /* set before emit: the emit hooks take the channel from the fence */
+	fence->timeout  = jiffies + (3 * DRM_HZ);
+	fence->sequence = ++fctx->sequence; /* per-channel counter, consumed even if emit fails */
 
-	if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
-		nouveau_fence_update(chan);
-
-		BUG_ON(chan->fence.sequence ==
-		       chan->fence.sequence_ack - 1);
+	ret = priv->emit(fence); /* chipset-specific hook installed by the *_fence_create() functions */
+	if (!ret) {
+		kref_get(&fence->kref); /* reference owned by the pending list */
+		spin_lock(&fctx->lock);
+		list_add_tail(&fence->head, &fctx->pending);
+		spin_unlock(&fctx->lock);
 	}
 
-	fence->sequence = ++chan->fence.sequence;
-	fence->channel = chan;
-
-	kref_get(&fence->kref);
-	spin_lock(&chan->fence.lock);
-	list_add_tail(&fence->head, &chan->fence.pending);
-	spin_unlock(&chan->fence.lock);
-
-	if (USE_REFCNT(dev)) {
-		if (dev_priv->card_type < NV_C0)
-			BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
-		else
-			BEGIN_NVC0(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
-	} else {
-		BEGIN_NV04(chan, NvSubSw, 0x0150, 1);
-	}
-	OUT_RING (chan, fence->sequence);
-	FIRE_RING(chan);
-	fence->timeout = jiffies + 3 * DRM_HZ;
-
-	return 0;
+	return ret; /* NOTE(review): on failure fence->channel stays set and the fence is not queued */
 }
 
 bool
@@ -158,6 +144,23 @@
 	return ret;
 }
 
+int
+nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{	/* Make chan wait for fence when it belongs to another channel and is not yet done. */
+	struct nouveau_channel *prev = fence ? fence->channel : NULL;
+	struct drm_device *dev = chan->dev;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	int ret = 0;
+
+	if (unlikely(prev && prev != chan && !nouveau_fence_done(fence))) {
+		ret = priv->sync(fence, chan); /* chipset-specific cross-channel sync hook */
+		if (unlikely(ret)) /* hook failed or is unsupported (-ENODEV): wait via nouveau_fence_wait() */
+			ret = nouveau_fence_wait(fence, true, false);
+	}
+
+	return ret; /* 0 when no sync was needed or it succeeded */
+}
+
 static void
 nouveau_fence_del(struct kref *kref)
 {
@@ -186,6 +189,9 @@
 	struct nouveau_fence *fence;
 	int ret = 0;
 
+	if (unlikely(!chan->engctx[NVOBJ_ENGINE_FENCE]))
+		return -ENODEV;
+
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 	if (!fence)
 		return -ENOMEM;
@@ -200,359 +206,3 @@
 	*pfence = fence;
 	return ret;
 }
-
-struct nouveau_semaphore {
-	struct kref ref;
-	struct drm_device *dev;
-	struct drm_mm_node *mem;
-};
-
-void
-nouveau_fence_work(struct nouveau_fence *fence,
-		   void (*work)(void *priv, bool signalled),
-		   void *priv)
-{
-	if (!fence->channel) {
-		work(priv, true);
-	} else {
-		fence->work = work;
-		fence->priv = priv;
-	}
-}
-
-static struct nouveau_semaphore *
-semaphore_alloc(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_semaphore *sema;
-	int size = (dev_priv->chipset < 0x84) ? 4 : 16;
-	int ret, i;
-
-	if (!USE_SEMA(dev))
-		return NULL;
-
-	sema = kmalloc(sizeof(*sema), GFP_KERNEL);
-	if (!sema)
-		goto fail;
-
-	ret = drm_mm_pre_get(&dev_priv->fence.heap);
-	if (ret)
-		goto fail;
-
-	spin_lock(&dev_priv->fence.lock);
-	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, size, 0, 0);
-	if (sema->mem)
-		sema->mem = drm_mm_get_block_atomic(sema->mem, size, 0);
-	spin_unlock(&dev_priv->fence.lock);
-
-	if (!sema->mem)
-		goto fail;
-
-	kref_init(&sema->ref);
-	sema->dev = dev;
-	for (i = sema->mem->start; i < sema->mem->start + size; i += 4)
-		nouveau_bo_wr32(dev_priv->fence.bo, i / 4, 0);
-
-	return sema;
-fail:
-	kfree(sema);
-	return NULL;
-}
-
-static void
-semaphore_free(struct kref *ref)
-{
-	struct nouveau_semaphore *sema =
-		container_of(ref, struct nouveau_semaphore, ref);
-	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
-
-	spin_lock(&dev_priv->fence.lock);
-	drm_mm_put_block(sema->mem);
-	spin_unlock(&dev_priv->fence.lock);
-
-	kfree(sema);
-}
-
-static void
-semaphore_work(void *priv, bool signalled)
-{
-	struct nouveau_semaphore *sema = priv;
-	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
-
-	if (unlikely(!signalled))
-		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);
-
-	kref_put(&sema->ref, semaphore_free);
-}
-
-static int
-semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *fence = NULL;
-	u64 offset = chan->fence.vma.offset + sema->mem->start;
-	int ret;
-
-	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 4);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 3);
-		OUT_RING  (chan, NvSema);
-		OUT_RING  (chan, offset);
-		OUT_RING  (chan, 1);
-	} else
-	if (dev_priv->chipset < 0xc0) {
-		ret = RING_SPACE(chan, 7);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram_handle);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 1); /* ACQUIRE_EQ */
-	} else {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 0x1001); /* ACQUIRE_EQ */
-	}
-
-	/* Delay semaphore destruction until its work is done */
-	ret = nouveau_fence_new(chan, &fence);
-	if (ret)
-		return ret;
-
-	kref_get(&sema->ref);
-	nouveau_fence_work(fence, semaphore_work, sema);
-	nouveau_fence_unref(&fence);
-	return 0;
-}
-
-static int
-semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *fence = NULL;
-	u64 offset = chan->fence.vma.offset + sema->mem->start;
-	int ret;
-
-	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING  (chan, NvSema);
-		OUT_RING  (chan, offset);
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
-		OUT_RING  (chan, 1);
-	} else
-	if (dev_priv->chipset < 0xc0) {
-		ret = RING_SPACE(chan, 7);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram_handle);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 2); /* RELEASE */
-	} else {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 0x1002); /* RELEASE */
-	}
-
-	/* Delay semaphore destruction until its work is done */
-	ret = nouveau_fence_new(chan, &fence);
-	if (ret)
-		return ret;
-
-	kref_get(&sema->ref);
-	nouveau_fence_work(fence, semaphore_work, sema);
-	nouveau_fence_unref(&fence);
-	return 0;
-}
-
-int
-nouveau_fence_sync(struct nouveau_fence *fence,
-		   struct nouveau_channel *wchan)
-{
-	struct nouveau_channel *chan;
-	struct drm_device *dev = wchan->dev;
-	struct nouveau_semaphore *sema;
-	int ret = 0;
-
-	chan = fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
-	if (likely(!chan || chan == wchan || nouveau_fence_done(fence)))
-		goto out;
-
-	sema = semaphore_alloc(dev);
-	if (!sema) {
-		/* Early card or broken userspace, fall back to
-		 * software sync. */
-		ret = nouveau_fence_wait(fence, true, false);
-		goto out;
-	}
-
-	/* try to take chan's mutex, if we can't take it right away
-	 * we have to fallback to software sync to prevent locking
-	 * order issues
-	 */
-	if (!mutex_trylock(&chan->mutex)) {
-		ret = nouveau_fence_wait(fence, true, false);
-		goto out_unref;
-	}
-
-	/* Make wchan wait until it gets signalled */
-	ret = semaphore_acquire(wchan, sema);
-	if (ret)
-		goto out_unlock;
-
-	/* Signal the semaphore from chan */
-	ret = semaphore_release(chan, sema);
-
-out_unlock:
-	mutex_unlock(&chan->mutex);
-out_unref:
-	kref_put(&sema->ref, semaphore_free);
-out:
-	if (chan)
-		nouveau_channel_put_unlocked(&chan);
-	return ret;
-}
-
-int
-nouveau_fence_channel_init(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *obj = NULL;
-	int ret;
-
-	if (dev_priv->card_type < NV_C0) {
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(chan, NvSubSw, NV01_SUBCHAN_OBJECT, 1);
-		OUT_RING  (chan, NvSw);
-		FIRE_RING (chan);
-	}
-
-	/* Setup area of memory shared between all channels for x-chan sync */
-	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
-		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
-
-		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
-					     mem->start << PAGE_SHIFT,
-					     mem->size, NV_MEM_ACCESS_RW,
-					     NV_MEM_TARGET_VRAM, &obj);
-		if (ret)
-			return ret;
-
-		ret = nouveau_ramht_insert(chan, NvSema, obj);
-		nouveau_gpuobj_ref(NULL, &obj);
-		if (ret)
-			return ret;
-	} else
-	if (USE_SEMA(dev)) {
-		/* map fence bo into channel's vm */
-		ret = nouveau_bo_vma_add(dev_priv->fence.bo, chan->vm,
-					 &chan->fence.vma);
-		if (ret)
-			return ret;
-	}
-
-	atomic_set(&chan->fence.last_sequence_irq, 0);
-	return 0;
-}
-
-void
-nouveau_fence_channel_fini(struct nouveau_channel *chan)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *tmp, *fence;
-
-	spin_lock(&chan->fence.lock);
-	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, head) {
-		fence->channel = NULL;
-		list_del(&fence->head);
-
-		if (unlikely(fence->work))
-			fence->work(fence->priv, false);
-
-		kref_put(&fence->kref, nouveau_fence_del);
-	}
-	spin_unlock(&chan->fence.lock);
-
-	nouveau_bo_vma_del(dev_priv->fence.bo, &chan->fence.vma);
-}
-
-int
-nouveau_fence_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int size = (dev_priv->chipset < 0x84) ? 4096 : 16384;
-	int ret;
-
-	/* Create a shared VRAM heap for cross-channel sync. */
-	if (USE_SEMA(dev)) {
-		ret = nouveau_bo_new(dev, size, 0, TTM_PL_FLAG_VRAM,
-				     0, 0, NULL, &dev_priv->fence.bo);
-		if (ret)
-			return ret;
-
-		ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
-		if (ret)
-			goto fail;
-
-		ret = nouveau_bo_map(dev_priv->fence.bo);
-		if (ret)
-			goto fail;
-
-		ret = drm_mm_init(&dev_priv->fence.heap, 0,
-				  dev_priv->fence.bo->bo.mem.size);
-		if (ret)
-			goto fail;
-
-		spin_lock_init(&dev_priv->fence.lock);
-	}
-
-	return 0;
-fail:
-	nouveau_bo_unmap(dev_priv->fence.bo);
-	nouveau_bo_ref(NULL, &dev_priv->fence.bo);
-	return ret;
-}
-
-void
-nouveau_fence_fini(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (USE_SEMA(dev)) {
-		drm_mm_takedown(&dev_priv->fence.heap);
-		nouveau_bo_unmap(dev_priv->fence.bo);
-		nouveau_bo_unpin(dev_priv->fence.bo);
-		nouveau_bo_ref(NULL, &dev_priv->fence.bo);
-	}
-}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 1337acb..ec9afa7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -25,4 +25,27 @@
 void nouveau_fence_idle(struct nouveau_channel *);
 void nouveau_fence_update(struct nouveau_channel *);
 
+struct nouveau_fence_chan { /* common per-channel fence state, stored in chan->engctx[NVOBJ_ENGINE_FENCE] */
+	struct list_head pending; /* emitted fences not yet retired, in emit order */
+	spinlock_t lock; /* taken around every walk/update of pending */
+	u32 sequence; /* last sequence number handed out by nouveau_fence_emit() */
+};
+
+struct nouveau_fence_priv { /* per-device fence engine; what nv_engine(dev, NVOBJ_ENGINE_FENCE) is cast to */
+	struct nouveau_exec_engine engine; /* must stay first: eng[] stores a pointer to this member */
+	int (*emit)(struct nouveau_fence *); /* queue fence->sequence on fence->channel; 0 or error */
+	int (*sync)(struct nouveau_fence *, struct nouveau_channel *); /* make the channel wait for the fence; non-zero triggers a nouveau_fence_wait() fallback */
+	u32 (*read)(struct nouveau_channel *); /* current value compared against fence->sequence */
+};
+
+void nouveau_fence_context_new(struct nouveau_fence_chan *); /* init pending list and lock */
+void nouveau_fence_context_del(struct nouveau_fence_chan *); /* flush fences still pending */
+
+int nv04_fence_create(struct drm_device *dev); /* selected for NV_04 */
+int nv04_fence_mthd(struct nouveau_channel *, u32, u32, u32); /* handler registered for method 0x0150 of class 0x006e */
+
+int nv10_fence_create(struct drm_device *dev); /* selected for NV_10..NV_50 when chipset < 0x84 */
+int nv84_fence_create(struct drm_device *dev); /* selected for NV_10..NV_50 when chipset >= 0x84 */
+int nvc0_fence_create(struct drm_device *dev); /* selected for NV_C0, NV_D0 and NV_E0 */
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 996755a..30f5423 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -709,7 +709,7 @@
 	}
 
 	if (chan->dma.ib_max) {
-		ret = nouveau_dma_wait(chan, req->nr_push + 1, 6);
+		ret = nouveau_dma_wait(chan, req->nr_push + 1, 16);
 		if (ret) {
 			NV_INFO(dev, "nv50cal_space: %d\n", ret);
 			goto out;
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index ed83c42..1039e57 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -39,6 +39,7 @@
 #include "nouveau_gpio.h"
 #include "nouveau_pm.h"
 #include "nv50_display.h"
+#include "nouveau_fence.h"
 #include "nouveau_software.h"
 
 static void nouveau_stub_takedown(struct drm_device *dev) {}
@@ -768,6 +769,29 @@
 	if (!dev_priv->noaccel) {
 		switch (dev_priv->card_type) {
 		case NV_04:
+			nv04_fence_create(dev);
+			break;
+		case NV_10:
+		case NV_20:
+		case NV_30:
+		case NV_40:
+		case NV_50:
+			if (dev_priv->chipset < 0x84)
+				nv10_fence_create(dev);
+			else
+				nv84_fence_create(dev);
+			break;
+		case NV_C0:
+		case NV_D0:
+		case NV_E0:
+			nvc0_fence_create(dev);
+			break;
+		default:
+			break;
+		}
+
+		switch (dev_priv->card_type) {
+		case NV_04:
 		case NV_10:
 		case NV_20:
 		case NV_30:
@@ -894,14 +918,10 @@
 	nouveau_backlight_init(dev);
 	nouveau_pm_init(dev);
 
-	ret = nouveau_fence_init(dev);
-	if (ret)
-		goto out_pm;
-
 	if (dev_priv->eng[NVOBJ_ENGINE_GR]) {
 		ret = nouveau_card_channel_init(dev);
 		if (ret)
-			goto out_fence;
+			goto out_pm;
 	}
 
 	if (dev->mode_config.num_crtc) {
@@ -916,8 +936,6 @@
 
 out_chan:
 	nouveau_card_channel_fini(dev);
-out_fence:
-	nouveau_fence_fini(dev);
 out_pm:
 	nouveau_pm_fini(dev);
 	nouveau_backlight_exit(dev);
@@ -974,7 +992,6 @@
 	}
 
 	nouveau_card_channel_fini(dev);
-	nouveau_fence_fini(dev);
 	nouveau_pm_fini(dev);
 	nouveau_backlight_exit(dev);
 	nouveau_display_destroy(dev);
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
new file mode 100644
index 0000000..08bd2ce
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv04_fence_chan {
+	struct nouveau_fence_chan base; /* must stay first: engctx is also read as nouveau_fence_chan */
+	atomic_t sequence; /* written by nv04_fence_mthd(), returned by nv04_fence_read() */
+};
+
+struct nv04_fence_priv { /* NV04 fence engine object; carries no state beyond the common part */
+	struct nouveau_fence_priv base;
+};
+
+static int
+nv04_fence_emit(struct nouveau_fence *fence)
+{	/* Queue method 0x0150 on the NvSubSw subchannel carrying fence->sequence. */
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NV04(chan, NvSubSw, 0x0150, 1); /* 0x0150 is bound to nv04_fence_mthd() in nv04_software.c */
+		OUT_RING  (chan, fence->sequence);
+		FIRE_RING (chan);
+	}
+	return ret; /* 0 on success, otherwise the RING_SPACE() error */
+}
+
+static int
+nv04_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{	/* No cross-channel sync here; the error makes nouveau_fence_sync() use nouveau_fence_wait(). */
+	return -ENODEV;
+}
+
+int
+nv04_fence_mthd(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{	/* Method handler: record data as the channel's latest fence value (class and mthd unused). */
+	struct nv04_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	atomic_set(&fctx->sequence, data);
+	return 0;
+}
+
+static u32
+nv04_fence_read(struct nouveau_channel *chan)
+{	/* Return the value most recently stored by nv04_fence_mthd() (0 until then). */
+	struct nv04_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	return atomic_read(&fctx->sequence);
+}
+
+static void
+nv04_fence_context_del(struct nouveau_channel *chan, int engine)
+{	/* Tear down the channel's fence context: flush pending fences, clear the slot, free. */
+	struct nv04_fence_chan *fctx = chan->engctx[engine];
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL; /* a NULL slot makes nouveau_fence_new() return -ENODEV */
+	kfree(fctx);
+}
+
+static int
+nv04_fence_context_new(struct nouveau_channel *chan, int engine)
+{	/* Allocate and attach the channel's fence context; returns 0 or -ENOMEM. */
+	struct nv04_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (fctx) {
+		nouveau_fence_context_new(&fctx->base);
+		atomic_set(&fctx->sequence, 0);
+		chan->engctx[engine] = fctx; /* slot read back by nv04_fence_mthd()/nv04_fence_read() */
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static int
+nv04_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{	/* Engine fini hook: nothing to do, always succeeds. */
+	return 0;
+}
+
+static int
+nv04_fence_init(struct drm_device *dev, int engine)
+{	/* Engine init hook: nothing to do, always succeeds. */
+	return 0;
+}
+
+static void
+nv04_fence_destroy(struct drm_device *dev, int engine)
+{	/* Engine destroy hook: unregister the engine slot and free the object. */
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fence_priv *priv = nv_engine(dev, engine);
+
+	dev_priv->eng[engine] = NULL; /* clear the slot before the backing memory is freed */
+	kfree(priv);
+}
+
+int
+nv04_fence_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fence_priv *priv;
+
+	/* Install the NV04 fence engine; returns 0, or -ENOMEM if the allocation fails. */
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nv04_fence_destroy;
+	priv->base.engine.init = nv04_fence_init;
+	priv->base.engine.fini = nv04_fence_fini;
+	priv->base.engine.context_new = nv04_fence_context_new;
+	priv->base.engine.context_del = nv04_fence_context_del;
+	priv->base.emit = nv04_fence_emit;
+	priv->base.sync = nv04_fence_sync;
+	priv->base.read = nv04_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+	return 0; /* nothing after the allocation can fail; the old "ret" was never assigned */
+}
diff --git a/drivers/gpu/drm/nouveau/nv04_software.c b/drivers/gpu/drm/nouveau/nv04_software.c
index a91cec6..0c41abf 100644
--- a/drivers/gpu/drm/nouveau/nv04_software.c
+++ b/drivers/gpu/drm/nouveau/nv04_software.c
@@ -26,6 +26,7 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
+#include "nouveau_fence.h"
 #include "nouveau_software.h"
 #include "nouveau_hw.h"
 
@@ -38,13 +39,6 @@
 };
 
 static int
-mthd_fence(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
-{
-	atomic_set(&chan->fence.last_sequence_irq, data);
-	return 0;
-}
-
-static int
 mthd_flip(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
 {
 
@@ -69,7 +63,6 @@
 		return -ENOMEM;
 
 	nouveau_software_context_new(&pch->base);
-	atomic_set(&chan->fence.last_sequence_irq, 0);
 	chan->engctx[engine] = pch;
 	return 0;
 }
@@ -143,7 +136,7 @@
 	NVOBJ_ENGINE_ADD(dev, SW, &psw->base.base);
 	if (dev_priv->card_type <= NV_04) {
 		NVOBJ_CLASS(dev, 0x006e, SW);
-		NVOBJ_MTHD (dev, 0x006e, 0x0150, mthd_fence);
+		NVOBJ_MTHD (dev, 0x006e, 0x0150, nv04_fence_mthd);
 		NVOBJ_MTHD (dev, 0x006e, 0x0500, mthd_flip);
 	} else {
 		NVOBJ_CLASS(dev, 0x016e, SW);
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
new file mode 100644
index 0000000..10831ea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv10_fence_chan { /* per-channel context; only the common state is needed */
+	struct nouveau_fence_chan base;
+};
+
+struct nv10_fence_priv {
+	struct nouveau_fence_priv base; /* must stay first: looked up through nv_engine() */
+	struct nouveau_bo *bo; /* semaphore buffer, only allocated when chipset >= 0x17; NULL otherwise */
+	spinlock_t lock; /* protects sequence */
+	u32 sequence; /* next semaphore value handed out by nv17_fence_sync() */
+};
+
+static int
+nv10_fence_emit(struct nouveau_fence *fence)
+{	/* Queue an NV10_SUBCHAN_REF_CNT method carrying fence->sequence. */
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
+		OUT_RING  (chan, fence->sequence);
+		FIRE_RING (chan);
+	}
+	return ret; /* 0 on success, otherwise the RING_SPACE() error */
+}
+
+static int
+nv10_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{	/* Default sync hook (chipset < 0x17, or bo setup failed): caller falls back to nouveau_fence_wait(). */
+	return -ENODEV;
+}
+
+static int
+nv17_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{	/* Order chan after fence's channel (prev) through the shared NvSema semaphore. */
+	struct nv10_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_channel *prev = fence->channel;
+	u32 value;
+	int ret;
+
+	if (!mutex_trylock(&prev->mutex)) /* never block on prev; -EBUSY sends the caller to its wait fallback */
+		return -EBUSY;
+
+	spin_lock(&priv->lock);
+	value = priv->sequence; /* this sync uses value, value + 1 and value + 2 */
+	priv->sequence += 2; /* the next sync starts at this sync's final value */
+	spin_unlock(&priv->lock);
+
+	ret = RING_SPACE(prev, 5);
+	if (!ret) {
+		BEGIN_NV04(prev, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 4); /* presumably: handle, offset, acquire, release - confirm */
+		OUT_RING  (prev, NvSema);
+		OUT_RING  (prev, 0);
+		OUT_RING  (prev, value + 0);
+		OUT_RING  (prev, value + 1);
+		FIRE_RING (prev);
+	}
+
+	if (!ret && !(ret = RING_SPACE(chan, 5))) { /* only queue on chan if prev was queued */
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 4);
+		OUT_RING  (chan, NvSema);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, value + 1);
+		OUT_RING  (chan, value + 2);
+		FIRE_RING (chan);
+	}
+
+	mutex_unlock(&prev->mutex);
+	return ret; /* report RING_SPACE() failure so nouveau_fence_sync() falls back to nouveau_fence_wait() */
+}
+
+static u32
+nv10_fence_read(struct nouveau_channel *chan)
+{	/* Read channel register 0x48; presumably the counter set by NV10_SUBCHAN_REF_CNT - confirm. */
+	return nvchan_rd32(chan, 0x0048);
+}
+
+static void
+nv10_fence_context_del(struct nouveau_channel *chan, int engine)
+{	/* Tear down the channel's fence context; also the error path of nv10_fence_context_new(). */
+	struct nv10_fence_chan *fctx = chan->engctx[engine];
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL; /* a NULL slot makes nouveau_fence_new() return -ENODEV */
+	kfree(fctx);
+}
+
+static int
+nv10_fence_context_new(struct nouveau_channel *chan, int engine)
+{	/* Create the channel's fence context and, when a semaphore bo exists, expose it as NvSema. */
+	struct nv10_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nv10_fence_chan *fctx;
+	struct nouveau_gpuobj *obj;
+	int ret = 0;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL); /* slot is set even when NULL */
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+
+	if (priv->bo) { /* only set by nv10_fence_create() when chipset >= 0x17 */
+		struct ttm_mem_reg *mem = &priv->bo->bo.mem;
+
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
+					     mem->start * PAGE_SIZE, mem->size,
+					     NV_MEM_ACCESS_RW,
+					     NV_MEM_TARGET_VRAM, &obj);
+		if (!ret) {
+			ret = nouveau_ramht_insert(chan, NvSema, obj); /* handle referenced by nv17_fence_sync() */
+			nouveau_gpuobj_ref(NULL, &obj); /* drop the local reference whether or not the insert worked */
+		}
+	}
+
+	if (ret)
+		nv10_fence_context_del(chan, engine); /* frees fctx and clears the engctx slot */
+	return ret;
+}
+
+static int
+nv10_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{	/* Engine fini hook: nothing to do, always succeeds. */
+	return 0;
+}
+
+static int
+nv10_fence_init(struct drm_device *dev, int engine)
+{	/* Engine init hook: nothing to do, always succeeds. */
+	return 0;
+}
+
+static void
+nv10_fence_destroy(struct drm_device *dev, int engine)
+{	/* Engine destroy hook; also the error path of nv10_fence_create(). */
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fence_priv *priv = nv_engine(dev, engine);
+
+	nouveau_bo_ref(NULL, &priv->bo); /* NOTE(review): the removed nouveau_fence_fini() unmapped and unpinned its bo first - confirm not needed here */
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv10_fence_create(struct drm_device *dev)
+{	/* Install the NV10 fence engine; on chipset >= 0x17 also set up semaphore-based sync. */
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fence_priv *priv;
+	int ret = 0;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nv10_fence_destroy;
+	priv->base.engine.init = nv10_fence_init;
+	priv->base.engine.fini = nv10_fence_fini;
+	priv->base.engine.context_new = nv10_fence_context_new;
+	priv->base.engine.context_del = nv10_fence_context_del;
+	priv->base.emit = nv10_fence_emit;
+	priv->base.read = nv10_fence_read;
+	priv->base.sync = nv10_fence_sync; /* default; replaced below once the semaphore bo is ready */
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine; /* registered first so nv10_fence_destroy() can find priv on error */
+	spin_lock_init(&priv->lock);
+
+	if (dev_priv->chipset >= 0x17) {
+		ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+				     0, 0x0000, NULL, &priv->bo);
+		if (!ret) {
+			ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
+			if (!ret)
+				ret = nouveau_bo_map(priv->bo);
+			if (ret) /* NOTE(review): a bo that pinned but failed to map is dropped without an unpin - confirm */
+				nouveau_bo_ref(NULL, &priv->bo);
+		}
+
+		if (ret == 0) {
+			nouveau_bo_wr32(priv->bo, 0x000, 0x00000000); /* word 0 starts at 0, matching the zeroed priv->sequence */
+			priv->base.sync = nv17_fence_sync;
+		}
+	}
+
+	if (ret) /* any bo setup failure tears the whole engine down again */
+		nv10_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
new file mode 100644
index 0000000..d23dbc0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv84_fence_chan { /* per-channel context kept in chan->engctx[] */
+	struct nouveau_fence_chan base; /* common nouveau_fence_chan part */
+};
+
+struct nv84_fence_priv { /* engine-wide state, registered via base.engine */
+	struct nouveau_fence_priv base; /* engine hooks plus emit/sync/read */
+	struct nouveau_gpuobj *mem; /* 16 bytes per channel, at chan->id * 16 */
+};
+
+/* Queue a semaphore WRITE_LONG of fence->sequence at chan->id * 16. */
+static int nv84_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 7);
+	if (ret)
+		return ret;
+	BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+	OUT_RING  (chan, NvSema);
+	BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+	OUT_RING  (chan, upper_32_bits(chan->id * 16));
+	OUT_RING  (chan, lower_32_bits(chan->id * 16));
+	OUT_RING  (chan, fence->sequence);
+	OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+	FIRE_RING (chan);
+	return 0;
+}
+
+static int
+nv84_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{
+	int ret = RING_SPACE(chan, 7); /* chan is the waiting channel */
+	if (ret)
+		return ret;
+	BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+	OUT_RING  (chan, NvSema);
+	BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+	OUT_RING  (chan, upper_32_bits(fence->channel->id * 16));
+	OUT_RING  (chan, lower_32_bits(fence->channel->id * 16));
+	OUT_RING  (chan, fence->sequence); /* operand of the >= acquire */
+	OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
+	FIRE_RING (chan);
+	return 0;
+}
+
+/* Read back the value stored at offset chan->id * 16 of priv->mem. */
+static u32 nv84_fence_read(struct nouveau_channel *chan)
+{
+	struct nv84_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	return nv_ro32(priv->mem, chan->id * 16);
+}
+
+/* Tear down and free the fence context attached to chan for this engine. */
+static void nv84_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fence_chan *ctx = chan->engctx[engine];
+	nouveau_fence_context_del(&ctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(ctx);
+}
+
+/* Per-channel setup: allocate the context and bind NvSema to priv->mem. */
+static int nv84_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nv84_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	struct nouveau_gpuobj *obj;
+	int ret;
+
+	chan->engctx[engine] = fctx;
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+	ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
+				     priv->mem->vinst, priv->mem->size,
+				     NV_MEM_ACCESS_RW,
+				     NV_MEM_TARGET_VRAM, &obj);
+	if (ret)
+		goto out;
+
+	ret = nouveau_ramht_insert(chan, NvSema, obj);
+	nouveau_gpuobj_ref(NULL, &obj); /* local reference is dropped here */
+	nv_wo32(priv->mem, chan->id * 16, 0x00000000); /* zero this slot */
+out:
+	if (ret)
+		nv84_fence_context_del(chan, engine);
+	return ret;
+}
+
+/* Engine fini hook; performs no work and always returns 0. */
+static int nv84_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+/* Engine init hook; performs no work and always returns 0. */
+static int nv84_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+/* Engine destroy hook: release priv->mem, unregister and free the engine. */
+static void nv84_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct nv84_fence_priv *priv = nv_engine(dev, engine);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	nouveau_gpuobj_ref(NULL, &priv->mem);
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+/*
+ * Register the nv84 fence engine and allocate the memory behind its slots.
+ * Returns 0, -ENOMEM, or the error from nouveau_gpuobj_new() after teardown.
+ */
+int nv84_fence_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nv84_fence_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	int ret;
+
+	if (!priv)
+		return -ENOMEM;
+
+	/* Exec-engine hooks first, then the fence operations. */
+	priv->base.engine.destroy = nv84_fence_destroy;
+	priv->base.engine.init = nv84_fence_init;
+	priv->base.engine.fini = nv84_fence_fini;
+	priv->base.engine.context_new = nv84_fence_context_new;
+	priv->base.engine.context_del = nv84_fence_context_del;
+	priv->base.emit = nv84_fence_emit;
+	priv->base.sync = nv84_fence_sync;
+	priv->base.read = nv84_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+
+	/* 16 bytes for each of the pfifo->channels channels. */
+	ret = nouveau_gpuobj_new(dev, NULL, 16 * pfifo->channels,
+				 0x1000, 0, &priv->mem);
+	if (ret)
+		nv84_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
new file mode 100644
index 0000000..41545f1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nvc0_fence_priv { /* engine-wide state, registered via base.engine */
+	struct nouveau_fence_priv base; /* engine hooks plus emit/sync/read */
+	struct nouveau_bo *bo; /* VRAM bo sized 16 bytes per FIFO channel */
+};
+
+struct nvc0_fence_chan { /* per-channel context kept in chan->engctx[] */
+	struct nouveau_fence_chan base; /* common nouveau_fence_chan part */
+	struct nouveau_vma vma; /* priv->bo as added to the channel's vm */
+};
+
+/* Queue a semaphore WRITE_LONG of fence->sequence to this channel's slot. */
+static int nvc0_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	u64 addr = fctx->vma.offset + chan->id * 16;
+	int ret = RING_SPACE(chan, 5);
+
+	if (ret)
+		return ret;
+
+	BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+	OUT_RING  (chan, upper_32_bits(addr));
+	OUT_RING  (chan, lower_32_bits(addr));
+	OUT_RING  (chan, fence->sequence);
+	OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+	FIRE_RING (chan);
+
+	return 0;
+}
+
+/* Queue an ACQUIRE_GEQUAL wait in chan on the slot of fence->channel. */
+static int
+nvc0_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+{
+	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	u64 addr = fctx->vma.offset + fence->channel->id * 16;
+	int ret = RING_SPACE(chan, 5);
+
+	if (ret)
+		return ret;
+
+	BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+	OUT_RING  (chan, upper_32_bits(addr));
+	OUT_RING  (chan, lower_32_bits(addr));
+	OUT_RING  (chan, fence->sequence);
+	OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
+			 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
+	FIRE_RING (chan);
+	return 0;
+}
+
+/* Read back the value at index chan->id * 16/4 of priv->bo. */
+static u32 nvc0_fence_read(struct nouveau_channel *chan)
+{
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	return nouveau_bo_rd32(priv->bo, chan->id * 16/4);
+}
+
+/* Drop the channel's vma for the fence bo, then free its fence context. */
+static void nvc0_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_fence_chan *fctx = chan->engctx[engine];
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, engine);
+
+	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+/* Per-channel setup: allocate fctx and add the fence bo to chan->vm. */
+static int nvc0_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nvc0_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	int ret;
+
+	chan->engctx[engine] = fctx;
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+	ret = nouveau_bo_vma_add(priv->bo, chan->vm, &fctx->vma);
+	if (ret)
+		nvc0_fence_context_del(chan, engine);
+
+	/* The slot is zeroed whether or not the vma was added. */
+	nouveau_bo_wr32(priv->bo, chan->id * 16/4, 0x00000000);
+	return ret;
+}
+
+/* Engine fini hook; performs no work and always returns 0. */
+static int nvc0_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+/* Engine init hook; performs no work and always returns 0. */
+static int nvc0_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+/* Engine destroy hook: unmap and drop the bo, unregister and free priv. */
+static void nvc0_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_fence_priv *priv = nv_engine(dev, engine);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	nouveau_bo_unmap(priv->bo);
+	nouveau_bo_ref(NULL, &priv->bo);
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+/* Register the nvc0 fence engine and set up its pinned, mapped VRAM bo. */
+int nvc0_fence_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nvc0_fence_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	int ret;
+
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nvc0_fence_destroy;
+	priv->base.engine.init = nvc0_fence_init;
+	priv->base.engine.fini = nvc0_fence_fini;
+	priv->base.engine.context_new = nvc0_fence_context_new;
+	priv->base.engine.context_del = nvc0_fence_context_del;
+	priv->base.emit = nvc0_fence_emit;
+	priv->base.sync = nvc0_fence_sync;
+	priv->base.read = nvc0_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+
+	ret = nouveau_bo_new(dev, 16 * pfifo->channels, 0, TTM_PL_FLAG_VRAM,
+			     0, 0, NULL, &priv->bo);
+	if (ret)
+		goto fail;
+
+	ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
+	if (!ret)
+		ret = nouveau_bo_map(priv->bo);
+	if (!ret)
+		return 0;
+	nouveau_bo_ref(NULL, &priv->bo); /* pin or map failed */
+fail:
+	nvc0_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}