drm/nouveau/fifo: turn all fifo modules into engine modules

Been tested on each major revision that's relevant here, but I'm sure there
are still bugs waiting to be ironed out.

This is a *very* invasive change.

There's a couple of pieces left that I don't like much (eg. other engines
using fifo_priv for the channel count), but that's an artefact of there
being a master channel list still.  This is changing, slowly.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index ce222eb..fe5267d 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -16,8 +16,8 @@
              nv04_mc.o nv40_mc.o nv50_mc.o \
              nv04_fb.o nv10_fb.o nv20_fb.o nv30_fb.o nv40_fb.o \
              nv50_fb.o nvc0_fb.o \
-             nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o nvc0_fifo.o \
-             nve0_fifo.o \
+             nv04_fifo.o nv10_fifo.o nv17_fifo.o nv40_fifo.o nv50_fifo.o \
+             nv84_fifo.o nvc0_fifo.o nve0_fifo.o \
              nv04_fence.o nv10_fence.o nv84_fence.o nvc0_fence.o \
              nv04_software.o nv50_software.o nvc0_software.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index a1f5667..9420538 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -27,6 +27,7 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_fence.h"
 #include "nouveau_software.h"
@@ -120,8 +121,8 @@
 		      uint32_t vram_handle, uint32_t gart_handle)
 {
 	struct nouveau_exec_engine *fence = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv);
 	struct nouveau_channel *chan;
 	unsigned long flags;
@@ -189,20 +190,13 @@
 	if (dev_priv->card_type >= NV_50)
 		chan->user_get_hi = 0x60;
 
-	/* disable the fifo caches */
-	if (dev_priv->card_type < NV_50)
-		nv_wr32(dev, NV03_PFIFO_CACHES, 0);
-
-	/* Construct initial RAMFC for new channel */
-	ret = pfifo->create_context(chan);
+	/* create fifo context */
+	ret = pfifo->base.context_new(chan, NVOBJ_ENGINE_FIFO);
 	if (ret) {
 		nouveau_channel_put(&chan);
 		return ret;
 	}
 
-	if (dev_priv->card_type < NV_50)
-		nv_wr32(dev, NV03_PFIFO_CACHES, 1);
-
 	/* Insert NOPs for NOUVEAU_DMA_SKIPS */
 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
 	if (ret) {
@@ -288,7 +282,6 @@
 	struct nouveau_channel *chan = *pchan;
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	unsigned long flags;
 	int i;
 
@@ -305,22 +298,12 @@
 	/* give it chance to idle */
 	nouveau_channel_idle(chan);
 
-	/* boot it off the hardware */
-	if (dev_priv->card_type < NV_50)
-		nv_wr32(dev, NV03_PFIFO_CACHES, 0);
-
 	/* destroy the engine specific contexts */
 	for (i = NVOBJ_ENGINE_NR - 1; i >= 0; i--) {
 		if (chan->engctx[i])
 			dev_priv->eng[i]->context_del(chan, i);
-		/*XXX: clean this up later, order is important */
-		if (i == NVOBJ_ENGINE_FENCE)
-			pfifo->destroy_context(chan);
 	}
 
-	if (dev_priv->card_type < NV_50)
-		nv_wr32(dev, NV03_PFIFO_CACHES, 1);
-
 	/* aside from its resources, the channel should now be dead,
 	 * remove it from the channel list
 	 */
@@ -393,13 +376,15 @@
 void
 nouveau_channel_cleanup(struct drm_device *dev, struct drm_file *file_priv)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_engine *engine = &dev_priv->engine;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan;
 	int i;
 
+	if (!pfifo)
+		return;
+
 	NV_DEBUG(dev, "clearing FIFO enables from file_priv\n");
-	for (i = 0; i < engine->fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = nouveau_channel_get(file_priv, i);
 		if (IS_ERR(chan))
 			continue;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 910b97e..cad254c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -33,6 +33,7 @@
 #include "nouveau_fb.h"
 #include "nouveau_fbcon.h"
 #include "nouveau_pm.h"
+#include "nouveau_fifo.h"
 #include "nv50_display.h"
 
 #include "drm_pciids.h"
@@ -175,7 +176,7 @@
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan;
 	struct drm_crtc *crtc;
 	int ret, i, e;
@@ -214,21 +215,13 @@
 	ttm_bo_evict_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 
 	NV_INFO(dev, "Idling channels...\n");
-	for (i = 0; i < pfifo->channels; i++) {
+	for (i = 0; i < (pfifo ? pfifo->channels : 0); i++) {
 		chan = dev_priv->channels.ptr[i];
 
 		if (chan && chan->pushbuf_bo)
 			nouveau_channel_idle(chan);
 	}
 
-	if (dev_priv->card_type < NV_50) {
-		nv_wr32(dev, NV03_PFIFO_CACHES, 0);
-		nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0);
-		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
-		nv_mask(dev, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0);
-	}
-	pfifo->unload_context(dev);
-
 	for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
 		if (!dev_priv->eng[e])
 			continue;
@@ -269,11 +262,6 @@
 		if (dev_priv->eng[e])
 			dev_priv->eng[e]->init(dev, e);
 	}
-	if (dev_priv->card_type < NV_50) {
-		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
-		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
-		nv_wr32(dev, NV03_PFIFO_CACHES, 1);
-	}
 	return ret;
 }
 
@@ -281,6 +269,7 @@
 nouveau_pci_resume(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_engine *engine = &dev_priv->engine;
 	struct drm_crtc *crtc;
@@ -328,7 +317,6 @@
 		if (dev_priv->eng[i])
 			dev_priv->eng[i]->init(dev, i);
 	}
-	engine->fifo.init(dev);
 
 	nouveau_irq_postinstall(dev);
 
@@ -337,7 +325,7 @@
 		struct nouveau_channel *chan;
 		int j;
 
-		for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+		for (i = 0; i < (pfifo ? pfifo->channels : 0); i++) {
 			chan = dev_priv->channels.ptr[i];
 			if (!chan || !chan->pushbuf_bo)
 				continue;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 9943ccf..1ede354 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -165,9 +165,10 @@
 #define NVOBJ_ENGINE_PPP	NVOBJ_ENGINE_MPEG
 #define NVOBJ_ENGINE_BSP	6
 #define NVOBJ_ENGINE_VP		7
-#define NVOBJ_ENGINE_FENCE	14
-#define NVOBJ_ENGINE_DISPLAY	15
+#define NVOBJ_ENGINE_FIFO	14
+#define NVOBJ_ENGINE_FENCE	15
 #define NVOBJ_ENGINE_NR		16
+#define NVOBJ_ENGINE_DISPLAY	(NVOBJ_ENGINE_NR + 0) /*XXX*/
 
 #define NVOBJ_FLAG_DONT_MAP             (1 << 0)
 #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
@@ -248,8 +249,6 @@
 
 	/* PFIFO context */
 	struct nouveau_gpuobj *ramfc;
-	struct nouveau_gpuobj *cache;
-	void *fifo_priv;
 
 	/* Execution engine contexts */
 	void *engctx[NVOBJ_ENGINE_NR];
@@ -283,8 +282,6 @@
 		int ib_put;
 	} dma;
 
-	uint32_t sw_subchannel[8];
-
 	struct {
 		bool active;
 		char name[32];
@@ -347,23 +344,6 @@
 	void (*free_tile_region)(struct drm_device *dev, int i);
 };
 
-struct nouveau_fifo_engine {
-	void *priv;
-	int  channels;
-
-	struct nouveau_gpuobj *playlist[2];
-	int cur_playlist;
-
-	int  (*init)(struct drm_device *);
-	void (*takedown)(struct drm_device *);
-
-	int  (*create_context)(struct nouveau_channel *);
-	void (*destroy_context)(struct nouveau_channel *);
-	int  (*load_context)(struct nouveau_channel *);
-	int  (*unload_context)(struct drm_device *);
-	void (*tlb_flush)(struct drm_device *dev);
-};
-
 struct nouveau_display_engine {
 	void *priv;
 	int (*early_init)(struct drm_device *);
@@ -571,7 +551,6 @@
 	struct nouveau_mc_engine      mc;
 	struct nouveau_timer_engine   timer;
 	struct nouveau_fb_engine      fb;
-	struct nouveau_fifo_engine    fifo;
 	struct nouveau_display_engine display;
 	struct nouveau_gpio_engine    gpio;
 	struct nouveau_pm_engine      pm;
@@ -1183,52 +1162,6 @@
 extern int  nvc0_fb_init(struct drm_device *);
 extern void nvc0_fb_takedown(struct drm_device *);
 
-/* nv04_fifo.c */
-extern int  nv04_fifo_init(struct drm_device *);
-extern void nv04_fifo_fini(struct drm_device *);
-extern int  nv04_fifo_create_context(struct nouveau_channel *);
-extern void nv04_fifo_destroy_context(struct nouveau_channel *);
-extern int  nv04_fifo_load_context(struct nouveau_channel *);
-extern int  nv04_fifo_unload_context(struct drm_device *);
-extern void nv04_fifo_isr(struct drm_device *);
-bool nv04_fifo_cache_pull(struct drm_device *, bool enable);
-
-/* nv10_fifo.c */
-extern int  nv10_fifo_init(struct drm_device *);
-extern int  nv10_fifo_create_context(struct nouveau_channel *);
-extern int  nv10_fifo_load_context(struct nouveau_channel *);
-extern int  nv10_fifo_unload_context(struct drm_device *);
-
-/* nv40_fifo.c */
-extern int  nv40_fifo_init(struct drm_device *);
-extern int  nv40_fifo_create_context(struct nouveau_channel *);
-extern int  nv40_fifo_load_context(struct nouveau_channel *);
-extern int  nv40_fifo_unload_context(struct drm_device *);
-
-/* nv50_fifo.c */
-extern int  nv50_fifo_init(struct drm_device *);
-extern void nv50_fifo_takedown(struct drm_device *);
-extern int  nv50_fifo_create_context(struct nouveau_channel *);
-extern void nv50_fifo_destroy_context(struct nouveau_channel *);
-extern int  nv50_fifo_load_context(struct nouveau_channel *);
-extern int  nv50_fifo_unload_context(struct drm_device *);
-extern void nv50_fifo_tlb_flush(struct drm_device *dev);
-
-/* nvc0_fifo.c */
-extern int  nvc0_fifo_init(struct drm_device *);
-extern void nvc0_fifo_takedown(struct drm_device *);
-extern int  nvc0_fifo_create_context(struct nouveau_channel *);
-extern void nvc0_fifo_destroy_context(struct nouveau_channel *);
-extern int  nvc0_fifo_load_context(struct nouveau_channel *);
-extern int  nvc0_fifo_unload_context(struct drm_device *);
-
-/* nve0_fifo.c */
-extern int  nve0_fifo_init(struct drm_device *);
-extern void nve0_fifo_takedown(struct drm_device *);
-extern int  nve0_fifo_create_context(struct nouveau_channel *);
-extern void nve0_fifo_destroy_context(struct nouveau_channel *);
-extern int  nve0_fifo_unload_context(struct drm_device *);
-
 /* nv04_graph.c */
 extern int  nv04_graph_create(struct drm_device *);
 extern int  nv04_graph_object_new(struct nouveau_channel *, int, u32, u16);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h b/drivers/gpu/drm/nouveau/nouveau_fifo.h
new file mode 100644
index 0000000..ce99cab
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h
@@ -0,0 +1,32 @@
+#ifndef __NOUVEAU_FIFO_H__
+#define __NOUVEAU_FIFO_H__
+
+struct nouveau_fifo_priv {
+	struct nouveau_exec_engine base;
+	u32 channels;
+};
+
+struct nouveau_fifo_chan {
+};
+
+bool nv04_fifo_cache_pull(struct drm_device *, bool);
+void nv04_fifo_context_del(struct nouveau_channel *, int);
+int  nv04_fifo_fini(struct drm_device *, int, bool);
+int  nv04_fifo_init(struct drm_device *, int);
+void nv04_fifo_isr(struct drm_device *);
+void nv04_fifo_destroy(struct drm_device *, int);
+
+void nv50_fifo_playlist_update(struct drm_device *);
+void nv50_fifo_destroy(struct drm_device *, int);
+void nv50_fifo_tlb_flush(struct drm_device *, int);
+
+int  nv04_fifo_create(struct drm_device *);
+int  nv10_fifo_create(struct drm_device *);
+int  nv17_fifo_create(struct drm_device *);
+int  nv40_fifo_create(struct drm_device *);
+int  nv50_fifo_create(struct drm_device *);
+int  nv84_fifo_create(struct drm_device *);
+int  nvc0_fifo_create(struct drm_device *);
+int  nve0_fifo_create(struct drm_device *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index fd72734..5b498ea 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -39,6 +39,7 @@
 #include "nouveau_pm.h"
 #include "nouveau_mm.h"
 #include "nouveau_vm.h"
+#include "nouveau_fifo.h"
 #include "nouveau_fence.h"
 
 /*
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 8e0b38f..d7e56ce 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -34,6 +34,7 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_software.h"
 #include "nouveau_vm.h"
@@ -120,12 +121,13 @@
 			  u32 class, u32 mthd, u32 data)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan = NULL;
 	unsigned long flags;
 	int ret = -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (chid >= 0 && chid < dev_priv->engine.fifo.channels)
+	if (chid >= 0 && chid < pfifo->channels)
 		chan = dev_priv->channels.ptr[chid];
 	if (chan)
 		ret = nouveau_gpuobj_mthd_call(chan, class, mthd, data);
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 660a033..e4e73a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -39,6 +39,7 @@
 #include "nouveau_gpio.h"
 #include "nouveau_pm.h"
 #include "nv50_display.h"
+#include "nouveau_fifo.h"
 #include "nouveau_fence.h"
 #include "nouveau_software.h"
 
@@ -68,13 +69,6 @@
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv04_fb_init;
 		engine->fb.takedown		= nv04_fb_takedown;
-		engine->fifo.channels		= 16;
-		engine->fifo.init		= nv04_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.create_context	= nv04_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv04_fifo_load_context;
-		engine->fifo.unload_context	= nv04_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -108,13 +102,6 @@
 		engine->fb.init_tile_region	= nv10_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv10_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -154,13 +141,6 @@
 		engine->fb.init_tile_region	= nv20_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv20_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv20_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -196,13 +176,6 @@
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -241,13 +214,6 @@
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv40_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv40_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.create_context	= nv40_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv40_fifo_load_context;
-		engine->fifo.unload_context	= nv40_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -294,14 +260,6 @@
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv50_fb_init;
 		engine->fb.takedown		= nv50_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nv50_fifo_init;
-		engine->fifo.takedown		= nv50_fifo_takedown;
-		engine->fifo.create_context	= nv50_fifo_create_context;
-		engine->fifo.destroy_context	= nv50_fifo_destroy_context;
-		engine->fifo.load_context	= nv50_fifo_load_context;
-		engine->fifo.unload_context	= nv50_fifo_unload_context;
-		engine->fifo.tlb_flush		= nv50_fifo_tlb_flush;
 		engine->display.early_init	= nv50_display_early_init;
 		engine->display.late_takedown	= nv50_display_late_takedown;
 		engine->display.create		= nv50_display_create;
@@ -365,13 +323,6 @@
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nvc0_fifo_init;
-		engine->fifo.takedown		= nvc0_fifo_takedown;
-		engine->fifo.create_context	= nvc0_fifo_create_context;
-		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
-		engine->fifo.load_context	= nvc0_fifo_load_context;
-		engine->fifo.unload_context	= nvc0_fifo_unload_context;
 		engine->display.early_init	= nv50_display_early_init;
 		engine->display.late_takedown	= nv50_display_late_takedown;
 		engine->display.create		= nv50_display_create;
@@ -414,13 +365,6 @@
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nvc0_fifo_init;
-		engine->fifo.takedown		= nvc0_fifo_takedown;
-		engine->fifo.create_context	= nvc0_fifo_create_context;
-		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
-		engine->fifo.load_context	= nvc0_fifo_load_context;
-		engine->fifo.unload_context	= nvc0_fifo_unload_context;
 		engine->display.early_init	= nouveau_stub_init;
 		engine->display.late_takedown	= nouveau_stub_takedown;
 		engine->display.create		= nvd0_display_create;
@@ -461,13 +405,6 @@
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 4096;
-		engine->fifo.init		= nve0_fifo_init;
-		engine->fifo.takedown		= nve0_fifo_takedown;
-		engine->fifo.create_context	= nve0_fifo_create_context;
-		engine->fifo.destroy_context	= nve0_fifo_destroy_context;
-		engine->fifo.load_context	= nvc0_fifo_load_context;
-		engine->fifo.unload_context	= nve0_fifo_unload_context;
 		engine->display.early_init	= nouveau_stub_init;
 		engine->display.late_takedown	= nouveau_stub_takedown;
 		engine->display.create		= nvd0_display_create;
@@ -728,6 +665,38 @@
 	if (!dev_priv->noaccel) {
 		switch (dev_priv->card_type) {
 		case NV_04:
+			nv04_fifo_create(dev);
+			break;
+		case NV_10:
+		case NV_20:
+		case NV_30:
+			if (dev_priv->chipset < 0x17)
+				nv10_fifo_create(dev);
+			else
+				nv17_fifo_create(dev);
+			break;
+		case NV_40:
+			nv40_fifo_create(dev);
+			break;
+		case NV_50:
+			if (dev_priv->chipset == 0x50)
+				nv50_fifo_create(dev);
+			else
+				nv84_fifo_create(dev);
+			break;
+		case NV_C0:
+		case NV_D0:
+			nvc0_fifo_create(dev);
+			break;
+		case NV_E0:
+			nve0_fifo_create(dev);
+			break;
+		default:
+			break;
+		}
+
+		switch (dev_priv->card_type) {
+		case NV_04:
 			nv04_fence_create(dev);
 			break;
 		case NV_10:
@@ -859,16 +828,11 @@
 					goto out_engine;
 			}
 		}
-
-		/* PFIFO */
-		ret = engine->fifo.init(dev);
-		if (ret)
-			goto out_engine;
 	}
 
 	ret = nouveau_irq_init(dev);
 	if (ret)
-		goto out_fifo;
+		goto out_engine;
 
 	ret = nouveau_display_create(dev);
 	if (ret)
@@ -901,9 +865,6 @@
 	nouveau_display_destroy(dev);
 out_irq:
 	nouveau_irq_fini(dev);
-out_fifo:
-	if (!dev_priv->noaccel)
-		engine->fifo.takedown(dev);
 out_engine:
 	if (!dev_priv->noaccel) {
 		for (e = e - 1; e >= 0; e--) {
@@ -956,7 +917,6 @@
 	nouveau_display_destroy(dev);
 
 	if (!dev_priv->noaccel) {
-		engine->fifo.takedown(dev);
 		for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
 			if (dev_priv->eng[e]) {
 				dev_priv->eng[e]->fini(dev, e, false);
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index 584c24d..a6295cd 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,21 +27,38 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
-#include "nouveau_ramht.h"
+#include "nouveau_fifo.h"
 #include "nouveau_util.h"
+#include "nouveau_ramht.h"
+#include "nouveau_software.h"
 
-#define NV04_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV04_RAMFC__SIZE))
-#define NV04_RAMFC__SIZE 32
-#define NV04_RAMFC_DMA_PUT                                       0x00
-#define NV04_RAMFC_DMA_GET                                       0x04
-#define NV04_RAMFC_DMA_INSTANCE                                  0x08
-#define NV04_RAMFC_DMA_STATE                                     0x0C
-#define NV04_RAMFC_DMA_FETCH                                     0x10
-#define NV04_RAMFC_ENGINE                                        0x14
-#define NV04_RAMFC_PULL1_ENGINE                                  0x18
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv04_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 16,  0, 0x08,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x08,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{}
+};
 
-#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc, NV04_RAMFC_##offset, (val))
-#define RAMFC_RD(offset)      nv_ro32(chan->ramfc, NV04_RAMFC_##offset)
+struct nv04_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv04_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
 
 bool
 nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
@@ -58,13 +75,13 @@
 		 * invalidate the most recently calculated instance.
 		 */
 		if (!nv_wait(dev, NV04_PFIFO_CACHE1_PULL0,
-			     NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0))
+				  NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0))
 			NV_ERROR(dev, "Timeout idling the PFIFO puller.\n");
 
 		if (nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0) &
-		    NV04_PFIFO_CACHE1_PULL0_HASH_FAILED)
+				 NV04_PFIFO_CACHE1_PULL0_HASH_FAILED)
 			nv_wr32(dev, NV03_PFIFO_INTR_0,
-				NV_PFIFO_INTR_CACHE_ERROR);
+				     NV_PFIFO_INTR_CACHE_ERROR);
 
 		nv_wr32(dev, NV04_PFIFO_CACHE1_HASH, 0);
 	}
@@ -72,238 +89,182 @@
 	return pull & 1;
 }
 
-#ifdef __BIG_ENDIAN
-#define DMA_FETCH_ENDIANNESS NV_PFIFO_CACHE1_BIG_ENDIAN
-#else
-#define DMA_FETCH_ENDIANNESS 0
-#endif
-
-int
-nv04_fifo_create_context(struct nouveau_channel *chan)
+static int
+nv04_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv04_fifo_chan *fctx;
 	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(chan->id), ~0,
-						NV04_RAMFC__SIZE,
-						NVOBJ_FLAG_ZERO_ALLOC |
-						NVOBJ_FLAG_ZERO_FREE,
-						&chan->ramfc);
-	if (ret)
-		return ret;
-
-	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
-			     NV03_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
 
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 32, ~0, 32,
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x08, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x0c, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x10, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x14, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x18, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x1c, 0x00000000);
+
+	/* enable dma mode on the channel */
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-
-	/* Setup initial state */
-	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
-	RAMFC_WR(DMA_GET, chan->pushbuf_base);
-	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->pinst >> 4);
-	RAMFC_WR(DMA_FETCH, (NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			     NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			     NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
-			     DMA_FETCH_ENDIANNESS));
-
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
-
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
 void
-nv04_fifo_destroy_context(struct nouveau_channel *chan)
+nv04_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nv04_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv04_fifo_chan *fctx = chan->engctx[engine];
+	struct ramfc_desc *c = priv->ramfc_desc;
 	unsigned long flags;
+	int chid;
 
+	/* prevent fifo context switches */
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
 	nv_wr32(dev, NV03_PFIFO_CACHES, 0);
 
-	/* Unload the context if it's the currently active one */
-	if ((nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & 0xf) == chan->id) {
+	/* if this channel is active, replace it with a null context */
+	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & priv->base.channels;
+	if (chid == chan->id) {
 		nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0);
 		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
 		nv_mask(dev, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0);
-		pfifo->unload_context(dev);
+
+		do {
+			u32 mask = ((1ULL << c->bits) - 1) << c->regs;
+			nv_mask(dev, c->regp, mask, 0x00000000);
+		} while ((++c)->bits);
+
+		nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
 		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
 		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
 	}
 
-	/* Keep it from being rescheduled */
+	/* restore normal operation, after disabling dma mode */
 	nv_mask(dev, NV04_PFIFO_MODE, 1 << chan->id, 0);
 	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	/* Free the channel resources */
+	/* clean up */
+	nouveau_gpuobj_ref(NULL, &fctx->ramfc);
+	nouveau_gpuobj_ref(NULL, &chan->ramfc); /*XXX: nv40 */
 	if (chan->user) {
 		iounmap(chan->user);
 		chan->user = NULL;
 	}
-	nouveau_gpuobj_ref(NULL, &chan->ramfc);
-}
-
-static void
-nv04_fifo_do_load_context(struct drm_device *dev, int chid)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV04_RAMFC(chid), tmp;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	tmp = nv_ri32(dev, fc + 8);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, tmp & 0xFFFF);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, tmp >> 16);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 12));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 20));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 24));
-
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
 }
 
 int
-nv04_fifo_load_context(struct nouveau_channel *chan)
-{
-	uint32_t tmp;
-
-	nv_wr32(chan->dev, NV03_PFIFO_CACHE1_PUSH1,
-			   NV03_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv04_fifo_do_load_context(chan->dev, chan->id);
-	nv_wr32(chan->dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
-
-	/* Reset NV04_PFIFO_CACHE1_DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(chan->dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(chan->dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
-}
-
-int
-nv04_fifo_unload_context(struct drm_device *dev)
+nv04_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_channel *chan = NULL;
-	uint32_t tmp;
-	int chid;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
 
-	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & 0xf;
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
 
-	chan = dev_priv->channels.ptr[chid];
-	if (!chan) {
-		NV_ERROR(dev, "Inactive channel on PFIFO: %d\n", chid);
-		return -EINVAL;
-	}
-
-	RAMFC_WR(DMA_PUT, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	RAMFC_WR(DMA_GET, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT) << 16;
-	tmp |= nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE);
-	RAMFC_WR(DMA_INSTANCE, tmp);
-	RAMFC_WR(DMA_STATE, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	RAMFC_WR(DMA_FETCH, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH));
-	RAMFC_WR(ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	RAMFC_WR(PULL1_ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
-
-	nv04_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-	return 0;
-}
-
-static void
-nv04_fifo_init_reset(struct drm_device *dev)
-{
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
-
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x002044, 0x0101ffff);
-	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003000, 0x00000000);
-	nv_wr32(dev, 0x003050, 0x00000000);
-	nv_wr32(dev, 0x003200, 0x00000000);
-	nv_wr32(dev, 0x003250, 0x00000000);
-	nv_wr32(dev, 0x003220, 0x00000000);
-
-	nv_wr32(dev, 0x003250, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-	nv_wr32(dev, 0x003210, 0x00000000);
-}
-
-static void
-nv04_fifo_init_ramxx(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	nv_wr32(dev, NV04_PFIFO_DELAY_0, 0x000000ff);
+	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x0101ffff);
 
 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
 				       ((dev_priv->ramht->bits - 9) << 16) |
 				       (dev_priv->ramht->gpuobj->pinst >> 8));
 	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
 	nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
-}
 
-static void
-nv04_fifo_init_intr(struct drm_device *dev)
-{
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
-}
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
 
-int
-nv04_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
 
-	nv04_fifo_init_reset(dev);
-	nv04_fifo_init_ramxx(dev);
-
-	nv04_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-
-	nv04_fifo_init_intr(dev);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
 	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
 	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
 
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
 	}
 
 	return 0;
 }
 
-void
-nv04_fifo_fini(struct drm_device *dev)
+int
+nv04_fifo_fini(struct drm_device *dev, int engine, bool suspend)
 {
-	nv_wr32(dev, 0x2140, 0x00000000);
-	nouveau_irq_unregister(dev, 8);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	struct nouveau_channel *chan;
+	int chid;
+
+	/* prevent context switches and halt fifo operation */
+	nv_wr32(dev, NV03_PFIFO_CACHES, 0);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 0);
+
+	/* store current fifo context in ramfc */
+	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & priv->base.channels;
+	chan = dev_priv->channels.ptr[chid];
+	if (suspend && chid != priv->base.channels && chan) {
+		struct nv04_fifo_chan *fctx = chan->engctx[engine];
+		struct nouveau_gpuobj *ctx = fctx->ramfc;
+		struct ramfc_desc *c = priv->ramfc_desc;
+		do {
+			u32 rm = ((1ULL << c->bits) - 1) << c->regs;
+			u32 cm = ((1ULL << c->bits) - 1) << c->ctxs;
+			u32 rv = (nv_rd32(dev, c->regp) &  rm) >> c->regs;
+			u32 cv = (nv_ro32(ctx, c->ctxp) & ~cm);
+			nv_wo32(ctx, c->ctxp, cv | (rv << c->ctxs));
+		} while ((++c)->bits);
+	}
+
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0x00000000);
+	return 0;
 }
 
 static bool
 nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	struct nouveau_gpuobj *obj;
@@ -314,7 +275,7 @@
 	u32 engine;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (likely(chid >= 0 && chid < dev_priv->engine.fifo.channels))
+	if (likely(chid >= 0 && chid < pfifo->channels))
 		chan = dev_priv->channels.ptr[chid];
 	if (unlikely(!chan))
 		goto out;
@@ -325,7 +286,6 @@
 		if (unlikely(!obj || obj->engine != NVOBJ_ENGINE_SW))
 			break;
 
-		chan->sw_subchannel[subc] = obj->class;
 		engine = 0x0000000f << (subc * 4);
 
 		nv_mask(dev, NV04_PFIFO_CACHE1_ENGINE, engine, 0x00000000);
@@ -336,7 +296,7 @@
 		if (unlikely(((engine >> (subc * 4)) & 0xf) != 0))
 			break;
 
-		if (!nouveau_gpuobj_mthd_call(chan, chan->sw_subchannel[subc],
+		if (!nouveau_gpuobj_mthd_call(chan, nouveau_software_class(dev),
 					      mthd, data))
 			handled = true;
 		break;
@@ -359,6 +319,7 @@
 void
 nv04_fifo_isr(struct drm_device *dev)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t status, reassign;
 	int cnt = 0;
@@ -369,8 +330,7 @@
 
 		nv_wr32(dev, NV03_PFIFO_CACHES, 0);
 
-		chid  = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1);
-		chid &= dev_priv->engine.fifo.channels - 1;
+		chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & pfifo->channels;
 		get  = nv_rd32(dev, NV03_PFIFO_CACHE1_GET);
 
 		if (status & NV_PFIFO_INTR_CACHE_ERROR) {
@@ -509,3 +469,38 @@
 
 	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PFIFO_PENDING);
 }
+
+void
+nv04_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 8);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv04_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv04_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv04_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 15;
+	priv->ramfc_desc = nv04_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index 5b5f3ba..72f1a62 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -356,12 +356,12 @@
 nv04_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chid = dev_priv->engine.fifo.channels;
+	int chid = 15;
 
 	if (nv_rd32(dev, NV04_PGRAPH_CTX_CONTROL) & 0x00010000)
 		chid = nv_rd32(dev, NV04_PGRAPH_CTX_USER) >> 24;
 
-	if (chid >= dev_priv->engine.fifo.channels)
+	if (chid > 15)
 		return NULL;
 
 	return dev_priv->channels.ptr[chid];
@@ -404,7 +404,6 @@
 static int
 nv04_graph_unload_context(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	struct graph_state *ctx;
 	uint32_t tmp;
@@ -420,7 +419,7 @@
 
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 15 << 24;
 	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
@@ -495,7 +494,6 @@
 static int
 nv04_graph_init(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -527,7 +525,7 @@
 	nv_wr32(dev, NV04_PGRAPH_STATE        , 0xFFFFFFFF);
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL  , 0x10000100);
 	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 15 << 24;
 	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp);
 
 	/* These don't belong here, they're part of a per-channel context */
diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c
index 1acc626..ef7a934 100644
--- a/drivers/gpu/drm/nouveau/nv04_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv04_instmem.c
@@ -1,6 +1,8 @@
 #include "drmP.h"
 #include "drm.h"
+
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 /* returns the size of fifo context */
@@ -10,12 +12,15 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->chipset >= 0x40)
-		return 128;
+		return 128 * 32;
 	else
 	if (dev_priv->chipset >= 0x17)
-		return 64;
+		return 64 * 32;
+	else
+	if (dev_priv->chipset >= 0x10)
+		return 32 * 32;
 
-	return 32;
+	return 32 * 16;
 }
 
 int nv04_instmem_init(struct drm_device *dev)
@@ -39,7 +44,7 @@
 		else if (nv44_graph_class(dev))	    rsvd = 0x4980 * vs;
 		else				    rsvd = 0x4a40 * vs;
 		rsvd += 16 * 1024;
-		rsvd *= dev_priv->engine.fifo.channels;
+		rsvd *= 32; /* per-channel */
 
 		rsvd += 512 * 1024; /* pci(e)gart table */
 		rsvd += 512 * 1024; /* object storage */
@@ -67,7 +72,7 @@
 		return ret;
 
 	/* And RAMFC */
-	length = dev_priv->engine.fifo.channels * nouveau_fifo_ctx_size(dev);
+	length = nouveau_fifo_ctx_size(dev);
 	switch (dev_priv->card_type) {
 	case NV_40:
 		offset = 0x20000;
diff --git a/drivers/gpu/drm/nouveau/nv10_fifo.c b/drivers/gpu/drm/nouveau/nv10_fifo.c
index 476451c..f1fe7d7 100644
--- a/drivers/gpu/drm/nouveau/nv10_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv10_fifo.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,214 +27,112 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
 #include "nouveau_ramht.h"
 
-#define NV10_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV10_RAMFC__SIZE))
-#define NV10_RAMFC__SIZE ((dev_priv->chipset) >= 0x17 ? 64 : 32)
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv10_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{}
+};
 
-int
-nv10_fifo_create_context(struct nouveau_channel *chan)
+struct nv10_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv10_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv10_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
-	uint32_t fc = NV10_RAMFC(chan->id);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv10_fifo_chan *fctx;
+	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(chan->id), ~0,
-				      NV10_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
-				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-	if (ret)
-		return ret;
-
-	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
-			     NV03_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
 
-	/* Fill entries that are seen filled in dumps of nvidia driver just
-	 * after channel's is put into DMA mode
-	 */
-	nv_wi32(dev, fc +  0, chan->pushbuf_base);
-	nv_wi32(dev, fc +  4, chan->pushbuf_base);
-	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
-	nv_wi32(dev, fc + 20, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 32, ~0, 32,
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x08, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x10, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 #ifdef __BIG_ENDIAN
-			      NV_PFIFO_CACHE1_BIG_ENDIAN |
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
 #endif
-			      0);
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x18, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x1c, 0x00000000);
 
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
-	return 0;
-}
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-static void
-nv10_fifo_do_load_context(struct drm_device *dev, int chid)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV10_RAMFC(chid), tmp;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_REF_CNT, nv_ri32(dev, fc + 8));
-
-	tmp = nv_ri32(dev, fc + 12);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, tmp & 0xFFFF);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, tmp >> 16);
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, nv_ri32(dev, fc + 20));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 24));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 28));
-
-	if (dev_priv->chipset < 0x17)
-		goto out;
-
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE, nv_ri32(dev, fc + 32));
-	tmp = nv_ri32(dev, fc + 36);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP, tmp);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT, nv_ri32(dev, fc + 40));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, nv_ri32(dev, fc + 44));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_DMA_SUBROUTINE, nv_ri32(dev, fc + 48));
-
-out:
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
 int
-nv10_fifo_load_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	uint32_t tmp;
-
-	nv10_fifo_do_load_context(dev, chan->id);
-
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV03_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
-
-	/* Reset NV04_PFIFO_CACHE1_DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
-}
-
-int
-nv10_fifo_unload_context(struct drm_device *dev)
+nv10_fifo_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	uint32_t fc, tmp;
-	int chid;
+	struct nv10_fifo_priv *priv;
 
-	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & 0x1f;
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
-	fc = NV10_RAMFC(chid);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	nv_wi32(dev, fc +  0, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	nv_wi32(dev, fc +  4, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc +  8, nv_rd32(dev, NV10_PFIFO_CACHE1_REF_CNT));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE) & 0xFFFF;
-	tmp |= (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT) << 16);
-	nv_wi32(dev, fc + 12, tmp);
-	nv_wi32(dev, fc + 16, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	nv_wi32(dev, fc + 20, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH));
-	nv_wi32(dev, fc + 24, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	nv_wi32(dev, fc + 28, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv04_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv10_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv10_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
 
-	if (dev_priv->chipset < 0x17)
-		goto out;
-
-	nv_wi32(dev, fc + 32, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE));
-	tmp = nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP);
-	nv_wi32(dev, fc + 36, tmp);
-	nv_wi32(dev, fc + 40, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT));
-	nv_wi32(dev, fc + 44, nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE));
-	nv_wi32(dev, fc + 48, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-
-out:
-	nv10_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-	return 0;
-}
-
-static void
-nv10_fifo_init_reset(struct drm_device *dev)
-{
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
-
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x002044, 0x0101ffff);
-	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003000, 0x00000000);
-	nv_wr32(dev, 0x003050, 0x00000000);
-
-	nv_wr32(dev, 0x003258, 0x00000000);
-	nv_wr32(dev, 0x003210, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-}
-
-static void
-nv10_fifo_init_ramxx(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
-				       ((dev_priv->ramht->bits - 9) << 16) |
-				       (dev_priv->ramht->gpuobj->pinst >> 8));
-	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
-
-	if (dev_priv->chipset < 0x17) {
-		nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
-	} else {
-		nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc->pinst >> 8) |
-					       (1 << 16) /* 64 Bytes entry*/);
-		/* XXX nvidia blob set bit 18, 21,23 for nv20 & nv30 */
-	}
-}
-
-static void
-nv10_fifo_init_intr(struct drm_device *dev)
-{
 	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
-}
-
-int
-nv10_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
-
-	nv10_fifo_init_reset(dev);
-	nv10_fifo_init_ramxx(dev);
-
-	nv10_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-
-	nv10_fifo_init_intr(dev);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
-	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
-
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
-	}
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 10c0eb5..fb1d88a 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -759,7 +759,6 @@
 nv10_graph_unload_context(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct graph_state *ctx;
 	uint32_t tmp;
@@ -782,7 +781,7 @@
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (pfifo->channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
@@ -822,12 +821,12 @@
 nv10_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chid = dev_priv->engine.fifo.channels;
+	int chid = 31;
 
 	if (nv_rd32(dev, NV10_PGRAPH_CTX_CONTROL) & 0x00010000)
 		chid = nv_rd32(dev, NV10_PGRAPH_CTX_USER) >> 24;
 
-	if (chid >= dev_priv->engine.fifo.channels)
+	if (chid >= 31)
 		return NULL;
 
 	return dev_priv->channels.ptr[chid];
@@ -948,7 +947,7 @@
 	nv_wr32(dev, NV10_PGRAPH_STATE, 0xFFFFFFFF);
 
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
 	nv_wr32(dev, NV10_PGRAPH_FFINTFC_ST2, 0x08000000);
diff --git a/drivers/gpu/drm/nouveau/nv17_fifo.c b/drivers/gpu/drm/nouveau/nv17_fifo.c
new file mode 100644
index 0000000..d9e482e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv17_fifo.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2012 Ben Skeggs.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
+#include "nouveau_ramht.h"
+
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv17_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{ 32,  0, 0x20,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+	{}
+};
+
+struct nv17_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv17_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv17_fifo_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv17_fifo_chan *fctx;
+	unsigned long flags;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 64, ~0, 64,
+				      NVOBJ_FLAG_ZERO_ALLOC |
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static int
+nv17_fifo_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
+
+	nv_wr32(dev, NV04_PFIFO_DELAY_0, 0x000000ff);
+	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x0101ffff);
+
+	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
+				       ((dev_priv->ramht->bits - 9) << 16) |
+				       (dev_priv->ramht->gpuobj->pinst >> 8));
+	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
+	nv_wr32(dev, NV03_PFIFO_RAMFC, 0x00010000 |
+				       dev_priv->ramfc->pinst >> 8);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
+
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
+	}
+
+	return 0;
+}
+
+int
+nv17_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv17_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv17_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv17_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 385e2b4..e34ea30 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -43,8 +43,6 @@
 int
 nv20_graph_unload_context(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct nouveau_gpuobj *grctx;
 	u32 tmp;
@@ -62,7 +60,7 @@
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (pfifo->channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv31_mpeg.c b/drivers/gpu/drm/nouveau/nv31_mpeg.c
index 6f06a07..5f239bf 100644
--- a/drivers/gpu/drm/nouveau/nv31_mpeg.c
+++ b/drivers/gpu/drm/nouveau/nv31_mpeg.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 struct nv31_mpeg_engine {
@@ -208,6 +209,7 @@
 static int
 nv31_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ctx;
 	unsigned long flags;
@@ -218,7 +220,7 @@
 		return 0;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		if (!dev_priv->channels.ptr[i])
 			continue;
 
diff --git a/drivers/gpu/drm/nouveau/nv40_fifo.c b/drivers/gpu/drm/nouveau/nv40_fifo.c
index 8d34661..cdc8184 100644
--- a/drivers/gpu/drm/nouveau/nv40_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv40_fifo.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -25,215 +25,123 @@
  */
 
 #include "drmP.h"
+#include "drm.h"
 #include "nouveau_drv.h"
-#include "nouveau_drm.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
 #include "nouveau_ramht.h"
 
-#define NV40_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV40_RAMFC__SIZE))
-#define NV40_RAMFC__SIZE 128
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv40_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 28,  0, 0x18,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{  2, 28, 0x18, 28, 0x002058 },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x20,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+	{ 32,  0, 0x34,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+	{ 32,  0, 0x38,  0, NV40_PFIFO_GRCTX_INSTANCE },
+	{ 17,  0, 0x3c,  0, NV04_PFIFO_DMA_TIMESLICE },
+	{ 32,  0, 0x40,  0, 0x0032e4 },
+	{ 32,  0, 0x44,  0, 0x0032e8 },
+	{ 32,  0, 0x4c,  0, 0x002088 },
+	{ 32,  0, 0x50,  0, 0x003300 },
+	{ 32,  0, 0x54,  0, 0x00330c },
+	{}
+};
 
-int
-nv40_fifo_create_context(struct nouveau_channel *chan)
+struct nv40_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv40_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv40_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV40_RAMFC(chan->id);
+	struct nv40_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv40_fifo_chan *fctx;
 	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0,
-				      NV40_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
-				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-	if (ret)
-		return ret;
-
-	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
-			     NV40_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
 
-	nv_wi32(dev, fc +  0, chan->pushbuf_base);
-	nv_wi32(dev, fc +  4, chan->pushbuf_base);
-	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
-	nv_wi32(dev, fc + 24, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 128, ~0, 128,
+				      NVOBJ_FLAG_ZERO_ALLOC |
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x18, 0x30000000 |
+				   NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 #ifdef __BIG_ENDIAN
-			      NV_PFIFO_CACHE1_BIG_ENDIAN |
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
 #endif
-			      0x30000000 /* no idea.. */);
-	nv_wi32(dev, fc + 60, 0x0001FFFF);
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x3c, 0x0001ffff);
 
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
-
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
+
+	/*XXX: remove this later, need fifo engine context commit hook */
+	nouveau_gpuobj_ref(fctx->ramfc, &chan->ramfc);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
-static void
-nv40_fifo_do_load_context(struct drm_device *dev, int chid)
+static int
+nv40_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV40_RAMFC(chid), tmp, tmp2;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_REF_CNT, nv_ri32(dev, fc + 8));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, nv_ri32(dev, fc + 12));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 20));
-
-	/* No idea what 0x2058 is.. */
-	tmp   = nv_ri32(dev, fc + 24);
-	tmp2  = nv_rd32(dev, 0x2058) & 0xFFF;
-	tmp2 |= (tmp & 0x30000000);
-	nv_wr32(dev, 0x2058, tmp2);
-	tmp  &= ~0x30000000;
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, tmp);
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 28));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 32));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE, nv_ri32(dev, fc + 36));
-	tmp = nv_ri32(dev, fc + 40);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP, tmp);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT, nv_ri32(dev, fc + 44));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, nv_ri32(dev, fc + 48));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_DMA_SUBROUTINE, nv_ri32(dev, fc + 52));
-	nv_wr32(dev, NV40_PFIFO_GRCTX_INSTANCE, nv_ri32(dev, fc + 56));
-
-	/* Don't clobber the TIMEOUT_ENABLED flag when restoring from RAMFC */
-	tmp  = nv_rd32(dev, NV04_PFIFO_DMA_TIMESLICE) & ~0x1FFFF;
-	tmp |= nv_ri32(dev, fc + 60) & 0x1FFFF;
-	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, tmp);
-
-	nv_wr32(dev, 0x32e4, nv_ri32(dev, fc + 64));
-	/* NVIDIA does this next line twice... */
-	nv_wr32(dev, 0x32e8, nv_ri32(dev, fc + 68));
-	nv_wr32(dev, 0x2088, nv_ri32(dev, fc + 76));
-	nv_wr32(dev, 0x3300, nv_ri32(dev, fc + 80));
-	nv_wr32(dev, 0x330c, nv_ri32(dev, fc + 84));
-
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
-}
-
-int
-nv40_fifo_load_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	uint32_t tmp;
-
-	nv40_fifo_do_load_context(dev, chan->id);
-
-	/* Set channel active, and in DMA mode */
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV40_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
-
-	/* Reset DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
-}
-
-int
-nv40_fifo_unload_context(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	uint32_t fc, tmp;
-	int chid;
-
-	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & 0x1f;
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
-	fc = NV40_RAMFC(chid);
-
-	nv_wi32(dev, fc + 0, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	nv_wi32(dev, fc + 4, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc + 8, nv_rd32(dev, NV10_PFIFO_CACHE1_REF_CNT));
-	nv_wi32(dev, fc + 12, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE));
-	nv_wi32(dev, fc + 16, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT));
-	nv_wi32(dev, fc + 20, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH);
-	tmp |= nv_rd32(dev, 0x2058) & 0x30000000;
-	nv_wi32(dev, fc + 24, tmp);
-	nv_wi32(dev, fc + 28, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	nv_wi32(dev, fc + 32, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
-	nv_wi32(dev, fc + 36, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE));
-	tmp = nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP);
-	nv_wi32(dev, fc + 40, tmp);
-	nv_wi32(dev, fc + 44, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT));
-	nv_wi32(dev, fc + 48, nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE));
-	/* NVIDIA read 0x3228 first, then write DMA_GET here.. maybe something
-	 * more involved depending on the value of 0x3228?
-	 */
-	nv_wi32(dev, fc + 52, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc + 56, nv_rd32(dev, NV40_PFIFO_GRCTX_INSTANCE));
-	nv_wi32(dev, fc + 60, nv_rd32(dev, NV04_PFIFO_DMA_TIMESLICE) & 0x1ffff);
-	/* No idea what the below is for exactly, ripped from a mmio-trace */
-	nv_wi32(dev, fc + 64, nv_rd32(dev, NV40_PFIFO_UNK32E4));
-	/* NVIDIA do this next line twice.. bug? */
-	nv_wi32(dev, fc + 68, nv_rd32(dev, 0x32e8));
-	nv_wi32(dev, fc + 76, nv_rd32(dev, 0x2088));
-	nv_wi32(dev, fc + 80, nv_rd32(dev, 0x3300));
-#if 0 /* no real idea which is PUT/GET in UNK_48.. */
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_GET);
-	tmp |= (nv_rd32(dev, NV04_PFIFO_CACHE1_PUT) << 16);
-	nv_wi32(dev, fc + 72, tmp);
-#endif
-	nv_wi32(dev, fc + 84, nv_rd32(dev, 0x330c));
-
-	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV40_PFIFO_CACHE1_PUSH1_DMA | (pfifo->channels - 1));
-	return 0;
-}
-
-static void
-nv40_fifo_init_reset(struct drm_device *dev)
-{
+	struct nv40_fifo_priv *priv = nv_engine(dev, engine);
 	int i;
 
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
 
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x003210, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-	nv_wr32(dev, 0x003240, 0x00000000);
-	nv_wr32(dev, 0x003244, 0x00000000);
-	nv_wr32(dev, 0x003258, 0x00000000);
-	nv_wr32(dev, 0x002504, 0x00000000);
-	for (i = 0; i < 16; i++)
-		nv_wr32(dev, 0x002510 + (i * 4), 0x00000000);
-	nv_wr32(dev, 0x00250c, 0x0000ffff);
-	nv_wr32(dev, 0x002048, 0x00000000);
-	nv_wr32(dev, 0x003228, 0x00000000);
-	nv_wr32(dev, 0x0032e8, 0x00000000);
-	nv_wr32(dev, 0x002410, 0x00000000);
-	nv_wr32(dev, 0x002420, 0x00000000);
-	nv_wr32(dev, 0x002058, 0x00000001);
-	nv_wr32(dev, 0x00221c, 0x00000000);
-	/* something with 0x2084, read/modify/write, no change */
 	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003200, 0x00000000);
-
-	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x2101ffff);
-}
-
-static void
-nv40_fifo_init_ramxx(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	nv_wr32(dev, 0x002044, 0x2101ffff);
+	nv_wr32(dev, 0x002058, 0x00000001);
 
 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
 				       ((dev_priv->ramht->bits - 9) << 16) |
@@ -244,65 +152,59 @@
 	case 0x47:
 	case 0x49:
 	case 0x4b:
-		nv_wr32(dev, 0x2230, 1);
-		break;
-	default:
-		break;
-	}
-
-	switch (dev_priv->chipset) {
+		nv_wr32(dev, 0x002230, 0x00000001);
 	case 0x40:
 	case 0x41:
 	case 0x42:
 	case 0x43:
 	case 0x45:
-	case 0x47:
 	case 0x48:
-	case 0x49:
-	case 0x4b:
-		nv_wr32(dev, NV40_PFIFO_RAMFC, 0x30002);
+		nv_wr32(dev, 0x002220, 0x00030002);
 		break;
 	default:
-		nv_wr32(dev, 0x2230, 0);
-		nv_wr32(dev, NV40_PFIFO_RAMFC,
-			((dev_priv->vram_size - 512 * 1024 +
-			  dev_priv->ramfc->pinst) >> 16) | (3 << 16));
+		nv_wr32(dev, 0x002230, 0x00000000);
+		nv_wr32(dev, 0x002220, ((dev_priv->vram_size - 512 * 1024 +
+					 dev_priv->ramfc->pinst) >> 16) |
+				       0x00030000);
 		break;
 	}
-}
 
-static void
-nv40_fifo_init_intr(struct drm_device *dev)
-{
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
-}
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
 
-int
-nv40_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
 
-	nv40_fifo_init_reset(dev);
-	nv40_fifo_init_ramxx(dev);
-
-	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-
-	nv40_fifo_init_intr(dev);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
 	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
 	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
 
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
 	}
 
 	return 0;
 }
+
+int
+nv40_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv40_fifo_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv40_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv40_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv40_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 0383720..aa9e2df 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -27,6 +27,7 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 struct nv40_graph_engine {
@@ -345,13 +346,14 @@
 static int
 nv40_graph_isr_chid(struct drm_device *dev, u32 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *grctx;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		if (!dev_priv->channels.ptr[i])
 			continue;
 		grctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_GR];
diff --git a/drivers/gpu/drm/nouveau/nv40_pm.c b/drivers/gpu/drm/nouveau/nv40_pm.c
index c761538..e66273af 100644
--- a/drivers/gpu/drm/nouveau/nv40_pm.c
+++ b/drivers/gpu/drm/nouveau/nv40_pm.c
@@ -27,6 +27,7 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 #include "nouveau_hw.h"
+#include "nouveau_fifo.h"
 
 #define min2(a,b) ((a) < (b) ? (a) : (b))
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fb.c b/drivers/gpu/drm/nouveau/nv50_fb.c
index bdd2afe..f1e4b9e 100644
--- a/drivers/gpu/drm/nouveau/nv50_fb.c
+++ b/drivers/gpu/drm/nouveau/nv50_fb.c
@@ -2,6 +2,7 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
+#include "nouveau_fifo.h"
 
 struct nv50_fb_priv {
 	struct page *r100c08_page;
@@ -212,6 +213,7 @@
 void
 nv50_fb_vm_trap(struct drm_device *dev, int display)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	const struct nouveau_enum *en, *cl;
 	unsigned long flags;
@@ -236,7 +238,7 @@
 	/* lookup channel id */
 	chinst = (trap[2] << 16) | trap[1];
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) {
+	for (ch = 0; ch < pfifo->channels; ch++) {
 		struct nouveau_channel *chan = dev_priv->channels.ptr[ch];
 
 		if (!chan || !chan->ramin)
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 12e9e82..55383b8 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,288 +27,135 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_vm.h"
 
-static void
+struct nv50_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct nouveau_gpuobj *playlist[2];
+	int cur_playlist;
+};
+
+struct nv50_fifo_chan {
+	struct nouveau_fifo_chan base;
+};
+
+void
 nv50_fifo_playlist_update(struct drm_device *dev)
 {
+	struct nv50_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_gpuobj *cur;
 	int i, p;
 
-	NV_DEBUG(dev, "\n");
+	cur = priv->playlist[priv->cur_playlist];
+	priv->cur_playlist = !priv->cur_playlist;
 
-	cur = pfifo->playlist[pfifo->cur_playlist];
-	pfifo->cur_playlist = !pfifo->cur_playlist;
-
-	for (i = 0, p = 0; i < pfifo->channels; i++) {
+	for (i = 0, p = 0; i < priv->base.channels; i++) {
 		if (nv_rd32(dev, 0x002600 + (i * 4)) & 0x80000000)
 			nv_wo32(cur, p++ * 4, i);
 	}
 
 	dev_priv->engine.instmem.flush(dev);
 
-	nv_wr32(dev, 0x32f4, cur->vinst >> 12);
-	nv_wr32(dev, 0x32ec, p);
-	nv_wr32(dev, 0x2500, 0x101);
+	nv_wr32(dev, 0x0032f4, cur->vinst >> 12);
+	nv_wr32(dev, 0x0032ec, p);
+	nv_wr32(dev, 0x002500, 0x00000101);
 }
 
-static void
-nv50_fifo_channel_enable(struct drm_device *dev, int channel)
+static int
+nv50_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *chan = dev_priv->channels.ptr[channel];
-	uint32_t inst;
-
-	NV_DEBUG(dev, "ch%d\n", channel);
-
-	if (dev_priv->chipset == 0x50)
-		inst = chan->ramfc->vinst >> 12;
-	else
-		inst = chan->ramfc->vinst >> 8;
-
-	nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel), inst |
-		     NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
-}
-
-static void
-nv50_fifo_channel_disable(struct drm_device *dev, int channel)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t inst;
-
-	NV_DEBUG(dev, "ch%d\n", channel);
-
-	if (dev_priv->chipset == 0x50)
-		inst = NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G80;
-	else
-		inst = NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G84;
-	nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel), inst);
-}
-
-static void
-nv50_fifo_init_reset(struct drm_device *dev)
-{
-	uint32_t pmc_e = NV_PMC_ENABLE_PFIFO;
-
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e);
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |  pmc_e);
-}
-
-static void
-nv50_fifo_init_intr(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xFFFFFFFF);
-	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xFFFFFFFF);
-}
-
-static void
-nv50_fifo_init_context_table(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i;
-
-	NV_DEBUG(dev, "\n");
-
-	for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++) {
-		if (dev_priv->channels.ptr[i])
-			nv50_fifo_channel_enable(dev, i);
-		else
-			nv50_fifo_channel_disable(dev, i);
-	}
-
-	nv50_fifo_playlist_update(dev);
-}
-
-static void
-nv50_fifo_init_regs__nv(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, 0x250c, 0x6f3cfc34);
-}
-
-static void
-nv50_fifo_init_regs(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, 0x2500, 0);
-	nv_wr32(dev, 0x3250, 0);
-	nv_wr32(dev, 0x3220, 0);
-	nv_wr32(dev, 0x3204, 0);
-	nv_wr32(dev, 0x3210, 0);
-	nv_wr32(dev, 0x3270, 0);
-	nv_wr32(dev, 0x2044, 0x01003fff);
-
-	/* Enable dummy channels setup by nv50_instmem.c */
-	nv50_fifo_channel_enable(dev, 0);
-	nv50_fifo_channel_enable(dev, 127);
-}
-
-int
-nv50_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int ret;
-
-	NV_DEBUG(dev, "\n");
-
-	if (pfifo->playlist[0]) {
-		pfifo->cur_playlist = !pfifo->cur_playlist;
-		goto just_reset;
-	}
-
-	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC,
-				 &pfifo->playlist[0]);
-	if (ret) {
-		NV_ERROR(dev, "error creating playlist 0: %d\n", ret);
-		return ret;
-	}
-
-	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC,
-				 &pfifo->playlist[1]);
-	if (ret) {
-		nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
-		NV_ERROR(dev, "error creating playlist 1: %d\n", ret);
-		return ret;
-	}
-
-just_reset:
-	nv50_fifo_init_reset(dev);
-	nv50_fifo_init_intr(dev);
-	nv50_fifo_init_context_table(dev);
-	nv50_fifo_init_regs__nv(dev);
-	nv50_fifo_init_regs(dev);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
-	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
-
-	return 0;
-}
-
-void
-nv50_fifo_takedown(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-
-	NV_DEBUG(dev, "\n");
-
-	if (!pfifo->playlist[0])
-		return;
-
-	nv_wr32(dev, 0x2140, 0x00000000);
-	nouveau_irq_unregister(dev, 8);
-
-	nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
-	nouveau_gpuobj_ref(NULL, &pfifo->playlist[1]);
-}
-
-int
-nv50_fifo_create_context(struct nouveau_channel *chan)
-{
+	struct nv50_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv50_fifo_chan *fctx;
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *ramfc = NULL;
-        uint64_t ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
+	u64 ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
+	u64 instance = chan->ramin->vinst >> 12;
 	unsigned long flags;
-	int ret;
+	int ret = 0, i;
 
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	if (dev_priv->chipset == 0x50) {
-		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
-					      chan->ramin->vinst, 0x100,
-					      NVOBJ_FLAG_ZERO_ALLOC |
-					      NVOBJ_FLAG_ZERO_FREE,
-					      &chan->ramfc);
-		if (ret)
-			return ret;
-
-		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst + 0x0400,
-					      chan->ramin->vinst + 0x0400,
-					      4096, 0, &chan->cache);
-		if (ret)
-			return ret;
-	} else {
-		ret = nouveau_gpuobj_new(dev, chan, 0x100, 256,
-					 NVOBJ_FLAG_ZERO_ALLOC |
-					 NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-		if (ret)
-			return ret;
-
-		ret = nouveau_gpuobj_new(dev, chan, 4096, 1024,
-					 0, &chan->cache);
-		if (ret)
-			return ret;
-	}
-	ramfc = chan->ramfc;
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+	atomic_inc(&chan->vm->engref[engine]);
 
 	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
 			     NV50_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
-		return -ENOMEM;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-
-	nv_wo32(ramfc, 0x48, chan->pushbuf->cinst >> 4);
-	nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
-			     (4 << 24) /* SEARCH_FULL */ |
-			     (chan->ramht->gpuobj->cinst >> 4));
-	nv_wo32(ramfc, 0x44, 0x01003fff);
-	nv_wo32(ramfc, 0x60, 0x7fffffff);
-	nv_wo32(ramfc, 0x40, 0x00000000);
-	nv_wo32(ramfc, 0x7c, 0x30000001);
-	nv_wo32(ramfc, 0x78, 0x00000000);
-	nv_wo32(ramfc, 0x3c, 0x403f6078);
-	nv_wo32(ramfc, 0x50, lower_32_bits(ib_offset));
-	nv_wo32(ramfc, 0x54, upper_32_bits(ib_offset) |
-                drm_order(chan->dma.ib_max + 1) << 16);
-
-	if (dev_priv->chipset != 0x50) {
-		nv_wo32(chan->ramin, 0, chan->id);
-		nv_wo32(chan->ramin, 4, chan->ramfc->vinst >> 8);
-
-		nv_wo32(ramfc, 0x88, chan->cache->vinst >> 10);
-		nv_wo32(ramfc, 0x98, chan->ramin->vinst >> 12);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
 	}
 
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x3c, 0x403f6078);
+	nv_wo32(chan->ramin, 0x40, 0x00000000);
+	nv_wo32(chan->ramin, 0x44, 0x01003fff);
+	nv_wo32(chan->ramin, 0x48, chan->pushbuf->cinst >> 4);
+	nv_wo32(chan->ramin, 0x50, lower_32_bits(ib_offset));
+	nv_wo32(chan->ramin, 0x54, upper_32_bits(ib_offset) |
+				   drm_order(chan->dma.ib_max + 1) << 16);
+	nv_wo32(chan->ramin, 0x60, 0x7fffffff);
+	nv_wo32(chan->ramin, 0x78, 0x00000000);
+	nv_wo32(chan->ramin, 0x7c, 0x30000001);
+	nv_wo32(chan->ramin, 0x80, ((chan->ramht->bits - 9) << 27) |
+				   (4 << 24) /* SEARCH_FULL */ |
+				   (chan->ramht->gpuobj->cinst >> 4));
+
 	dev_priv->engine.instmem.flush(dev);
 
-	nv50_fifo_channel_enable(dev, chan->id);
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x80000000 | instance);
 	nv50_fifo_playlist_update(dev);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
 static bool
-nv50_fifo_wait_kickoff(void *data)
+nv50_fifo_kickoff(struct nouveau_channel *chan)
 {
-	struct drm_nouveau_private *dev_priv = data;
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_device *dev = chan->dev;
+	bool done = true;
+	u32 me;
 
-	if (dev_priv->chipset == 0x50) {
-		u32 me_enable = nv_mask(dev, 0x00b860, 0x00000001, 0x00000001);
-		nv_wr32(dev, 0x00b860, me_enable);
+	/* HW bug workaround:
+	 *
+	 * PFIFO will hang forever if the connected engines don't report
+	 * that they've processed the context switch request.
+	 *
+	 * In order for the kickoff to work, we need to ensure all the
+	 * connected engines are in a state where they can answer.
+	 *
+	 * Newer chipsets don't seem to suffer from this issue, and well,
+	 * there's also a "ignore these engines" bitmask reg we can use
+	 * if we hit the issue there..
+	 */
+
+	/* PME: make sure engine is enabled */
+	me = nv_mask(dev, 0x00b860, 0x00000001, 0x00000001);
+
+	/* do the kickoff... */
+	nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+	if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff)) {
+		NV_INFO(dev, "PFIFO: channel %d unload timeout\n", chan->id);
+		done = false;
 	}
 
-	return nv_rd32(dev, 0x0032fc) != 0xffffffff;
+	/* restore any engine states we changed, and exit */
+	nv_wr32(dev, 0x00b860, me);
+	return done;
 }
 
-void
-nv50_fifo_destroy_context(struct nouveau_channel *chan)
+static void
+nv50_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nv50_fifo_chan *fctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	unsigned long flags;
@@ -319,9 +166,7 @@
 	nv50_fifo_playlist_update(dev);
 
 	/* tell any engines on this channel to unload their contexts */
-	nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
-	if (!nv_wait_cb(dev, nv50_fifo_wait_kickoff, dev_priv))
-		NV_INFO(dev, "PFIFO: channel %d unload timeout\n", chan->id);
+	nv50_fifo_kickoff(chan);
 
 	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x00000000);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
@@ -332,41 +177,118 @@
 		chan->user = NULL;
 	}
 
-	nouveau_gpuobj_ref(NULL, &chan->ramfc);
-	nouveau_gpuobj_ref(NULL, &chan->cache);
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
 }
 
-int
-nv50_fifo_load_context(struct nouveau_channel *chan)
+static int
+nv50_fifo_init(struct drm_device *dev, int engine)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 instance;
+	int i;
+
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x00250c, 0x6f3cfc34);
+	nv_wr32(dev, 0x002044, 0x01003fff);
+
+	nv_wr32(dev, 0x002100, 0xffffffff);
+	nv_wr32(dev, 0x002140, 0xffffffff);
+
+	for (i = 0; i < 128; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan && chan->engctx[engine])
+			instance = 0x80000000 | chan->ramin->vinst >> 12;
+		else
+			instance = 0x00000000;
+		nv_wr32(dev, 0x002600 + (i * 4), instance);
+	}
+
+	nv50_fifo_playlist_update(dev);
+
+	nv_wr32(dev, 0x003200, 1);
+	nv_wr32(dev, 0x003250, 1);
+	nv_wr32(dev, 0x002500, 1);
 	return 0;
 }
 
-int
-nv50_fifo_unload_context(struct drm_device *dev)
+static int
+nv50_fifo_fini(struct drm_device *dev, int engine, bool suspend)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_fifo_priv *priv = nv_engine(dev, engine);
 	int i;
 
 	/* set playlist length to zero, fifo will unload context */
 	nv_wr32(dev, 0x0032ec, 0);
 
 	/* tell all connected engines to unload their contexts */
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < priv->base.channels; i++) {
 		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
-		if (chan)
-			nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
-		if (!nv_wait_cb(dev, nv50_fifo_wait_kickoff, dev_priv)) {
-			NV_INFO(dev, "PFIFO: channel %d unload timeout\n", i);
+		if (chan && !nv50_fifo_kickoff(chan))
 			return -EBUSY;
-		}
 	}
 
+	nv_wr32(dev, 0x002140, 0);
 	return 0;
 }
 
 void
-nv50_fifo_tlb_flush(struct drm_device *dev)
+nv50_fifo_tlb_flush(struct drm_device *dev, int engine)
 {
 	nv50_vm_flush_engine(dev, 5);
 }
+
+void
+nv50_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_fifo_priv *priv = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 8);
+
+	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv50_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv50_fifo_destroy;
+	priv->base.base.init = nv50_fifo_init;
+	priv->base.base.fini = nv50_fifo_fini;
+	priv->base.base.context_new = nv50_fifo_context_new;
+	priv->base.base.context_del = nv50_fifo_context_del;
+	priv->base.base.tlb_flush = nv50_fifo_tlb_flush;
+	priv->base.channels = 127;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[1]);
+	if (ret)
+		goto error;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 46b2c95..636e22b 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -27,6 +27,7 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_dma.h"
 #include "nouveau_vm.h"
@@ -710,13 +711,14 @@
 int
 nv50_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = dev_priv->channels.ptr[i];
 		if (!chan || !chan->ramin)
 			continue;
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 44fbac9..179bb42 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -147,7 +147,6 @@
 {
 	struct drm_nouveau_private *dev_priv = vm->dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	int i;
 
 	pinstmem->flush(vm->dev);
@@ -158,7 +157,6 @@
 		return;
 	}
 
-	pfifo->tlb_flush(vm->dev);
 	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
 		if (atomic_read(&vm->engref[i]))
 			dev_priv->eng[i]->tlb_flush(vm->dev, i);
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 0ac98c0..c2f889b 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -25,6 +25,7 @@
 #include "drmP.h"
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_fence.h"
 
@@ -145,8 +146,8 @@
 int
 nv84_fence_create(struct drm_device *dev)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nv84_fence_priv *priv;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nv84_fifo.c b/drivers/gpu/drm/nouveau/nv84_fifo.c
new file mode 100644
index 0000000..cc82d79
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv84_fifo.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2012 Ben Skeggs.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_ramht.h"
+#include "nouveau_vm.h"
+
+struct nv84_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct nouveau_gpuobj *playlist[2];
+	int cur_playlist;
+};
+
+struct nv84_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+	struct nouveau_gpuobj *cache;
+};
+
+static int
+nv84_fifo_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv84_fifo_chan *fctx;
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+        u64 ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
+	u64 instance;
+	unsigned long flags;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+	atomic_inc(&chan->vm->engref[engine]);
+
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV50_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = nouveau_gpuobj_new(dev, chan, 256, 256, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	instance = fctx->ramfc->vinst >> 8;
+
+	ret = nouveau_gpuobj_new(dev, chan, 4096, 1024, 0, &fctx->cache);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x3c, 0x403f6078);
+	nv_wo32(fctx->ramfc, 0x40, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x44, 0x01003fff);
+	nv_wo32(fctx->ramfc, 0x48, chan->pushbuf->cinst >> 4);
+	nv_wo32(fctx->ramfc, 0x50, lower_32_bits(ib_offset));
+	nv_wo32(fctx->ramfc, 0x54, upper_32_bits(ib_offset) |
+				   drm_order(chan->dma.ib_max + 1) << 16);
+	nv_wo32(fctx->ramfc, 0x60, 0x7fffffff);
+	nv_wo32(fctx->ramfc, 0x78, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x7c, 0x30000001);
+	nv_wo32(fctx->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
+				   (4 << 24) /* SEARCH_FULL */ |
+				   (chan->ramht->gpuobj->cinst >> 4));
+	nv_wo32(fctx->ramfc, 0x88, fctx->cache->vinst >> 10);
+	nv_wo32(fctx->ramfc, 0x98, chan->ramin->vinst >> 12);
+
+	nv_wo32(chan->ramin, 0x00, chan->id);
+	nv_wo32(chan->ramin, 0x04, fctx->ramfc->vinst >> 8);
+
+	dev_priv->engine.instmem.flush(dev);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x80000000 | instance);
+	nv50_fifo_playlist_update(dev);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nv84_fifo_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fifo_chan *fctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
+
+	/* remove channel from playlist, will context switch if active */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002600 + (chan->id * 4), 0x80000000, 0x00000000);
+	nv50_fifo_playlist_update(dev);
+
+	/* tell any engines on this channel to unload their contexts */
+	nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+	if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff))
+		NV_INFO(dev, "PFIFO: channel %d unload timeout\n", chan->id);
+
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x00000000);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	/* clean up */
+	if (chan->user) {
+		iounmap(chan->user);
+		chan->user = NULL;
+	}
+
+	nouveau_gpuobj_ref(NULL, &fctx->ramfc);
+	nouveau_gpuobj_ref(NULL, &fctx->cache);
+
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nv84_fifo_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_chan *fctx;
+	u32 instance;
+	int i;
+
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x00250c, 0x6f3cfc34);
+	nv_wr32(dev, 0x002044, 0x01003fff);
+
+	nv_wr32(dev, 0x002100, 0xffffffff);
+	nv_wr32(dev, 0x002140, 0xffffffff);
+
+	for (i = 0; i < 128; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan && (fctx = chan->engctx[engine]))
+			instance = 0x80000000 | fctx->ramfc->vinst >> 8;
+		else
+			instance = 0x00000000;
+		nv_wr32(dev, 0x002600 + (i * 4), instance);
+	}
+
+	nv50_fifo_playlist_update(dev);
+
+	nv_wr32(dev, 0x003200, 1);
+	nv_wr32(dev, 0x003250, 1);
+	nv_wr32(dev, 0x002500, 1);
+	return 0;
+}
+
+static int
+nv84_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	/* set playlist length to zero, fifo will unload context */
+	nv_wr32(dev, 0x0032ec, 0);
+
+	/* tell all connected engines to unload their contexts */
+	for (i = 0; i < priv->base.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan)
+			nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+		if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff)) {
+			NV_INFO(dev, "PFIFO: channel %d unload timeout\n", i);
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0);
+	return 0;
+}
+
+int
+nv84_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv50_fifo_destroy;
+	priv->base.base.init = nv84_fifo_init;
+	priv->base.base.fini = nv84_fifo_fini;
+	priv->base.base.context_new = nv84_fifo_context_new;
+	priv->base.base.context_del = nv84_fifo_context_del;
+	priv->base.base.tlb_flush = nv50_fifo_tlb_flush;
+	priv->base.channels = 127;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[1]);
+	if (ret)
+		goto error;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
index 817228c..47ab388 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fence.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
@@ -25,6 +25,7 @@
 #include "drmP.h"
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_fence.h"
 
@@ -148,8 +149,8 @@
 int
 nvc0_fence_create(struct drm_device *dev)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nvc0_fence_priv *priv;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index 471723e..7d85553 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -26,10 +26,12 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 static void nvc0_fifo_isr(struct drm_device *);
 
 struct nvc0_fifo_priv {
+	struct nouveau_fifo_priv base;
 	struct nouveau_gpuobj *playlist[2];
 	int cur_playlist;
 	struct nouveau_vma user_vma;
@@ -37,8 +39,8 @@
 };
 
 struct nvc0_fifo_chan {
+	struct nouveau_fifo_chan base;
 	struct nouveau_gpuobj *user;
-	struct nouveau_gpuobj *ramfc;
 };
 
 static void
@@ -46,8 +48,7 @@
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv = pfifo->priv;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_gpuobj *cur;
 	int i, p;
 
@@ -69,31 +70,20 @@
 		NV_ERROR(dev, "PFIFO - playlist update failed\n");
 }
 
-int
-nvc0_fifo_create_context(struct nouveau_channel *chan)
+static int
+nvc0_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv = pfifo->priv;
-	struct nvc0_fifo_chan *fifoch;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nvc0_fifo_chan *fctx;
 	u64 ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
-	int ret;
+	int ret, i;
 
-	chan->fifo_priv = kzalloc(sizeof(*fifoch), GFP_KERNEL);
-	if (!chan->fifo_priv)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
-	fifoch = chan->fifo_priv;
-
-	/* allocate vram for control regs, map into polling area */
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC, &fifoch->user);
-	if (ret)
-		goto error;
-
-	nouveau_vm_map_at(&priv->user_vma, chan->id * 0x1000,
-			  *(struct nouveau_mem **)fifoch->user->node);
 
 	chan->user = ioremap_wc(pci_resource_start(dev->pdev, 1) +
 				priv->user_vma.offset + (chan->id * 0x1000),
@@ -103,175 +93,77 @@
 		goto error;
 	}
 
-	/* ramfc */
-	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
-				      chan->ramin->vinst, 0x100,
-				      NVOBJ_FLAG_ZERO_ALLOC, &fifoch->ramfc);
+	/* allocate vram for control regs, map into polling area */
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &fctx->user);
 	if (ret)
 		goto error;
 
-	nv_wo32(fifoch->ramfc, 0x08, lower_32_bits(fifoch->user->vinst));
-	nv_wo32(fifoch->ramfc, 0x0c, upper_32_bits(fifoch->user->vinst));
-	nv_wo32(fifoch->ramfc, 0x10, 0x0000face);
-	nv_wo32(fifoch->ramfc, 0x30, 0xfffff902);
-	nv_wo32(fifoch->ramfc, 0x48, lower_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
+	nouveau_vm_map_at(&priv->user_vma, chan->id * 0x1000,
+			  *(struct nouveau_mem **)fctx->user->node);
+
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x08, lower_32_bits(fctx->user->vinst));
+	nv_wo32(chan->ramin, 0x0c, upper_32_bits(fctx->user->vinst));
+	nv_wo32(chan->ramin, 0x10, 0x0000face);
+	nv_wo32(chan->ramin, 0x30, 0xfffff902);
+	nv_wo32(chan->ramin, 0x48, lower_32_bits(ib_virt));
+	nv_wo32(chan->ramin, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
 				   upper_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x54, 0x00000002);
-	nv_wo32(fifoch->ramfc, 0x84, 0x20400000);
-	nv_wo32(fifoch->ramfc, 0x94, 0x30000001);
-	nv_wo32(fifoch->ramfc, 0x9c, 0x00000100);
-	nv_wo32(fifoch->ramfc, 0xa4, 0x1f1f1f1f);
-	nv_wo32(fifoch->ramfc, 0xa8, 0x1f1f1f1f);
-	nv_wo32(fifoch->ramfc, 0xac, 0x0000001f);
-	nv_wo32(fifoch->ramfc, 0xb8, 0xf8000000);
-	nv_wo32(fifoch->ramfc, 0xf8, 0x10003080); /* 0x002310 */
-	nv_wo32(fifoch->ramfc, 0xfc, 0x10000010); /* 0x002350 */
+	nv_wo32(chan->ramin, 0x54, 0x00000002);
+	nv_wo32(chan->ramin, 0x84, 0x20400000);
+	nv_wo32(chan->ramin, 0x94, 0x30000001);
+	nv_wo32(chan->ramin, 0x9c, 0x00000100);
+	nv_wo32(chan->ramin, 0xa4, 0x1f1f1f1f);
+	nv_wo32(chan->ramin, 0xa8, 0x1f1f1f1f);
+	nv_wo32(chan->ramin, 0xac, 0x0000001f);
+	nv_wo32(chan->ramin, 0xb8, 0xf8000000);
+	nv_wo32(chan->ramin, 0xf8, 0x10003080); /* 0x002310 */
+	nv_wo32(chan->ramin, 0xfc, 0x10000010); /* 0x002350 */
 	pinstmem->flush(dev);
 
 	nv_wr32(dev, 0x003000 + (chan->id * 8), 0xc0000000 |
 						(chan->ramin->vinst >> 12));
 	nv_wr32(dev, 0x003004 + (chan->id * 8), 0x001f0001);
 	nvc0_fifo_playlist_update(dev);
-	return 0;
 
 error:
-	pfifo->destroy_context(chan);
+	if (ret)
+		priv->base.base.context_del(chan, engine);
 	return ret;
 }
 
-void
-nvc0_fifo_destroy_context(struct nouveau_channel *chan)
+static void
+nvc0_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nvc0_fifo_chan *fctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
-	struct nvc0_fifo_chan *fifoch;
 
 	nv_mask(dev, 0x003004 + (chan->id * 8), 0x00000001, 0x00000000);
 	nv_wr32(dev, 0x002634, chan->id);
 	if (!nv_wait(dev, 0x0002634, 0xffffffff, chan->id))
 		NV_WARN(dev, "0x2634 != chid: 0x%08x\n", nv_rd32(dev, 0x2634));
-
 	nvc0_fifo_playlist_update(dev);
-
 	nv_wr32(dev, 0x003000 + (chan->id * 8), 0x00000000);
 
+	nouveau_gpuobj_ref(NULL, &fctx->user);
 	if (chan->user) {
 		iounmap(chan->user);
 		chan->user = NULL;
 	}
 
-	fifoch = chan->fifo_priv;
-	chan->fifo_priv = NULL;
-	if (!fifoch)
-		return;
-
-	nouveau_gpuobj_ref(NULL, &fifoch->ramfc);
-	nouveau_gpuobj_ref(NULL, &fifoch->user);
-	kfree(fifoch);
-}
-
-int
-nvc0_fifo_load_context(struct nouveau_channel *chan)
-{
-	return 0;
-}
-
-int
-nvc0_fifo_unload_context(struct drm_device *dev)
-{
-	int i;
-
-	for (i = 0; i < 128; i++) {
-		if (!(nv_rd32(dev, 0x003004 + (i * 8)) & 1))
-			continue;
-
-		nv_mask(dev, 0x003004 + (i * 8), 0x00000001, 0x00000000);
-		nv_wr32(dev, 0x002634, i);
-		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
-			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
-				i, nv_rd32(dev, 0x002634));
-			return -EBUSY;
-		}
-	}
-
-	return 0;
-}
-
-static void
-nvc0_fifo_destroy(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv;
-
-	priv = pfifo->priv;
-	if (!priv)
-		return;
-
-	nouveau_vm_put(&priv->user_vma);
-	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
-	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
-	kfree(priv);
-}
-
-void
-nvc0_fifo_takedown(struct drm_device *dev)
-{
-	nv_wr32(dev, 0x002140, 0x00000000);
-	nvc0_fifo_destroy(dev);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
 }
 
 static int
-nvc0_fifo_create(struct drm_device *dev)
+nvc0_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv;
-	int ret;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	pfifo->priv = priv;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000, 0,
-				 &priv->playlist[0]);
-	if (ret)
-		goto error;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000, 0,
-				 &priv->playlist[1]);
-	if (ret)
-		goto error;
-
-	ret = nouveau_vm_get(dev_priv->bar1_vm, pfifo->channels * 0x1000,
-			     12, NV_MEM_ACCESS_RW, &priv->user_vma);
-	if (ret)
-		goto error;
-
-	nouveau_irq_register(dev, 8, nvc0_fifo_isr);
-	return 0;
-
-error:
-	nvc0_fifo_destroy(dev);
-	return ret;
-}
-
-int
-nvc0_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, engine);
 	struct nouveau_channel *chan;
-	struct nvc0_fifo_priv *priv;
-	int ret, i;
-
-	if (!pfifo->priv) {
-		ret = nvc0_fifo_create(dev);
-		if (ret)
-			return ret;
-	}
-	priv = pfifo->priv;
+	int i;
 
 	/* reset PFIFO, enable all available PSUBFIFO areas */
 	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
@@ -309,7 +201,7 @@
 	/* restore PFIFO context table */
 	for (i = 0; i < 128; i++) {
 		chan = dev_priv->channels.ptr[i];
-		if (!chan || !chan->fifo_priv)
+		if (!chan || !chan->engctx[engine])
 			continue;
 
 		nv_wr32(dev, 0x003000 + (i * 8), 0xc0000000 |
@@ -321,6 +213,29 @@
 	return 0;
 }
 
+static int
+nvc0_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	int i;
+
+	for (i = 0; i < 128; i++) {
+		if (!(nv_rd32(dev, 0x003004 + (i * 8)) & 1))
+			continue;
+
+		nv_mask(dev, 0x003004 + (i * 8), 0x00000001, 0x00000000);
+		nv_wr32(dev, 0x002634, i);
+		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
+			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
+				i, nv_rd32(dev, 0x002634));
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0x00000000);
+	return 0;
+}
+
+
 struct nouveau_enum nvc0_fifo_fault_unit[] = {
 	{ 0x00, "PGRAPH" },
 	{ 0x03, "PEEPHOLE" },
@@ -410,13 +325,14 @@
 static int
 nvc0_fifo_page_flip(struct drm_device *dev, u32 chid)
 {
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	unsigned long flags;
 	int ret = -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (likely(chid >= 0 && chid < dev_priv->engine.fifo.channels)) {
+	if (likely(chid >= 0 && chid < priv->base.channels)) {
 		chan = dev_priv->channels.ptr[chid];
 		if (likely(chan))
 			ret = nouveau_finish_page_flip(chan, NULL);
@@ -505,3 +421,56 @@
 		nv_wr32(dev, 0x002140, 0);
 	}
 }
+
+static void
+nvc0_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	nouveau_vm_put(&priv->user_vma);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nvc0_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nvc0_fifo_destroy;
+	priv->base.base.init = nvc0_fifo_init;
+	priv->base.base.fini = nvc0_fifo_fini;
+	priv->base.base.context_new = nvc0_fifo_context_new;
+	priv->base.base.context_del = nvc0_fifo_context_del;
+	priv->base.channels = 128;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 4096, 4096, 0, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 4096, 4096, 0, &priv->playlist[1]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_vm_get(dev_priv->bar1_vm, priv->base.channels * 0x1000,
+			     12, NV_MEM_ACCESS_RW, &priv->user_vma);
+	if (ret)
+		goto error;
+
+	nouveau_irq_register(dev, 8, nvc0_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index 9066102..2a01e6e 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -29,6 +29,7 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 #include "nvc0_graph.h"
 #include "nvc0_grhub.fuc.h"
@@ -620,13 +621,14 @@
 int
 nvc0_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = dev_priv->channels.ptr[i];
 		if (!chan || !chan->ramin)
 			continue;
diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c
index 52c54e0..1855ecbd 100644
--- a/drivers/gpu/drm/nouveau/nve0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nve0_fifo.c
@@ -26,6 +26,7 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 #define NVE0_FIFO_ENGINE_NUM 32
 
@@ -37,6 +38,7 @@
 };
 
 struct nve0_fifo_priv {
+	struct nouveau_fifo_priv base;
 	struct nve0_fifo_engine engine[NVE0_FIFO_ENGINE_NUM];
 	struct {
 		struct nouveau_gpuobj *mem;
@@ -46,7 +48,7 @@
 };
 
 struct nve0_fifo_chan {
-	struct nouveau_gpuobj *ramfc;
+	struct nouveau_fifo_chan base;
 	u32 engine;
 };
 
@@ -55,8 +57,7 @@
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nve0_fifo_priv *priv = pfifo->priv;
+	struct nve0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nve0_fifo_engine *peng = &priv->engine[engine];
 	struct nouveau_gpuobj *cur;
 	u32 match = (engine << 16) | 0x00000001;
@@ -75,7 +76,7 @@
 
 	peng->cur_playlist = !peng->cur_playlist;
 
-	for (i = 0, p = 0; i < pfifo->channels; i++) {
+	for (i = 0, p = 0; i < priv->base.channels; i++) {
 		u32 ctrl = nv_rd32(dev, 0x800004 + (i * 8)) & 0x001f0001;
 		if (ctrl != match)
 			continue;
@@ -91,24 +92,23 @@
 		NV_ERROR(dev, "PFIFO: playlist %d update timeout\n", engine);
 }
 
-int
-nve0_fifo_create_context(struct nouveau_channel *chan)
+static int
+nve0_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nve0_fifo_priv *priv = pfifo->priv;
-	struct nve0_fifo_chan *fifoch;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nve0_fifo_chan *fctx;
 	u64 usermem = priv->user.mem->vinst + chan->id * 512;
 	u64 ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
-	int ret;
+	int ret = 0, i;
 
-	chan->fifo_priv = kzalloc(sizeof(*fifoch), GFP_KERNEL);
-	if (!chan->fifo_priv)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
-	fifoch = chan->fifo_priv;
-	fifoch->engine = 0; /* PGRAPH */
+
+	fctx->engine = 0; /* PGRAPH */
 
 	/* allocate vram for control regs, map into polling area */
 	chan->user = ioremap_wc(pci_resource_start(dev->pdev, 1) +
@@ -118,56 +118,48 @@
 		goto error;
 	}
 
-	/* ramfc */
-	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
-				      chan->ramin->vinst, 0x100,
-				      NVOBJ_FLAG_ZERO_ALLOC, &fifoch->ramfc);
-	if (ret)
-		goto error;
-
-	nv_wo32(fifoch->ramfc, 0x08, lower_32_bits(usermem));
-	nv_wo32(fifoch->ramfc, 0x0c, upper_32_bits(usermem));
-	nv_wo32(fifoch->ramfc, 0x10, 0x0000face);
-	nv_wo32(fifoch->ramfc, 0x30, 0xfffff902);
-	nv_wo32(fifoch->ramfc, 0x48, lower_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x08, lower_32_bits(usermem));
+	nv_wo32(chan->ramin, 0x0c, upper_32_bits(usermem));
+	nv_wo32(chan->ramin, 0x10, 0x0000face);
+	nv_wo32(chan->ramin, 0x30, 0xfffff902);
+	nv_wo32(chan->ramin, 0x48, lower_32_bits(ib_virt));
+	nv_wo32(chan->ramin, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
 				     upper_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x84, 0x20400000);
-	nv_wo32(fifoch->ramfc, 0x94, 0x30000001);
-	nv_wo32(fifoch->ramfc, 0x9c, 0x00000100);
-	nv_wo32(fifoch->ramfc, 0xac, 0x0000001f);
-	nv_wo32(fifoch->ramfc, 0xe4, 0x00000000);
-	nv_wo32(fifoch->ramfc, 0xe8, chan->id);
-	nv_wo32(fifoch->ramfc, 0xf8, 0x10003080); /* 0x002310 */
-	nv_wo32(fifoch->ramfc, 0xfc, 0x10000010); /* 0x002350 */
+	nv_wo32(chan->ramin, 0x84, 0x20400000);
+	nv_wo32(chan->ramin, 0x94, 0x30000001);
+	nv_wo32(chan->ramin, 0x9c, 0x00000100);
+	nv_wo32(chan->ramin, 0xac, 0x0000001f);
+	nv_wo32(chan->ramin, 0xe4, 0x00000000);
+	nv_wo32(chan->ramin, 0xe8, chan->id);
+	nv_wo32(chan->ramin, 0xf8, 0x10003080); /* 0x002310 */
+	nv_wo32(chan->ramin, 0xfc, 0x10000010); /* 0x002350 */
 	pinstmem->flush(dev);
 
 	nv_wr32(dev, 0x800000 + (chan->id * 8), 0x80000000 |
 						(chan->ramin->vinst >> 12));
 	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400);
-	nve0_fifo_playlist_update(dev, fifoch->engine);
+	nve0_fifo_playlist_update(dev, fctx->engine);
 	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400);
-	return 0;
 
 error:
-	pfifo->destroy_context(chan);
+	if (ret)
+		priv->base.base.context_del(chan, engine);
 	return ret;
 }
 
-void
-nve0_fifo_destroy_context(struct nouveau_channel *chan)
+static void
+nve0_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
-	struct nve0_fifo_chan *fifoch = chan->fifo_priv;
+	struct nve0_fifo_chan *fctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 
-	if (!fifoch)
-		return;
-
 	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000800, 0x00000800);
 	nv_wr32(dev, 0x002634, chan->id);
 	if (!nv_wait(dev, 0x0002634, 0xffffffff, chan->id))
 		NV_WARN(dev, "0x2634 != chid: 0x%08x\n", nv_rd32(dev, 0x2634));
-	nve0_fifo_playlist_update(dev, fifoch->engine);
+	nve0_fifo_playlist_update(dev, fctx->engine);
 	nv_wr32(dev, 0x800000 + (chan->id * 8), 0x00000000);
 
 	if (chan->user) {
@@ -175,118 +167,17 @@
 		chan->user = NULL;
 	}
 
-	nouveau_gpuobj_ref(NULL, &fifoch->ramfc);
-	chan->fifo_priv = NULL;
-	kfree(fifoch);
-}
-
-int
-nve0_fifo_load_context(struct nouveau_channel *chan)
-{
-	return 0;
-}
-
-int
-nve0_fifo_unload_context(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
-
-	for (i = 0; i < pfifo->channels; i++) {
-		if (!(nv_rd32(dev, 0x800004 + (i * 8)) & 1))
-			continue;
-
-		nv_mask(dev, 0x800004 + (i * 8), 0x00000800, 0x00000800);
-		nv_wr32(dev, 0x002634, i);
-		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
-			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
-				i, nv_rd32(dev, 0x002634));
-			return -EBUSY;
-		}
-	}
-
-	return 0;
-}
-
-static void
-nve0_fifo_destroy(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nve0_fifo_priv *priv;
-	int i;
-
-	priv = pfifo->priv;
-	if (!priv)
-		return;
-
-	nouveau_vm_put(&priv->user.bar);
-	nouveau_gpuobj_ref(NULL, &priv->user.mem);
-
-	for (i = 0; i < NVE0_FIFO_ENGINE_NUM; i++) {
-		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[0]);
-		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[1]);
-	}
-	kfree(priv);
-}
-
-void
-nve0_fifo_takedown(struct drm_device *dev)
-{
-	nv_wr32(dev, 0x002140, 0x00000000);
-	nve0_fifo_destroy(dev);
+	chan->engctx[NVOBJ_ENGINE_FIFO] = NULL;
+	kfree(fctx);
 }
 
 static int
-nve0_fifo_create(struct drm_device *dev)
+nve0_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nve0_fifo_priv *priv;
-	int ret;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	pfifo->priv = priv;
-
-	ret = nouveau_gpuobj_new(dev, NULL, pfifo->channels * 512, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC, &priv->user.mem);
-	if (ret)
-		goto error;
-
-	ret = nouveau_vm_get(dev_priv->bar1_vm, priv->user.mem->size,
-			     12, NV_MEM_ACCESS_RW, &priv->user.bar);
-	if (ret)
-		goto error;
-
-	nouveau_vm_map(&priv->user.bar, *(struct nouveau_mem **)priv->user.mem->node);
-
-	nouveau_irq_register(dev, 8, nve0_fifo_isr);
-	return 0;
-
-error:
-	nve0_fifo_destroy(dev);
-	return ret;
-}
-
-int
-nve0_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_channel *chan;
-	struct nve0_fifo_chan *fifoch;
-	struct nve0_fifo_priv *priv;
-	int ret, i;
-
-	if (!pfifo->priv) {
-		ret = nve0_fifo_create(dev);
-		if (ret)
-			return ret;
-	}
-	priv = pfifo->priv;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nve0_fifo_chan *fctx;
+	int i;
 
 	/* reset PFIFO, enable all available PSUBFIFO areas */
 	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
@@ -310,22 +201,44 @@
 	nv_wr32(dev, 0x002140, 0xbfffffff);
 
 	/* restore PFIFO context table */
-	for (i = 0; i < pfifo->channels; i++) {
-		chan = dev_priv->channels.ptr[i];
-		if (!chan || !chan->fifo_priv)
+	for (i = 0; i < priv->base.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (!chan || !(fctx = chan->engctx[engine]))
 			continue;
-		fifoch = chan->fifo_priv;
 
 		nv_wr32(dev, 0x800000 + (i * 8), 0x80000000 |
 						 (chan->ramin->vinst >> 12));
 		nv_mask(dev, 0x800004 + (i * 8), 0x00000400, 0x00000400);
-		nve0_fifo_playlist_update(dev, fifoch->engine);
+		nve0_fifo_playlist_update(dev, fctx->engine);
 		nv_mask(dev, 0x800004 + (i * 8), 0x00000400, 0x00000400);
 	}
 
 	return 0;
 }
 
+static int
+nve0_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (!(nv_rd32(dev, 0x800004 + (i * 8)) & 1))
+			continue;
+
+		nv_mask(dev, 0x800004 + (i * 8), 0x00000800, 0x00000800);
+		nv_wr32(dev, 0x002634, i);
+		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
+			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
+				i, nv_rd32(dev, 0x002634));
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0x00000000);
+	return 0;
+}
+
 struct nouveau_enum nve0_fifo_fault_unit[] = {
 	{}
 };
@@ -451,3 +364,60 @@
 		nv_wr32(dev, 0x002140, 0);
 	}
 }
+
+static void
+nve0_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	nouveau_vm_put(&priv->user.bar);
+	nouveau_gpuobj_ref(NULL, &priv->user.mem);
+
+	for (i = 0; i < NVE0_FIFO_ENGINE_NUM; i++) {
+		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[0]);
+		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[1]);
+	}
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nve0_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nve0_fifo_destroy;
+	priv->base.base.init = nve0_fifo_init;
+	priv->base.base.fini = nve0_fifo_fini;
+	priv->base.base.context_new = nve0_fifo_context_new;
+	priv->base.base.context_del = nve0_fifo_context_del;
+	priv->base.channels = 4096;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 512, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->user.mem);
+	if (ret)
+		goto error;
+
+	ret = nouveau_vm_get(dev_priv->bar1_vm, priv->user.mem->size,
+			     12, NV_MEM_ACCESS_RW, &priv->user.bar);
+	if (ret)
+		goto error;
+
+	nouveau_vm_map(&priv->user.bar, *(struct nouveau_mem **)priv->user.mem->node);
+
+	nouveau_irq_register(dev, 8, nve0_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nve0_graph.c b/drivers/gpu/drm/nouveau/nve0_graph.c
index 9cc3cf2..4135627 100644
--- a/drivers/gpu/drm/nouveau/nve0_graph.c
+++ b/drivers/gpu/drm/nouveau/nve0_graph.c
@@ -29,6 +29,7 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 #include "nve0_graph.h"
 
@@ -548,13 +549,14 @@
 int
 nve0_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = dev_priv->channels.ptr[i];
 		if (!chan || !chan->ramin)
 			continue;