drm/nouveau: modify object accessors, offset in bytes rather than dwords

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index c684686..372adfd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -138,6 +138,7 @@
 #define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
 #define NVOBJ_FLAG_FAKE			(1 << 3)
 struct nouveau_gpuobj {
+	struct drm_device *dev;
 	struct list_head list;
 
 	struct nouveau_channel *im_channel;
@@ -1291,17 +1292,8 @@
 }
 
 /* object access */
-static inline u32 nv_ro32(struct drm_device *dev, struct nouveau_gpuobj *obj,
-				unsigned index)
-{
-	return nv_ri32(dev, obj->im_pramin->start + index * 4);
-}
-
-static inline void nv_wo32(struct drm_device *dev, struct nouveau_gpuobj *obj,
-				unsigned index, u32 val)
-{
-	nv_wi32(dev, obj->im_pramin->start + index * 4, val);
-}
+extern u32 nv_ro32(struct nouveau_gpuobj *, u32 offset);
+extern void nv_wo32(struct nouveau_gpuobj *, u32 offset, u32 val);
 
 /*
  * Logging
diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.h b/drivers/gpu/drm/nouveau/nouveau_grctx.h
index 5d39c4c..4a8ad13 100644
--- a/drivers/gpu/drm/nouveau/nouveau_grctx.h
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.h
@@ -126,7 +126,7 @@
 	reg = (reg - 0x00400000) / 4;
 	reg = (reg - ctx->ctxprog_reg) + ctx->ctxvals_base;
 
-	nv_wo32(ctx->dev, ctx->data, reg, val);
+	nv_wo32(ctx->data, reg * 4, val);
 }
 #endif
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index c14466b..f34c532 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -169,8 +169,9 @@
 			virt  += (end - pte);
 
 			while (pte < end) {
-				nv_wo32(dev, pgt, pte++, offset_l);
-				nv_wo32(dev, pgt, pte++, offset_h);
+				nv_wo32(pgt, (pte * 4) + 0, offset_l);
+				nv_wo32(pgt, (pte * 4) + 4, offset_h);
+				pte += 2;
 			}
 		}
 	}
@@ -203,8 +204,10 @@
 		pages -= (end - pte);
 		virt  += (end - pte) << 15;
 
-		while (pte < end)
-			nv_wo32(dev, pgt, pte++, 0);
+		while (pte < end) {
+			nv_wo32(pgt, (pte * 4), 0);
+			pte++;
+		}
 	}
 	dev_priv->engine.instmem.flush(dev);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index e658aa2..52db13c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -88,6 +88,7 @@
 	if (!gpuobj)
 		return -ENOMEM;
 	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
+	gpuobj->dev = dev;
 	gpuobj->flags = flags;
 	gpuobj->im_channel = chan;
 
@@ -134,7 +135,7 @@
 		int i;
 
 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
-			nv_wo32(dev, gpuobj, i/4, 0);
+			nv_wo32(gpuobj, i, 0);
 		engine->instmem.flush(dev);
 	}
 
@@ -224,7 +225,7 @@
 
 	if (gpuobj->im_pramin && (gpuobj->flags & NVOBJ_FLAG_ZERO_FREE)) {
 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
-			nv_wo32(dev, gpuobj, i/4, 0);
+			nv_wo32(gpuobj, i, 0);
 		engine->instmem.flush(dev);
 	}
 
@@ -435,6 +436,7 @@
 	if (!gpuobj)
 		return -ENOMEM;
 	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
+	gpuobj->dev = dev;
 	gpuobj->im_channel = NULL;
 	gpuobj->flags      = flags | NVOBJ_FLAG_FAKE;
 
@@ -458,7 +460,7 @@
 
 	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
-			nv_wo32(dev, gpuobj, i/4, 0);
+			nv_wo32(gpuobj, i, 0);
 		dev_priv->engine.instmem.flush(dev);
 	}
 
@@ -555,14 +557,12 @@
 		adjust = offset &  0x00000fff;
 		frame  = offset & ~0x00000fff;
 
-		nv_wo32(dev, *gpuobj, 0, ((1<<12) | (1<<13) |
-				(adjust << 20) |
-				 (access << 14) |
-				 (target << 16) |
-				  class));
-		nv_wo32(dev, *gpuobj, 1, size - 1);
-		nv_wo32(dev, *gpuobj, 2, frame | pte_flags);
-		nv_wo32(dev, *gpuobj, 3, frame | pte_flags);
+		nv_wo32(*gpuobj,  0, ((1<<12) | (1<<13) | (adjust << 20) |
+				      (access << 14) | (target << 16) |
+				      class));
+		nv_wo32(*gpuobj,  4, size - 1);
+		nv_wo32(*gpuobj,  8, frame | pte_flags);
+		nv_wo32(*gpuobj, 12, frame | pte_flags);
 	} else {
 		uint64_t limit = offset + size - 1;
 		uint32_t flags0, flags5;
@@ -575,12 +575,12 @@
 			flags5 = 0x00080000;
 		}
 
-		nv_wo32(dev, *gpuobj, 0, flags0 | class);
-		nv_wo32(dev, *gpuobj, 1, lower_32_bits(limit));
-		nv_wo32(dev, *gpuobj, 2, lower_32_bits(offset));
-		nv_wo32(dev, *gpuobj, 3, ((upper_32_bits(limit) & 0xff) << 24) |
-					(upper_32_bits(offset) & 0xff));
-		nv_wo32(dev, *gpuobj, 5, flags5);
+		nv_wo32(*gpuobj,  0, flags0 | class);
+		nv_wo32(*gpuobj,  4, lower_32_bits(limit));
+		nv_wo32(*gpuobj,  8, lower_32_bits(offset));
+		nv_wo32(*gpuobj, 12, ((upper_32_bits(limit) & 0xff) << 24) |
+				      (upper_32_bits(offset) & 0xff));
+		nv_wo32(*gpuobj, 20, flags5);
 	}
 
 	instmem->flush(dev);
@@ -699,25 +699,25 @@
 	}
 
 	if (dev_priv->card_type >= NV_50) {
-		nv_wo32(dev, *gpuobj, 0, class);
-		nv_wo32(dev, *gpuobj, 5, 0x00010000);
+		nv_wo32(*gpuobj,  0, class);
+		nv_wo32(*gpuobj, 20, 0x00010000);
 	} else {
 		switch (class) {
 		case NV_CLASS_NULL:
-			nv_wo32(dev, *gpuobj, 0, 0x00001030);
-			nv_wo32(dev, *gpuobj, 1, 0xFFFFFFFF);
+			nv_wo32(*gpuobj, 0, 0x00001030);
+			nv_wo32(*gpuobj, 4, 0xFFFFFFFF);
 			break;
 		default:
 			if (dev_priv->card_type >= NV_40) {
-				nv_wo32(dev, *gpuobj, 0, class);
+				nv_wo32(*gpuobj, 0, class);
 #ifdef __BIG_ENDIAN
-				nv_wo32(dev, *gpuobj, 2, 0x01000000);
+				nv_wo32(*gpuobj, 8, 0x01000000);
 #endif
 			} else {
 #ifdef __BIG_ENDIAN
-				nv_wo32(dev, *gpuobj, 0, class | 0x00080000);
+				nv_wo32(*gpuobj, 0, class | 0x00080000);
 #else
-				nv_wo32(dev, *gpuobj, 0, class);
+				nv_wo32(*gpuobj, 0, class);
 #endif
 			}
 		}
@@ -836,21 +836,20 @@
 		if (ret)
 			return ret;
 		for (i = 0; i < 0x4000; i += 8) {
-			nv_wo32(dev, chan->vm_pd, (i+0)/4, 0x00000000);
-			nv_wo32(dev, chan->vm_pd, (i+4)/4, 0xdeadcafe);
+			nv_wo32(chan->vm_pd, i + 0, 0x00000000);
+			nv_wo32(chan->vm_pd, i + 4, 0xdeadcafe);
 		}
 
-		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 2;
+		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8;
 		ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
 					     dev_priv->gart_info.sg_ctxdma,
 					     &chan->vm_gart_pt);
 		if (ret)
 			return ret;
-		nv_wo32(dev, chan->vm_pd, pde++,
-			    chan->vm_gart_pt->instance | 0x03);
-		nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
+		nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->instance | 3);
+		nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
 
-		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 2;
+		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 8;
 		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
 			ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
 						     dev_priv->vm_vram_pt[i],
@@ -858,9 +857,10 @@
 			if (ret)
 				return ret;
 
-			nv_wo32(dev, chan->vm_pd, pde++,
-				    chan->vm_vram_pt[i]->instance | 0x61);
-			nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
+			nv_wo32(chan->vm_pd, pde + 0,
+				chan->vm_vram_pt[i]->instance | 0x61);
+			nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
+			pde += 8;
 		}
 
 		instmem->flush(dev);
@@ -996,8 +996,8 @@
 			return -ENOMEM;
 		}
 
-		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
-			gpuobj->im_backing_suspend[i] = nv_ro32(dev, gpuobj, i);
+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+			gpuobj->im_backing_suspend[i/4] = nv_ro32(gpuobj, i);
 	}
 
 	return 0;
@@ -1042,8 +1042,8 @@
 		if (!gpuobj->im_backing_suspend)
 			continue;
 
-		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
-			nv_wo32(dev, gpuobj, i, gpuobj->im_backing_suspend[i]);
+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+			nv_wo32(gpuobj, i, gpuobj->im_backing_suspend[i/4]);
 		dev_priv->engine.instmem.flush(dev);
 	}
 
@@ -1120,3 +1120,17 @@
 
 	return 0;
 }
+
+u32
+nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
+{
+	struct drm_device *dev = gpuobj->dev;
+	return nv_ri32(dev, gpuobj->im_pramin->start + offset);
+}
+
+void
+nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
+{
+	struct drm_device *dev = gpuobj->dev;
+	nv_wi32(dev, gpuobj->im_pramin->start + offset, val);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.c b/drivers/gpu/drm/nouveau/nouveau_ramht.c
index 8b27ee5..e5cc93c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ramht.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ramht.c
@@ -54,7 +54,7 @@
 			  uint32_t offset)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t ctx = nv_ro32(dev, ramht, (offset + 4)/4);
+	uint32_t ctx = nv_ro32(ramht, offset + 4);
 
 	if (dev_priv->card_type < NV_40)
 		return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0);
@@ -100,15 +100,15 @@
 			NV_DEBUG(dev,
 				 "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
 				 chan->id, co, ref->handle, ctx);
-			nv_wo32(dev, ramht, (co + 0)/4, ref->handle);
-			nv_wo32(dev, ramht, (co + 4)/4, ctx);
+			nv_wo32(ramht, co + 0, ref->handle);
+			nv_wo32(ramht, co + 4, ctx);
 
 			list_add_tail(&ref->list, &chan->ramht_refs);
 			instmem->flush(dev);
 			return 0;
 		}
 		NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n",
-			 chan->id, co, nv_ro32(dev, ramht, co/4));
+			 chan->id, co, nv_ro32(ramht, co));
 
 		co += 8;
 		if (co >= dev_priv->ramht_size)
@@ -136,13 +136,13 @@
 	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
 	do {
 		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
-		    (ref->handle == nv_ro32(dev, ramht, (co/4)))) {
+		    (ref->handle == nv_ro32(ramht, co))) {
 			NV_DEBUG(dev,
 				 "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
 				 chan->id, co, ref->handle,
-				 nv_ro32(dev, ramht, (co + 4)));
-			nv_wo32(dev, ramht, (co + 0)/4, 0x00000000);
-			nv_wo32(dev, ramht, (co + 4)/4, 0x00000000);
+				 nv_ro32(ramht, co + 4));
+			nv_wo32(ramht, co + 0, 0x00000000);
+			nv_wo32(ramht, co + 4, 0x00000000);
 
 			list_del(&ref->list);
 			instmem->flush(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 6b9187d..630988a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -105,11 +105,13 @@
 		uint32_t offset_h = upper_32_bits(dma_offset);
 
 		for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) {
-			if (dev_priv->card_type < NV_50)
-				nv_wo32(dev, gpuobj, pte++, offset_l | 3);
-			else {
-				nv_wo32(dev, gpuobj, pte++, offset_l | 0x21);
-				nv_wo32(dev, gpuobj, pte++, offset_h & 0xff);
+			if (dev_priv->card_type < NV_50) {
+				nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 3);
+				pte += 1;
+			} else {
+				nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 0x21);
+				nv_wo32(gpuobj, (pte * 4) + 4, offset_h & 0xff);
+				pte += 2;
 			}
 
 			dma_offset += NV_CTXDMA_PAGE_SIZE;
@@ -145,11 +147,13 @@
 		dma_addr_t dma_offset = dev_priv->gart_info.sg_dummy_bus;
 
 		for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) {
-			if (dev_priv->card_type < NV_50)
-				nv_wo32(dev, gpuobj, pte++, dma_offset | 3);
-			else {
-				nv_wo32(dev, gpuobj, pte++, dma_offset | 0x21);
-				nv_wo32(dev, gpuobj, pte++, 0x00000000);
+			if (dev_priv->card_type < NV_50) {
+				nv_wo32(gpuobj, (pte * 4) + 0, dma_offset | 3);
+				pte += 1;
+			} else {
+				nv_wo32(gpuobj, (pte * 4), dma_offset | 0x21);
+				nv_wo32(gpuobj, (pte * 4) + 4, 0x00000000);
+				pte += 2;
 			}
 
 			dma_offset += NV_CTXDMA_PAGE_SIZE;
@@ -258,21 +262,21 @@
 		/* Maybe use NV_DMA_TARGET_AGP for PCIE? NVIDIA do this, and
 		 * confirmed to work on c51.  Perhaps means NV_DMA_TARGET_PCIE
 		 * on those cards? */
-		nv_wo32(dev, gpuobj, 0, NV_CLASS_DMA_IN_MEMORY |
-				       (1 << 12) /* PT present */ |
-				       (0 << 13) /* PT *not* linear */ |
-				       (NV_DMA_ACCESS_RW  << 14) |
-				       (NV_DMA_TARGET_PCI << 16));
-		nv_wo32(dev, gpuobj, 1, aper_size - 1);
+		nv_wo32(gpuobj, 0, NV_CLASS_DMA_IN_MEMORY |
+				   (1 << 12) /* PT present */ |
+				   (0 << 13) /* PT *not* linear */ |
+				   (NV_DMA_ACCESS_RW  << 14) |
+				   (NV_DMA_TARGET_PCI << 16));
+		nv_wo32(gpuobj, 4, aper_size - 1);
 		for (i = 2; i < 2 + (aper_size >> 12); i++) {
-			nv_wo32(dev, gpuobj, i,
-				    dev_priv->gart_info.sg_dummy_bus | 3);
+			nv_wo32(gpuobj, i * 4,
+				dev_priv->gart_info.sg_dummy_bus | 3);
 		}
 	} else {
 		for (i = 0; i < obj_size; i += 8) {
-			nv_wo32(dev, gpuobj, (i+0)/4,
-				    dev_priv->gart_info.sg_dummy_bus | 0x21);
-			nv_wo32(dev, gpuobj, (i+4)/4, 0);
+			nv_wo32(gpuobj, i + 0,
+				dev_priv->gart_info.sg_dummy_bus | 0x21);
+			nv_wo32(gpuobj, i + 4, 0);
 		}
 	}
 	dev_priv->engine.instmem.flush(dev);
@@ -308,9 +312,9 @@
 	struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
 	int pte;
 
-	pte = (offset >> NV_CTXDMA_PAGE_SHIFT);
+	pte = (offset >> NV_CTXDMA_PAGE_SHIFT) << 2;
 	if (dev_priv->card_type < NV_50) {
-		*page = nv_ro32(dev, gpuobj, (pte + 2)) & ~NV_CTXDMA_PAGE_MASK;
+		*page = nv_ro32(gpuobj, (pte + 8)) & ~NV_CTXDMA_PAGE_MASK;
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index 06cedd9..bbb87ef 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -38,10 +38,10 @@
 #define NV04_RAMFC_ENGINE                                        0x14
 #define NV04_RAMFC_PULL1_ENGINE                                  0x18
 
-#define RAMFC_WR(offset, val) nv_wo32(dev, chan->ramfc->gpuobj, \
-					 NV04_RAMFC_##offset/4, (val))
-#define RAMFC_RD(offset)      nv_ro32(dev, chan->ramfc->gpuobj, \
-					 NV04_RAMFC_##offset/4)
+#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc->gpuobj, \
+				      NV04_RAMFC_##offset, (val))
+#define RAMFC_RD(offset)      nv_ro32(chan->ramfc->gpuobj, \
+				      NV04_RAMFC_##offset)
 
 void
 nv04_fifo_disable(struct drm_device *dev)
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 17f309b..d8693d3 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -37,49 +37,49 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x033c/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x047c/4, 0x00000101);
-	nv_wo32(dev, ctx, 0x0490/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x04a8/4, 0x44400000);
+	nv_wo32(ctx, 0x033c, 0xffff0000);
+	nv_wo32(ctx, 0x03a0, 0x0fff0000);
+	nv_wo32(ctx, 0x03a4, 0x0fff0000);
+	nv_wo32(ctx, 0x047c, 0x00000101);
+	nv_wo32(ctx, 0x0490, 0x00000111);
+	nv_wo32(ctx, 0x04a8, 0x44400000);
 	for (i = 0x04d4; i <= 0x04e0; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x04f4; i <= 0x0500; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080000);
+		nv_wo32(ctx, i, 0x00080000);
 	for (i = 0x050c; i <= 0x0518; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x051c; i <= 0x0528; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x000105b8);
+		nv_wo32(ctx, i, 0x000105b8);
 	for (i = 0x052c; i <= 0x0538; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
+		nv_wo32(ctx, i, 0x00080008);
 	for (i = 0x055c; i <= 0x0598; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x05fc/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0604/4, 0x00004000);
-	nv_wo32(dev, ctx, 0x0610/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0618/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x061c/4, 0x00010000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x05a4, 0x4b7fffff);
+	nv_wo32(ctx, 0x05fc, 0x00000001);
+	nv_wo32(ctx, 0x0604, 0x00004000);
+	nv_wo32(ctx, 0x0610, 0x00000001);
+	nv_wo32(ctx, 0x0618, 0x00040000);
+	nv_wo32(ctx, 0x061c, 0x00010000);
 	for (i = 0x1c1c; i <= 0x248c; i += 16) {
-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
+		nv_wo32(ctx, (i + 0), 0x10700ff9);
+		nv_wo32(ctx, (i + 4), 0x0436086c);
+		nv_wo32(ctx, (i + 8), 0x000c001b);
 	}
-	nv_wo32(dev, ctx, 0x281c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2830/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x285c/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2860/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2864/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x286c/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2870/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2878/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x2880/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x34a4/4, 0x000fe000);
-	nv_wo32(dev, ctx, 0x3530/4, 0x000003f8);
-	nv_wo32(dev, ctx, 0x3540/4, 0x002fe000);
+	nv_wo32(ctx, 0x281c, 0x3f800000);
+	nv_wo32(ctx, 0x2830, 0x3f800000);
+	nv_wo32(ctx, 0x285c, 0x40000000);
+	nv_wo32(ctx, 0x2860, 0x3f800000);
+	nv_wo32(ctx, 0x2864, 0x3f000000);
+	nv_wo32(ctx, 0x286c, 0x40000000);
+	nv_wo32(ctx, 0x2870, 0x3f800000);
+	nv_wo32(ctx, 0x2878, 0xbf800000);
+	nv_wo32(ctx, 0x2880, 0xbf800000);
+	nv_wo32(ctx, 0x34a4, 0x000fe000);
+	nv_wo32(ctx, 0x3530, 0x000003f8);
+	nv_wo32(ctx, 0x3540, 0x002fe000);
 	for (i = 0x355c; i <= 0x3578; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x001c527c);
+		nv_wo32(ctx, i, 0x001c527c);
 }
 
 static void
@@ -87,58 +87,58 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x035c/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x03c0/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x03c4/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x049c/4, 0x00000101);
-	nv_wo32(dev, ctx, 0x04b0/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x04c8/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x04cc/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x04d0/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x04e4/4, 0x44400000);
-	nv_wo32(dev, ctx, 0x04fc/4, 0x4b800000);
+	nv_wo32(ctx, 0x035c, 0xffff0000);
+	nv_wo32(ctx, 0x03c0, 0x0fff0000);
+	nv_wo32(ctx, 0x03c4, 0x0fff0000);
+	nv_wo32(ctx, 0x049c, 0x00000101);
+	nv_wo32(ctx, 0x04b0, 0x00000111);
+	nv_wo32(ctx, 0x04c8, 0x00000080);
+	nv_wo32(ctx, 0x04cc, 0xffff0000);
+	nv_wo32(ctx, 0x04d0, 0x00000001);
+	nv_wo32(ctx, 0x04e4, 0x44400000);
+	nv_wo32(ctx, 0x04fc, 0x4b800000);
 	for (i = 0x0510; i <= 0x051c; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x0530; i <= 0x053c; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080000);
+		nv_wo32(ctx, i, 0x00080000);
 	for (i = 0x0548; i <= 0x0554; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x0558; i <= 0x0564; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x000105b8);
+		nv_wo32(ctx, i, 0x000105b8);
 	for (i = 0x0568; i <= 0x0574; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
+		nv_wo32(ctx, i, 0x00080008);
 	for (i = 0x0598; i <= 0x05d4; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x05e0/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x0620/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0624/4, 0x30201000);
-	nv_wo32(dev, ctx, 0x0628/4, 0x70605040);
-	nv_wo32(dev, ctx, 0x062c/4, 0xb0a09080);
-	nv_wo32(dev, ctx, 0x0630/4, 0xf0e0d0c0);
-	nv_wo32(dev, ctx, 0x0664/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x066c/4, 0x00004000);
-	nv_wo32(dev, ctx, 0x0678/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0680/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x0684/4, 0x00010000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x05e0, 0x4b7fffff);
+	nv_wo32(ctx, 0x0620, 0x00000080);
+	nv_wo32(ctx, 0x0624, 0x30201000);
+	nv_wo32(ctx, 0x0628, 0x70605040);
+	nv_wo32(ctx, 0x062c, 0xb0a09080);
+	nv_wo32(ctx, 0x0630, 0xf0e0d0c0);
+	nv_wo32(ctx, 0x0664, 0x00000001);
+	nv_wo32(ctx, 0x066c, 0x00004000);
+	nv_wo32(ctx, 0x0678, 0x00000001);
+	nv_wo32(ctx, 0x0680, 0x00040000);
+	nv_wo32(ctx, 0x0684, 0x00010000);
 	for (i = 0x1b04; i <= 0x2374; i += 16) {
-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
+		nv_wo32(ctx, (i + 0), 0x10700ff9);
+		nv_wo32(ctx, (i + 4), 0x0436086c);
+		nv_wo32(ctx, (i + 8), 0x000c001b);
 	}
-	nv_wo32(dev, ctx, 0x2704/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2718/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2744/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2748/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x274c/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x2754/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2758/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2760/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x2768/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x308c/4, 0x000fe000);
-	nv_wo32(dev, ctx, 0x3108/4, 0x000003f8);
-	nv_wo32(dev, ctx, 0x3468/4, 0x002fe000);
+	nv_wo32(ctx, 0x2704, 0x3f800000);
+	nv_wo32(ctx, 0x2718, 0x3f800000);
+	nv_wo32(ctx, 0x2744, 0x40000000);
+	nv_wo32(ctx, 0x2748, 0x3f800000);
+	nv_wo32(ctx, 0x274c, 0x3f000000);
+	nv_wo32(ctx, 0x2754, 0x40000000);
+	nv_wo32(ctx, 0x2758, 0x3f800000);
+	nv_wo32(ctx, 0x2760, 0xbf800000);
+	nv_wo32(ctx, 0x2768, 0xbf800000);
+	nv_wo32(ctx, 0x308c, 0x000fe000);
+	nv_wo32(ctx, 0x3108, 0x000003f8);
+	nv_wo32(ctx, 0x3468, 0x002fe000);
 	for (i = 0x3484; i <= 0x34a0; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x001c527c);
+		nv_wo32(ctx, i, 0x001c527c);
 }
 
 static void
@@ -146,49 +146,49 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x033c/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x047c/4, 0x00000101);
-	nv_wo32(dev, ctx, 0x0490/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x04a8/4, 0x44400000);
+	nv_wo32(ctx, 0x033c, 0xffff0000);
+	nv_wo32(ctx, 0x03a0, 0x0fff0000);
+	nv_wo32(ctx, 0x03a4, 0x0fff0000);
+	nv_wo32(ctx, 0x047c, 0x00000101);
+	nv_wo32(ctx, 0x0490, 0x00000111);
+	nv_wo32(ctx, 0x04a8, 0x44400000);
 	for (i = 0x04d4; i <= 0x04e0; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x04f4; i <= 0x0500; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080000);
+		nv_wo32(ctx, i, 0x00080000);
 	for (i = 0x050c; i <= 0x0518; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x051c; i <= 0x0528; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x000105b8);
+		nv_wo32(ctx, i, 0x000105b8);
 	for (i = 0x052c; i <= 0x0538; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
+		nv_wo32(ctx, i, 0x00080008);
 	for (i = 0x055c; i <= 0x0598; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x05fc/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0604/4, 0x00004000);
-	nv_wo32(dev, ctx, 0x0610/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0618/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x061c/4, 0x00010000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x05a4, 0x4b7fffff);
+	nv_wo32(ctx, 0x05fc, 0x00000001);
+	nv_wo32(ctx, 0x0604, 0x00004000);
+	nv_wo32(ctx, 0x0610, 0x00000001);
+	nv_wo32(ctx, 0x0618, 0x00040000);
+	nv_wo32(ctx, 0x061c, 0x00010000);
 	for (i = 0x1a9c; i <= 0x22fc; i += 16) { /*XXX: check!! */
-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
+		nv_wo32(ctx, (i + 0), 0x10700ff9);
+		nv_wo32(ctx, (i + 4), 0x0436086c);
+		nv_wo32(ctx, (i + 8), 0x000c001b);
 	}
-	nv_wo32(dev, ctx, 0x269c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x26b0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x26dc/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x26e0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x26e4/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x26ec/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x26f0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x26f8/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x2700/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x3024/4, 0x000fe000);
-	nv_wo32(dev, ctx, 0x30a0/4, 0x000003f8);
-	nv_wo32(dev, ctx, 0x33fc/4, 0x002fe000);
+	nv_wo32(ctx, 0x269c, 0x3f800000);
+	nv_wo32(ctx, 0x26b0, 0x3f800000);
+	nv_wo32(ctx, 0x26dc, 0x40000000);
+	nv_wo32(ctx, 0x26e0, 0x3f800000);
+	nv_wo32(ctx, 0x26e4, 0x3f000000);
+	nv_wo32(ctx, 0x26ec, 0x40000000);
+	nv_wo32(ctx, 0x26f0, 0x3f800000);
+	nv_wo32(ctx, 0x26f8, 0xbf800000);
+	nv_wo32(ctx, 0x2700, 0xbf800000);
+	nv_wo32(ctx, 0x3024, 0x000fe000);
+	nv_wo32(ctx, 0x30a0, 0x000003f8);
+	nv_wo32(ctx, 0x33fc, 0x002fe000);
 	for (i = 0x341c; i <= 0x3438; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x001c527c);
+		nv_wo32(ctx, i, 0x001c527c);
 }
 
 static void
@@ -196,57 +196,57 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x0410/4, 0x00000101);
-	nv_wo32(dev, ctx, 0x0424/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x0428/4, 0x00000060);
-	nv_wo32(dev, ctx, 0x0444/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0448/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x044c/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x0460/4, 0x44400000);
-	nv_wo32(dev, ctx, 0x048c/4, 0xffff0000);
+	nv_wo32(ctx, 0x0410, 0x00000101);
+	nv_wo32(ctx, 0x0424, 0x00000111);
+	nv_wo32(ctx, 0x0428, 0x00000060);
+	nv_wo32(ctx, 0x0444, 0x00000080);
+	nv_wo32(ctx, 0x0448, 0xffff0000);
+	nv_wo32(ctx, 0x044c, 0x00000001);
+	nv_wo32(ctx, 0x0460, 0x44400000);
+	nv_wo32(ctx, 0x048c, 0xffff0000);
 	for (i = 0x04e0; i < 0x04e8; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x04ec/4, 0x00011100);
+		nv_wo32(ctx, i, 0x0fff0000);
+	nv_wo32(ctx, 0x04ec, 0x00011100);
 	for (i = 0x0508; i < 0x0548; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x0550/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x058c/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0590/4, 0x30201000);
-	nv_wo32(dev, ctx, 0x0594/4, 0x70605040);
-	nv_wo32(dev, ctx, 0x0598/4, 0xb8a89888);
-	nv_wo32(dev, ctx, 0x059c/4, 0xf8e8d8c8);
-	nv_wo32(dev, ctx, 0x05b0/4, 0xb0000000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x0550, 0x4b7fffff);
+	nv_wo32(ctx, 0x058c, 0x00000080);
+	nv_wo32(ctx, 0x0590, 0x30201000);
+	nv_wo32(ctx, 0x0594, 0x70605040);
+	nv_wo32(ctx, 0x0598, 0xb8a89888);
+	nv_wo32(ctx, 0x059c, 0xf8e8d8c8);
+	nv_wo32(ctx, 0x05b0, 0xb0000000);
 	for (i = 0x0600; i < 0x0640; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00010588);
+		nv_wo32(ctx, i, 0x00010588);
 	for (i = 0x0640; i < 0x0680; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x06c0; i < 0x0700; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
+		nv_wo32(ctx, i, 0x0008aae4);
 	for (i = 0x0700; i < 0x0740; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x0740; i < 0x0780; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
-	nv_wo32(dev, ctx, 0x085c/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x0860/4, 0x00010000);
+		nv_wo32(ctx, i, 0x00080008);
+	nv_wo32(ctx, 0x085c, 0x00040000);
+	nv_wo32(ctx, 0x0860, 0x00010000);
 	for (i = 0x0864; i < 0x0874; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00040004);
+		nv_wo32(ctx, i, 0x00040004);
 	for (i = 0x1f18; i <= 0x3088 ; i += 16) {
-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
+		nv_wo32(ctx, i + 0, 0x10700ff9);
+		nv_wo32(ctx, i + 1, 0x0436086c);
+		nv_wo32(ctx, i + 2, 0x000c001b);
 	}
 	for (i = 0x30b8; i < 0x30c8; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
-	nv_wo32(dev, ctx, 0x344c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3808/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x381c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3848/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x384c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3850/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x3858/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x385c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3864/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x386c/4, 0xbf800000);
+		nv_wo32(ctx, i, 0x0000ffff);
+	nv_wo32(ctx, 0x344c, 0x3f800000);
+	nv_wo32(ctx, 0x3808, 0x3f800000);
+	nv_wo32(ctx, 0x381c, 0x3f800000);
+	nv_wo32(ctx, 0x3848, 0x40000000);
+	nv_wo32(ctx, 0x384c, 0x3f800000);
+	nv_wo32(ctx, 0x3850, 0x3f000000);
+	nv_wo32(ctx, 0x3858, 0x40000000);
+	nv_wo32(ctx, 0x385c, 0x3f800000);
+	nv_wo32(ctx, 0x3864, 0xbf800000);
+	nv_wo32(ctx, 0x386c, 0xbf800000);
 }
 
 static void
@@ -254,57 +254,57 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x040c/4, 0x01000101);
-	nv_wo32(dev, ctx, 0x0420/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x0424/4, 0x00000060);
-	nv_wo32(dev, ctx, 0x0440/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0444/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x0448/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x045c/4, 0x44400000);
-	nv_wo32(dev, ctx, 0x0480/4, 0xffff0000);
+	nv_wo32(ctx, 0x040c, 0x01000101);
+	nv_wo32(ctx, 0x0420, 0x00000111);
+	nv_wo32(ctx, 0x0424, 0x00000060);
+	nv_wo32(ctx, 0x0440, 0x00000080);
+	nv_wo32(ctx, 0x0444, 0xffff0000);
+	nv_wo32(ctx, 0x0448, 0x00000001);
+	nv_wo32(ctx, 0x045c, 0x44400000);
+	nv_wo32(ctx, 0x0480, 0xffff0000);
 	for (i = 0x04d4; i < 0x04dc; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x04e0/4, 0x00011100);
+		nv_wo32(ctx, i, 0x0fff0000);
+	nv_wo32(ctx, 0x04e0, 0x00011100);
 	for (i = 0x04fc; i < 0x053c; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x0544/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x057c/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0580/4, 0x30201000);
-	nv_wo32(dev, ctx, 0x0584/4, 0x70605040);
-	nv_wo32(dev, ctx, 0x0588/4, 0xb8a89888);
-	nv_wo32(dev, ctx, 0x058c/4, 0xf8e8d8c8);
-	nv_wo32(dev, ctx, 0x05a0/4, 0xb0000000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x0544, 0x4b7fffff);
+	nv_wo32(ctx, 0x057c, 0x00000080);
+	nv_wo32(ctx, 0x0580, 0x30201000);
+	nv_wo32(ctx, 0x0584, 0x70605040);
+	nv_wo32(ctx, 0x0588, 0xb8a89888);
+	nv_wo32(ctx, 0x058c, 0xf8e8d8c8);
+	nv_wo32(ctx, 0x05a0, 0xb0000000);
 	for (i = 0x05f0; i < 0x0630; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00010588);
+		nv_wo32(ctx, i, 0x00010588);
 	for (i = 0x0630; i < 0x0670; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x06b0; i < 0x06f0; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
+		nv_wo32(ctx, i, 0x0008aae4);
 	for (i = 0x06f0; i < 0x0730; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x0730; i < 0x0770; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
-	nv_wo32(dev, ctx, 0x0850/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x0854/4, 0x00010000);
+		nv_wo32(ctx, i, 0x00080008);
+	nv_wo32(ctx, 0x0850, 0x00040000);
+	nv_wo32(ctx, 0x0854, 0x00010000);
 	for (i = 0x0858; i < 0x0868; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00040004);
+		nv_wo32(ctx, i, 0x00040004);
 	for (i = 0x15ac; i <= 0x271c ; i += 16) {
-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
+		nv_wo32(ctx, i + 0, 0x10700ff9);
+		nv_wo32(ctx, i + 1, 0x0436086c);
+		nv_wo32(ctx, i + 2, 0x000c001b);
 	}
 	for (i = 0x274c; i < 0x275c; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
-	nv_wo32(dev, ctx, 0x2ae0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2e9c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2eb0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2edc/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2ee0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2ee4/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x2eec/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x2ef0/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x2ef8/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x2f00/4, 0xbf800000);
+		nv_wo32(ctx, i, 0x0000ffff);
+	nv_wo32(ctx, 0x2ae0, 0x3f800000);
+	nv_wo32(ctx, 0x2e9c, 0x3f800000);
+	nv_wo32(ctx, 0x2eb0, 0x3f800000);
+	nv_wo32(ctx, 0x2edc, 0x40000000);
+	nv_wo32(ctx, 0x2ee0, 0x3f800000);
+	nv_wo32(ctx, 0x2ee4, 0x3f000000);
+	nv_wo32(ctx, 0x2eec, 0x40000000);
+	nv_wo32(ctx, 0x2ef0, 0x3f800000);
+	nv_wo32(ctx, 0x2ef8, 0xbf800000);
+	nv_wo32(ctx, 0x2f00, 0xbf800000);
 }
 
 static void
@@ -312,57 +312,57 @@
 {
 	int i;
 
-	nv_wo32(dev, ctx, 0x040c/4, 0x00000101);
-	nv_wo32(dev, ctx, 0x0420/4, 0x00000111);
-	nv_wo32(dev, ctx, 0x0424/4, 0x00000060);
-	nv_wo32(dev, ctx, 0x0440/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x0444/4, 0xffff0000);
-	nv_wo32(dev, ctx, 0x0448/4, 0x00000001);
-	nv_wo32(dev, ctx, 0x045c/4, 0x44400000);
-	nv_wo32(dev, ctx, 0x0488/4, 0xffff0000);
+	nv_wo32(ctx, 0x040c, 0x00000101);
+	nv_wo32(ctx, 0x0420, 0x00000111);
+	nv_wo32(ctx, 0x0424, 0x00000060);
+	nv_wo32(ctx, 0x0440, 0x00000080);
+	nv_wo32(ctx, 0x0444, 0xffff0000);
+	nv_wo32(ctx, 0x0448, 0x00000001);
+	nv_wo32(ctx, 0x045c, 0x44400000);
+	nv_wo32(ctx, 0x0488, 0xffff0000);
 	for (i = 0x04dc; i < 0x04e4; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
-	nv_wo32(dev, ctx, 0x04e8/4, 0x00011100);
+		nv_wo32(ctx, i, 0x0fff0000);
+	nv_wo32(ctx, 0x04e8, 0x00011100);
 	for (i = 0x0504; i < 0x0544; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
-	nv_wo32(dev, ctx, 0x054c/4, 0x4b7fffff);
-	nv_wo32(dev, ctx, 0x0588/4, 0x00000080);
-	nv_wo32(dev, ctx, 0x058c/4, 0x30201000);
-	nv_wo32(dev, ctx, 0x0590/4, 0x70605040);
-	nv_wo32(dev, ctx, 0x0594/4, 0xb8a89888);
-	nv_wo32(dev, ctx, 0x0598/4, 0xf8e8d8c8);
-	nv_wo32(dev, ctx, 0x05ac/4, 0xb0000000);
+		nv_wo32(ctx, i, 0x07ff0000);
+	nv_wo32(ctx, 0x054c, 0x4b7fffff);
+	nv_wo32(ctx, 0x0588, 0x00000080);
+	nv_wo32(ctx, 0x058c, 0x30201000);
+	nv_wo32(ctx, 0x0590, 0x70605040);
+	nv_wo32(ctx, 0x0594, 0xb8a89888);
+	nv_wo32(ctx, 0x0598, 0xf8e8d8c8);
+	nv_wo32(ctx, 0x05ac, 0xb0000000);
 	for (i = 0x0604; i < 0x0644; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00010588);
+		nv_wo32(ctx, i, 0x00010588);
 	for (i = 0x0644; i < 0x0684; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00030303);
+		nv_wo32(ctx, i, 0x00030303);
 	for (i = 0x06c4; i < 0x0704; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
+		nv_wo32(ctx, i, 0x0008aae4);
 	for (i = 0x0704; i < 0x0744; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x01012000);
+		nv_wo32(ctx, i, 0x01012000);
 	for (i = 0x0744; i < 0x0784; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00080008);
-	nv_wo32(dev, ctx, 0x0860/4, 0x00040000);
-	nv_wo32(dev, ctx, 0x0864/4, 0x00010000);
+		nv_wo32(ctx, i, 0x00080008);
+	nv_wo32(ctx, 0x0860, 0x00040000);
+	nv_wo32(ctx, 0x0864, 0x00010000);
 	for (i = 0x0868; i < 0x0878; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x00040004);
+		nv_wo32(ctx, i, 0x00040004);
 	for (i = 0x1f1c; i <= 0x308c ; i += 16) {
-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
+		nv_wo32(ctx, i + 0, 0x10700ff9);
+		nv_wo32(ctx, i + 4, 0x0436086c);
+		nv_wo32(ctx, i + 8, 0x000c001b);
 	}
 	for (i = 0x30bc; i < 0x30cc; i += 4)
-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
-	nv_wo32(dev, ctx, 0x3450/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x380c/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3820/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x384c/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x3850/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3854/4, 0x3f000000);
-	nv_wo32(dev, ctx, 0x385c/4, 0x40000000);
-	nv_wo32(dev, ctx, 0x3860/4, 0x3f800000);
-	nv_wo32(dev, ctx, 0x3868/4, 0xbf800000);
-	nv_wo32(dev, ctx, 0x3870/4, 0xbf800000);
+		nv_wo32(ctx, i, 0x0000ffff);
+	nv_wo32(ctx, 0x3450, 0x3f800000);
+	nv_wo32(ctx, 0x380c, 0x3f800000);
+	nv_wo32(ctx, 0x3820, 0x3f800000);
+	nv_wo32(ctx, 0x384c, 0x40000000);
+	nv_wo32(ctx, 0x3850, 0x3f800000);
+	nv_wo32(ctx, 0x3854, 0x3f000000);
+	nv_wo32(ctx, 0x385c, 0x40000000);
+	nv_wo32(ctx, 0x3860, 0x3f800000);
+	nv_wo32(ctx, 0x3868, 0xbf800000);
+	nv_wo32(ctx, 0x3870, 0xbf800000);
 }
 
 int
@@ -372,7 +372,7 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	void (*ctx_init)(struct drm_device *, struct nouveau_gpuobj *);
-	unsigned int idoffs = 0x28/4;
+	unsigned int idoffs = 0x28;
 	int ret;
 
 	switch (dev_priv->chipset) {
@@ -413,11 +413,11 @@
 	ctx_init(dev, chan->ramin_grctx->gpuobj);
 
 	/* nv20: nv_wo32(dev, chan->ramin_grctx->gpuobj, 10, chan->id<<24); */
-	nv_wo32(dev, chan->ramin_grctx->gpuobj, idoffs,
-					(chan->id << 24) | 0x1); /* CTX_USER */
+	nv_wo32(chan->ramin_grctx->gpuobj, idoffs,
+		(chan->id << 24) | 0x1); /* CTX_USER */
 
-	nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id,
-		     chan->ramin_grctx->instance >> 4);
+	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4,
+		chan->ramin_grctx->instance >> 4);
 	return 0;
 }
 
@@ -431,7 +431,7 @@
 	if (chan->ramin_grctx)
 		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
 
-	nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id, 0);
+	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4, 0);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index fd7d2b5..6215dfc 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -73,8 +73,8 @@
 	ctx.data = chan->ramin_grctx->gpuobj;
 	nv40_grctx_init(&ctx);
 
-	nv_wo32(dev, chan->ramin_grctx->gpuobj, 0,
-		     chan->ramin_grctx->gpuobj->im_pramin->start);
+	nv_wo32(chan->ramin_grctx->gpuobj, 0,
+		chan->ramin_grctx->gpuobj->im_pramin->start);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv40_grctx.c b/drivers/gpu/drm/nouveau/nv40_grctx.c
index 9b5c974..ce58509 100644
--- a/drivers/gpu/drm/nouveau/nv40_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv40_grctx.c
@@ -596,13 +596,13 @@
 
 	offset += 0x0280/4;
 	for (i = 0; i < 16; i++, offset += 2)
-		nv_wo32(dev, obj, offset, 0x3f800000);
+		nv_wo32(obj, offset * 4, 0x3f800000);
 
 	for (vs = 0; vs < vs_nr; vs++, offset += vs_len) {
 		for (i = 0; i < vs_nr_b0 * 6; i += 6)
-			nv_wo32(dev, obj, offset + b0_offset + i, 0x00000001);
+			nv_wo32(obj, (offset + b0_offset + i) * 4, 0x00000001);
 		for (i = 0; i < vs_nr_b1 * 4; i += 4)
-			nv_wo32(dev, obj, offset + b1_offset + i, 0x3f800000);
+			nv_wo32(obj, (offset + b1_offset + i) * 4, 0x3f800000);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index c87f874..435d2b7 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -72,15 +72,15 @@
 		return ret;
 	}
 
-	nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class);
-	nv_wo32(dev, obj, 1, limit);
-	nv_wo32(dev, obj, 2, offset);
-	nv_wo32(dev, obj, 3, 0x00000000);
-	nv_wo32(dev, obj, 4, 0x00000000);
+	nv_wo32(obj,  0, (tile_flags << 22) | (magic_flags << 16) | class);
+	nv_wo32(obj,  4, limit);
+	nv_wo32(obj,  8, offset);
+	nv_wo32(obj, 12, 0x00000000);
+	nv_wo32(obj, 16, 0x00000000);
 	if (dev_priv->card_type < NV_C0)
-		nv_wo32(dev, obj, 5, 0x00010000);
+		nv_wo32(obj, 20, 0x00010000);
 	else
-		nv_wo32(dev, obj, 5, 0x00020000);
+		nv_wo32(obj, 20, 0x00020000);
 	dev_priv->engine.instmem.flush(dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index fb0281a..38dbcda 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -43,8 +43,10 @@
 
 	/* We never schedule channel 0 or 127 */
 	for (i = 1, nr = 0; i < 127; i++) {
-		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc)
-			nv_wo32(dev, cur->gpuobj, nr++, i);
+		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc) {
+			nv_wo32(cur->gpuobj, (nr * 4), i);
+			nr++;
+		}
 	}
 	dev_priv->engine.instmem.flush(dev);
 
@@ -258,27 +260,25 @@
 
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
 
-	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
-	nv_wo32(dev, ramfc, 0x80/4, (0 << 27) /* 4KiB */ |
-				    (4 << 24) /* SEARCH_FULL */ |
-				    (chan->ramht->instance >> 4));
-	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
-	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
-	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
-	nv_wo32(dev, ramfc, 0x7c/4, 0x30000001);
-	nv_wo32(dev, ramfc, 0x78/4, 0x00000000);
-	nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078);
-	nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base +
-				    chan->dma.ib_base * 4);
-	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
+	nv_wo32(ramfc, 0x48, chan->pushbuf->instance >> 4);
+	nv_wo32(ramfc, 0x80, (0 << 27) /* 4KiB */ |
+			     (4 << 24) /* SEARCH_FULL */ |
+			     (chan->ramht->instance >> 4));
+	nv_wo32(ramfc, 0x44, 0x2101ffff);
+	nv_wo32(ramfc, 0x60, 0x7fffffff);
+	nv_wo32(ramfc, 0x40, 0x00000000);
+	nv_wo32(ramfc, 0x7c, 0x30000001);
+	nv_wo32(ramfc, 0x78, 0x00000000);
+	nv_wo32(ramfc, 0x3c, 0x403f6078);
+	nv_wo32(ramfc, 0x50, chan->pushbuf_base + chan->dma.ib_base * 4);
+	nv_wo32(ramfc, 0x54, drm_order(chan->dma.ib_max + 1) << 16);
 
 	if (dev_priv->chipset != 0x50) {
-		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);
-		nv_wo32(dev, chan->ramin->gpuobj, 1,
-						chan->ramfc->instance >> 8);
+		nv_wo32(chan->ramin->gpuobj, 0, chan->id);
+		nv_wo32(chan->ramin->gpuobj, 4, chan->ramfc->instance >> 8);
 
-		nv_wo32(dev, ramfc, 0x88/4, chan->cache->instance >> 10);
-		nv_wo32(dev, ramfc, 0x98/4, chan->ramin->instance >> 12);
+		nv_wo32(ramfc, 0x88, chan->cache->instance >> 10);
+		nv_wo32(ramfc, 0x98, chan->ramin->instance >> 12);
 	}
 
 	dev_priv->engine.instmem.flush(dev);
@@ -321,57 +321,57 @@
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	nv_wr32(dev, 0x3330, nv_ro32(dev, ramfc, 0x00/4));
-	nv_wr32(dev, 0x3334, nv_ro32(dev, ramfc, 0x04/4));
-	nv_wr32(dev, 0x3240, nv_ro32(dev, ramfc, 0x08/4));
-	nv_wr32(dev, 0x3320, nv_ro32(dev, ramfc, 0x0c/4));
-	nv_wr32(dev, 0x3244, nv_ro32(dev, ramfc, 0x10/4));
-	nv_wr32(dev, 0x3328, nv_ro32(dev, ramfc, 0x14/4));
-	nv_wr32(dev, 0x3368, nv_ro32(dev, ramfc, 0x18/4));
-	nv_wr32(dev, 0x336c, nv_ro32(dev, ramfc, 0x1c/4));
-	nv_wr32(dev, 0x3370, nv_ro32(dev, ramfc, 0x20/4));
-	nv_wr32(dev, 0x3374, nv_ro32(dev, ramfc, 0x24/4));
-	nv_wr32(dev, 0x3378, nv_ro32(dev, ramfc, 0x28/4));
-	nv_wr32(dev, 0x337c, nv_ro32(dev, ramfc, 0x2c/4));
-	nv_wr32(dev, 0x3228, nv_ro32(dev, ramfc, 0x30/4));
-	nv_wr32(dev, 0x3364, nv_ro32(dev, ramfc, 0x34/4));
-	nv_wr32(dev, 0x32a0, nv_ro32(dev, ramfc, 0x38/4));
-	nv_wr32(dev, 0x3224, nv_ro32(dev, ramfc, 0x3c/4));
-	nv_wr32(dev, 0x324c, nv_ro32(dev, ramfc, 0x40/4));
-	nv_wr32(dev, 0x2044, nv_ro32(dev, ramfc, 0x44/4));
-	nv_wr32(dev, 0x322c, nv_ro32(dev, ramfc, 0x48/4));
-	nv_wr32(dev, 0x3234, nv_ro32(dev, ramfc, 0x4c/4));
-	nv_wr32(dev, 0x3340, nv_ro32(dev, ramfc, 0x50/4));
-	nv_wr32(dev, 0x3344, nv_ro32(dev, ramfc, 0x54/4));
-	nv_wr32(dev, 0x3280, nv_ro32(dev, ramfc, 0x58/4));
-	nv_wr32(dev, 0x3254, nv_ro32(dev, ramfc, 0x5c/4));
-	nv_wr32(dev, 0x3260, nv_ro32(dev, ramfc, 0x60/4));
-	nv_wr32(dev, 0x3264, nv_ro32(dev, ramfc, 0x64/4));
-	nv_wr32(dev, 0x3268, nv_ro32(dev, ramfc, 0x68/4));
-	nv_wr32(dev, 0x326c, nv_ro32(dev, ramfc, 0x6c/4));
-	nv_wr32(dev, 0x32e4, nv_ro32(dev, ramfc, 0x70/4));
-	nv_wr32(dev, 0x3248, nv_ro32(dev, ramfc, 0x74/4));
-	nv_wr32(dev, 0x2088, nv_ro32(dev, ramfc, 0x78/4));
-	nv_wr32(dev, 0x2058, nv_ro32(dev, ramfc, 0x7c/4));
-	nv_wr32(dev, 0x2210, nv_ro32(dev, ramfc, 0x80/4));
+	nv_wr32(dev, 0x3330, nv_ro32(ramfc, 0x00));
+	nv_wr32(dev, 0x3334, nv_ro32(ramfc, 0x04));
+	nv_wr32(dev, 0x3240, nv_ro32(ramfc, 0x08));
+	nv_wr32(dev, 0x3320, nv_ro32(ramfc, 0x0c));
+	nv_wr32(dev, 0x3244, nv_ro32(ramfc, 0x10));
+	nv_wr32(dev, 0x3328, nv_ro32(ramfc, 0x14));
+	nv_wr32(dev, 0x3368, nv_ro32(ramfc, 0x18));
+	nv_wr32(dev, 0x336c, nv_ro32(ramfc, 0x1c));
+	nv_wr32(dev, 0x3370, nv_ro32(ramfc, 0x20));
+	nv_wr32(dev, 0x3374, nv_ro32(ramfc, 0x24));
+	nv_wr32(dev, 0x3378, nv_ro32(ramfc, 0x28));
+	nv_wr32(dev, 0x337c, nv_ro32(ramfc, 0x2c));
+	nv_wr32(dev, 0x3228, nv_ro32(ramfc, 0x30));
+	nv_wr32(dev, 0x3364, nv_ro32(ramfc, 0x34));
+	nv_wr32(dev, 0x32a0, nv_ro32(ramfc, 0x38));
+	nv_wr32(dev, 0x3224, nv_ro32(ramfc, 0x3c));
+	nv_wr32(dev, 0x324c, nv_ro32(ramfc, 0x40));
+	nv_wr32(dev, 0x2044, nv_ro32(ramfc, 0x44));
+	nv_wr32(dev, 0x322c, nv_ro32(ramfc, 0x48));
+	nv_wr32(dev, 0x3234, nv_ro32(ramfc, 0x4c));
+	nv_wr32(dev, 0x3340, nv_ro32(ramfc, 0x50));
+	nv_wr32(dev, 0x3344, nv_ro32(ramfc, 0x54));
+	nv_wr32(dev, 0x3280, nv_ro32(ramfc, 0x58));
+	nv_wr32(dev, 0x3254, nv_ro32(ramfc, 0x5c));
+	nv_wr32(dev, 0x3260, nv_ro32(ramfc, 0x60));
+	nv_wr32(dev, 0x3264, nv_ro32(ramfc, 0x64));
+	nv_wr32(dev, 0x3268, nv_ro32(ramfc, 0x68));
+	nv_wr32(dev, 0x326c, nv_ro32(ramfc, 0x6c));
+	nv_wr32(dev, 0x32e4, nv_ro32(ramfc, 0x70));
+	nv_wr32(dev, 0x3248, nv_ro32(ramfc, 0x74));
+	nv_wr32(dev, 0x2088, nv_ro32(ramfc, 0x78));
+	nv_wr32(dev, 0x2058, nv_ro32(ramfc, 0x7c));
+	nv_wr32(dev, 0x2210, nv_ro32(ramfc, 0x80));
 
-	cnt = nv_ro32(dev, ramfc, 0x84/4);
+	cnt = nv_ro32(ramfc, 0x84);
 	for (ptr = 0; ptr < cnt; ptr++) {
 		nv_wr32(dev, NV40_PFIFO_CACHE1_METHOD(ptr),
-			nv_ro32(dev, cache, (ptr * 2) + 0));
+			nv_ro32(cache, (ptr * 8) + 0));
 		nv_wr32(dev, NV40_PFIFO_CACHE1_DATA(ptr),
-			nv_ro32(dev, cache, (ptr * 2) + 1));
+			nv_ro32(cache, (ptr * 8) + 4));
 	}
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, cnt << 2);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
 
 	/* guessing that all the 0x34xx regs aren't on NV50 */
 	if (dev_priv->chipset != 0x50) {
-		nv_wr32(dev, 0x340c, nv_ro32(dev, ramfc, 0x88/4));
-		nv_wr32(dev, 0x3400, nv_ro32(dev, ramfc, 0x8c/4));
-		nv_wr32(dev, 0x3404, nv_ro32(dev, ramfc, 0x90/4));
-		nv_wr32(dev, 0x3408, nv_ro32(dev, ramfc, 0x94/4));
-		nv_wr32(dev, 0x3410, nv_ro32(dev, ramfc, 0x98/4));
+		nv_wr32(dev, 0x340c, nv_ro32(ramfc, 0x88));
+		nv_wr32(dev, 0x3400, nv_ro32(ramfc, 0x8c));
+		nv_wr32(dev, 0x3404, nv_ro32(ramfc, 0x90));
+		nv_wr32(dev, 0x3408, nv_ro32(ramfc, 0x94));
+		nv_wr32(dev, 0x3410, nv_ro32(ramfc, 0x98));
 	}
 
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16));
@@ -402,59 +402,60 @@
 	ramfc = chan->ramfc->gpuobj;
 	cache = chan->cache->gpuobj;
 
-	nv_wo32(dev, ramfc, 0x00/4, nv_rd32(dev, 0x3330));
-	nv_wo32(dev, ramfc, 0x04/4, nv_rd32(dev, 0x3334));
-	nv_wo32(dev, ramfc, 0x08/4, nv_rd32(dev, 0x3240));
-	nv_wo32(dev, ramfc, 0x0c/4, nv_rd32(dev, 0x3320));
-	nv_wo32(dev, ramfc, 0x10/4, nv_rd32(dev, 0x3244));
-	nv_wo32(dev, ramfc, 0x14/4, nv_rd32(dev, 0x3328));
-	nv_wo32(dev, ramfc, 0x18/4, nv_rd32(dev, 0x3368));
-	nv_wo32(dev, ramfc, 0x1c/4, nv_rd32(dev, 0x336c));
-	nv_wo32(dev, ramfc, 0x20/4, nv_rd32(dev, 0x3370));
-	nv_wo32(dev, ramfc, 0x24/4, nv_rd32(dev, 0x3374));
-	nv_wo32(dev, ramfc, 0x28/4, nv_rd32(dev, 0x3378));
-	nv_wo32(dev, ramfc, 0x2c/4, nv_rd32(dev, 0x337c));
-	nv_wo32(dev, ramfc, 0x30/4, nv_rd32(dev, 0x3228));
-	nv_wo32(dev, ramfc, 0x34/4, nv_rd32(dev, 0x3364));
-	nv_wo32(dev, ramfc, 0x38/4, nv_rd32(dev, 0x32a0));
-	nv_wo32(dev, ramfc, 0x3c/4, nv_rd32(dev, 0x3224));
-	nv_wo32(dev, ramfc, 0x40/4, nv_rd32(dev, 0x324c));
-	nv_wo32(dev, ramfc, 0x44/4, nv_rd32(dev, 0x2044));
-	nv_wo32(dev, ramfc, 0x48/4, nv_rd32(dev, 0x322c));
-	nv_wo32(dev, ramfc, 0x4c/4, nv_rd32(dev, 0x3234));
-	nv_wo32(dev, ramfc, 0x50/4, nv_rd32(dev, 0x3340));
-	nv_wo32(dev, ramfc, 0x54/4, nv_rd32(dev, 0x3344));
-	nv_wo32(dev, ramfc, 0x58/4, nv_rd32(dev, 0x3280));
-	nv_wo32(dev, ramfc, 0x5c/4, nv_rd32(dev, 0x3254));
-	nv_wo32(dev, ramfc, 0x60/4, nv_rd32(dev, 0x3260));
-	nv_wo32(dev, ramfc, 0x64/4, nv_rd32(dev, 0x3264));
-	nv_wo32(dev, ramfc, 0x68/4, nv_rd32(dev, 0x3268));
-	nv_wo32(dev, ramfc, 0x6c/4, nv_rd32(dev, 0x326c));
-	nv_wo32(dev, ramfc, 0x70/4, nv_rd32(dev, 0x32e4));
-	nv_wo32(dev, ramfc, 0x74/4, nv_rd32(dev, 0x3248));
-	nv_wo32(dev, ramfc, 0x78/4, nv_rd32(dev, 0x2088));
-	nv_wo32(dev, ramfc, 0x7c/4, nv_rd32(dev, 0x2058));
-	nv_wo32(dev, ramfc, 0x80/4, nv_rd32(dev, 0x2210));
+	nv_wo32(ramfc, 0x00, nv_rd32(dev, 0x3330));
+	nv_wo32(ramfc, 0x04, nv_rd32(dev, 0x3334));
+	nv_wo32(ramfc, 0x08, nv_rd32(dev, 0x3240));
+	nv_wo32(ramfc, 0x0c, nv_rd32(dev, 0x3320));
+	nv_wo32(ramfc, 0x10, nv_rd32(dev, 0x3244));
+	nv_wo32(ramfc, 0x14, nv_rd32(dev, 0x3328));
+	nv_wo32(ramfc, 0x18, nv_rd32(dev, 0x3368));
+	nv_wo32(ramfc, 0x1c, nv_rd32(dev, 0x336c));
+	nv_wo32(ramfc, 0x20, nv_rd32(dev, 0x3370));
+	nv_wo32(ramfc, 0x24, nv_rd32(dev, 0x3374));
+	nv_wo32(ramfc, 0x28, nv_rd32(dev, 0x3378));
+	nv_wo32(ramfc, 0x2c, nv_rd32(dev, 0x337c));
+	nv_wo32(ramfc, 0x30, nv_rd32(dev, 0x3228));
+	nv_wo32(ramfc, 0x34, nv_rd32(dev, 0x3364));
+	nv_wo32(ramfc, 0x38, nv_rd32(dev, 0x32a0));
+	nv_wo32(ramfc, 0x3c, nv_rd32(dev, 0x3224));
+	nv_wo32(ramfc, 0x40, nv_rd32(dev, 0x324c));
+	nv_wo32(ramfc, 0x44, nv_rd32(dev, 0x2044));
+	nv_wo32(ramfc, 0x48, nv_rd32(dev, 0x322c));
+	nv_wo32(ramfc, 0x4c, nv_rd32(dev, 0x3234));
+	nv_wo32(ramfc, 0x50, nv_rd32(dev, 0x3340));
+	nv_wo32(ramfc, 0x54, nv_rd32(dev, 0x3344));
+	nv_wo32(ramfc, 0x58, nv_rd32(dev, 0x3280));
+	nv_wo32(ramfc, 0x5c, nv_rd32(dev, 0x3254));
+	nv_wo32(ramfc, 0x60, nv_rd32(dev, 0x3260));
+	nv_wo32(ramfc, 0x64, nv_rd32(dev, 0x3264));
+	nv_wo32(ramfc, 0x68, nv_rd32(dev, 0x3268));
+	nv_wo32(ramfc, 0x6c, nv_rd32(dev, 0x326c));
+	nv_wo32(ramfc, 0x70, nv_rd32(dev, 0x32e4));
+	nv_wo32(ramfc, 0x74, nv_rd32(dev, 0x3248));
+	nv_wo32(ramfc, 0x78, nv_rd32(dev, 0x2088));
+	nv_wo32(ramfc, 0x7c, nv_rd32(dev, 0x2058));
+	nv_wo32(ramfc, 0x80, nv_rd32(dev, 0x2210));
 
 	put = (nv_rd32(dev, NV03_PFIFO_CACHE1_PUT) & 0x7ff) >> 2;
 	get = (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) & 0x7ff) >> 2;
 	ptr = 0;
 	while (put != get) {
-		nv_wo32(dev, cache, ptr++,
-			    nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get)));
-		nv_wo32(dev, cache, ptr++,
-			    nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get)));
+		nv_wo32(cache, ptr + 0,
+			nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get)));
+		nv_wo32(cache, ptr + 4,
+			nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get)));
 		get = (get + 1) & 0x1ff;
+		ptr += 8;
 	}
 
 	/* guessing that all the 0x34xx regs aren't on NV50 */
 	if (dev_priv->chipset != 0x50) {
-		nv_wo32(dev, ramfc, 0x84/4, ptr >> 1);
-		nv_wo32(dev, ramfc, 0x88/4, nv_rd32(dev, 0x340c));
-		nv_wo32(dev, ramfc, 0x8c/4, nv_rd32(dev, 0x3400));
-		nv_wo32(dev, ramfc, 0x90/4, nv_rd32(dev, 0x3404));
-		nv_wo32(dev, ramfc, 0x94/4, nv_rd32(dev, 0x3408));
-		nv_wo32(dev, ramfc, 0x98/4, nv_rd32(dev, 0x3410));
+		nv_wo32(ramfc, 0x84, ptr >> 3);
+		nv_wo32(ramfc, 0x88, nv_rd32(dev, 0x340c));
+		nv_wo32(ramfc, 0x8c, nv_rd32(dev, 0x3400));
+		nv_wo32(ramfc, 0x90, nv_rd32(dev, 0x3404));
+		nv_wo32(ramfc, 0x94, nv_rd32(dev, 0x3408));
+		nv_wo32(ramfc, 0x98, nv_rd32(dev, 0x3410));
 	}
 
 	dev_priv->engine.instmem.flush(dev);
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 1413028..17a8d788 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -220,20 +220,20 @@
 	obj = chan->ramin_grctx->gpuobj;
 
 	hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
-	nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002);
-	nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
-					   pgraph->grctx_size - 1);
-	nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance);
-	nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0);
-	nv_wo32(dev, ramin, (hdr + 0x10)/4, 0);
-	nv_wo32(dev, ramin, (hdr + 0x14)/4, 0x00010000);
+	nv_wo32(ramin, hdr + 0x00, 0x00190002);
+	nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->instance +
+				   pgraph->grctx_size - 1);
+	nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->instance);
+	nv_wo32(ramin, hdr + 0x0c, 0);
+	nv_wo32(ramin, hdr + 0x10, 0);
+	nv_wo32(ramin, hdr + 0x14, 0x00010000);
 
 	ctx.dev = chan->dev;
 	ctx.mode = NOUVEAU_GRCTX_VALS;
 	ctx.data = obj;
 	nv50_grctx_init(&ctx);
 
-	nv_wo32(dev, obj, 0x00000/4, chan->ramin->instance >> 12);
+	nv_wo32(obj, 0x00000, chan->ramin->instance >> 12);
 
 	dev_priv->engine.instmem.flush(dev);
 	return 0;
@@ -252,7 +252,7 @@
 		return;
 
 	for (i = hdr; i < hdr + 24; i += 4)
-		nv_wo32(dev, chan->ramin->gpuobj, i/4, 0);
+		nv_wo32(chan->ramin->gpuobj, i, 0);
 	dev_priv->engine.instmem.flush(dev);
 
 	nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
index 42a8fb2..ba6c033 100644
--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -995,7 +995,7 @@
 	int i;
 	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
 		for (i = 0; i < num; i++)
-			nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val);
+			nv_wo32(ctx->data, (ctx->ctxvals_pos + (i << 3))*4, val);
 	ctx->ctxvals_pos += num << 3;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 91ef93c..821806c 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -449,9 +449,10 @@
 	}
 
 	while (pte < pte_end) {
-		nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
-		nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
+		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram));
+		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram));
 		vram += NV50_INSTMEM_PAGE_SIZE;
+		pte += 2;
 	}
 	dev_priv->engine.instmem.flush(dev);
 
@@ -476,8 +477,9 @@
 	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
 
 	while (pte < pte_end) {
-		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
-		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
+		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 0, 0x00000000);
+		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 4, 0x00000000);
+		pte += 2;
 	}
 	dev_priv->engine.instmem.flush(dev);