drm/nouveau/gr: switch to device pri macros

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 22bc4db..43b393f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1005,6 +1005,7 @@
 gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
 		      int shift, int buffer)
 {
+	struct nvkm_device *device = info->gr->base.engine.subdev.device;
 	if (info->data) {
 		if (shift >= 0) {
 			info->mmio->addr = addr;
@@ -1021,7 +1022,7 @@
 			return;
 	}
 
-	nv_wr32(info->gr, addr, data);
+	nvkm_wr32(device, addr, data);
 }
 
 void
@@ -1085,20 +1086,21 @@
 void
 gf100_grctx_generate_tpcid(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int gpc, tpc, id;
 
 	for (tpc = 0, id = 0; tpc < 4; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 			if (tpc < gr->tpc_nr[gpc]) {
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x698), id);
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x4e8), id);
-				nv_wr32(gr, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x088), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id);
+				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
 				id++;
 			}
 
-			nv_wr32(gr, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
-			nv_wr32(gr, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
+			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
+			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
 		}
 	}
 }
@@ -1106,18 +1108,20 @@
 void
 gf100_grctx_generate_r406028(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 tmp[GPC_MAX / 8] = {}, i = 0;
 	for (i = 0; i < gr->gpc_nr; i++)
 		tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4);
 	for (i = 0; i < 4; i++) {
-		nv_wr32(gr, 0x406028 + (i * 4), tmp[i]);
-		nv_wr32(gr, 0x405870 + (i * 4), tmp[i]);
+		nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]);
+		nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]);
 	}
 }
 
 void
 gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u8  tpcnr[GPC_MAX], data[TPC_MAX];
 	int gpc, tpc, i;
 
@@ -1134,12 +1138,13 @@
 	}
 
 	for (i = 0; i < 4; i++)
-		nv_wr32(gr, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
+		nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
 }
 
 void
 gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 data[6] = {}, data2[2] = {};
 	u8  tpcnr[GPC_MAX];
 	u8  shift, ntpcv;
@@ -1176,28 +1181,29 @@
 		data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
 
 	/* GPC_BROADCAST */
-	nv_wr32(gr, 0x418bb8, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x418b08 + (i * 4), data[i]);
+		nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
 
 	/* GPC_BROADCAST.TP_BROADCAST */
-	nv_wr32(gr, 0x419bd0, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x419bd0, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr | data2[0]);
-	nv_wr32(gr, 0x419be4, data2[1]);
+	nvkm_wr32(device, 0x419be4, data2[1]);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x419b00 + (i * 4), data[i]);
+		nvkm_wr32(device, 0x419b00 + (i * 4), data[i]);
 
 	/* UNK78xx */
-	nv_wr32(gr, 0x4078bc, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x40780c + (i * 4), data[i]);
+		nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
 }
 
 void
 gf100_grctx_generate_r406800(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u64 tpc_mask = 0, tpc_set = 0;
 	u8  tpcnr[GPC_MAX];
 	int gpc, tpc;
@@ -1219,11 +1225,11 @@
 			tpc_set |= 1ULL << ((gpc * 8) + tpc);
 		}
 
-		nv_wr32(gr, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
-		nv_wr32(gr, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
+		nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
+		nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
 		if (gr->gpc_nr > 4) {
-			nv_wr32(gr, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
-			nv_wr32(gr, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
+			nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
+			nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
 		}
 	}
 }
@@ -1231,6 +1237,7 @@
 void
 gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 0);
@@ -1241,7 +1248,7 @@
 	gf100_gr_mmio(gr, oclass->tpc);
 	gf100_gr_mmio(gr, oclass->ppc);
 
-	nv_wr32(gr, 0x404154, 0x00000000);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->bundle(info);
 	oclass->pagepool(info);
@@ -1255,7 +1262,7 @@
 	gf100_grctx_generate_r406800(gr);
 
 	gf100_gr_icmd(gr, oclass->icmd);
-	nv_wr32(gr, 0x404154, 0x00000400);
+	nvkm_wr32(device, 0x404154, 0x00000400);
 	gf100_gr_mthd(gr, oclass->mthd);
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
 }
@@ -1264,7 +1271,8 @@
 gf100_grctx_generate(struct gf100_gr *gr)
 {
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
-	struct nvkm_bar *bar = nvkm_bar(gr);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_bar *bar = device->bar;
 	struct nvkm_gpuobj *chan;
 	struct gf100_grctx info;
 	int ret, i;
@@ -1302,8 +1310,8 @@
 
 	bar->flush(bar);
 
-	nv_wr32(gr, 0x100cb8, (chan->addr + 0x1000) >> 8);
-	nv_wr32(gr, 0x100cbc, 0x80000001);
+	nvkm_wr32(device, 0x100cb8, (chan->addr + 0x1000) >> 8);
+	nvkm_wr32(device, 0x100cbc, 0x80000001);
 	nv_wait(gr, 0x100c80, 0x00008000, 0x00008000);
 
 	/* setup default state for mmio list construction */
@@ -1315,9 +1323,9 @@
 
 	/* make channel current */
 	if (gr->firmware) {
-		nv_wr32(gr, 0x409840, 0x00000030);
-		nv_wr32(gr, 0x409500, 0x80000000 | chan->addr >> 12);
-		nv_wr32(gr, 0x409504, 0x00000003);
+		nvkm_wr32(device, 0x409840, 0x00000030);
+		nvkm_wr32(device, 0x409500, 0x80000000 | chan->addr >> 12);
+		nvkm_wr32(device, 0x409504, 0x00000003);
 		if (!nv_wait(gr, 0x409800, 0x00000010, 0x00000010))
 			nv_error(gr, "load_ctx timeout\n");
 
@@ -1327,9 +1335,9 @@
 		nv_wo32(chan, 0x8002c, 0);
 		bar->flush(bar);
 	} else {
-		nv_wr32(gr, 0x409840, 0x80000000);
-		nv_wr32(gr, 0x409500, 0x80000000 | chan->addr >> 12);
-		nv_wr32(gr, 0x409504, 0x00000001);
+		nvkm_wr32(device, 0x409840, 0x80000000);
+		nvkm_wr32(device, 0x409500, 0x80000000 | chan->addr >> 12);
+		nvkm_wr32(device, 0x409504, 0x00000001);
 		if (!nv_wait(gr, 0x409800, 0x80000000, 0x80000000))
 			nv_error(gr, "HUB_SET_CHAN timeout\n");
 	}
@@ -1339,8 +1347,8 @@
 	/* trigger a context unload by unsetting the "next channel valid" bit
 	 * and faking a context switch interrupt
 	 */
-	nv_mask(gr, 0x409b04, 0x80000000, 0x00000000);
-	nv_wr32(gr, 0x409000, 0x00000100);
+	nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000);
+	nvkm_wr32(device, 0x409000, 0x00000100);
 	if (!nv_wait(gr, 0x409b00, 0x80000000, 0x00000000)) {
 		nv_error(gr, "grctx template channel unload timeout\n");
 		ret = -EBUSY;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
index d810a0b..caccfed 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
@@ -767,12 +767,13 @@
 void
 gf108_grctx_generate_unkn(struct gf100_gr *gr)
 {
-	nv_mask(gr, 0x418c6c, 0x00000001, 0x00000001);
-	nv_mask(gr, 0x41980c, 0x00000010, 0x00000010);
-	nv_mask(gr, 0x419814, 0x00000004, 0x00000004);
-	nv_mask(gr, 0x4064c0, 0x80000000, 0x80000000);
-	nv_mask(gr, 0x405800, 0x08000000, 0x08000000);
-	nv_mask(gr, 0x419c00, 0x00000008, 0x00000008);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x418c6c, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010);
+	nvkm_mask(device, 0x419814, 0x00000004, 0x00000004);
+	nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x405800, 0x08000000, 0x08000000);
+	nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
 }
 
 struct nvkm_oclass *
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 7970b9d..78f6be2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -219,6 +219,7 @@
 void
 gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	int i;
 
@@ -230,7 +231,7 @@
 	gf100_gr_mmio(gr, oclass->tpc);
 	gf100_gr_mmio(gr, oclass->ppc);
 
-	nv_wr32(gr, 0x404154, 0x00000000);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->bundle(info);
 	oclass->pagepool(info);
@@ -244,10 +245,10 @@
 	gf100_grctx_generate_r406800(gr);
 
 	for (i = 0; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 
 	gf100_gr_icmd(gr, oclass->icmd);
-	nv_wr32(gr, 0x404154, 0x00000400);
+	nvkm_wr32(device, 0x404154, 0x00000400);
 	gf100_gr_mthd(gr, oclass->mthd);
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 7b2a96c..0365aca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -874,17 +874,19 @@
 void
 gk104_grctx_generate_unkn(struct gf100_gr *gr)
 {
-	nv_mask(gr, 0x418c6c, 0x00000001, 0x00000001);
-	nv_mask(gr, 0x41980c, 0x00000010, 0x00000010);
-	nv_mask(gr, 0x41be08, 0x00000004, 0x00000004);
-	nv_mask(gr, 0x4064c0, 0x80000000, 0x80000000);
-	nv_mask(gr, 0x405800, 0x08000000, 0x08000000);
-	nv_mask(gr, 0x419c00, 0x00000008, 0x00000008);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x418c6c, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010);
+	nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004);
+	nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x405800, 0x08000000, 0x08000000);
+	nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
 }
 
 void
 gk104_grctx_generate_r418bb8(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 data[6] = {}, data2[2] = {};
 	u8  tpcnr[GPC_MAX];
 	u8  shift, ntpcv;
@@ -921,36 +923,38 @@
 		data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
 
 	/* GPC_BROADCAST */
-	nv_wr32(gr, 0x418bb8, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x418b08 + (i * 4), data[i]);
+		nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
 
 	/* GPC_BROADCAST.TP_BROADCAST */
-	nv_wr32(gr, 0x41bfd0, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr | data2[0]);
-	nv_wr32(gr, 0x41bfe4, data2[1]);
+	nvkm_wr32(device, 0x41bfe4, data2[1]);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x41bf00 + (i * 4), data[i]);
+		nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]);
 
 	/* UNK78xx */
-	nv_wr32(gr, 0x4078bc, (gr->tpc_total << 8) |
+	nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
 				 gr->magic_not_rop_nr);
 	for (i = 0; i < 6; i++)
-		nv_wr32(gr, 0x40780c + (i * 4), data[i]);
+		nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
 }
 
 void
 gk104_grctx_generate_rop_active_fbps(struct gf100_gr *gr)
 {
-	const u32 fbp_count = nv_rd32(gr, 0x120074);
-	nv_mask(gr, 0x408850, 0x0000000f, fbp_count); /* zrop */
-	nv_mask(gr, 0x408958, 0x0000000f, fbp_count); /* crop */
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 fbp_count = nvkm_rd32(device, 0x120074);
+	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
 }
 
 void
 gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	int i;
 
@@ -962,7 +966,7 @@
 	gf100_gr_mmio(gr, oclass->tpc);
 	gf100_gr_mmio(gr, oclass->ppc);
 
-	nv_wr32(gr, 0x404154, 0x00000000);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->bundle(info);
 	oclass->pagepool(info);
@@ -975,19 +979,19 @@
 	gf100_grctx_generate_r406800(gr);
 
 	for (i = 0; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 
-	nv_wr32(gr, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 	gk104_grctx_generate_rop_active_fbps(gr);
-	nv_mask(gr, 0x419f78, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
 
 	gf100_gr_icmd(gr, oclass->icmd);
-	nv_wr32(gr, 0x404154, 0x00000400);
+	nvkm_wr32(device, 0x404154, 0x00000400);
 	gf100_gr_mthd(gr, oclass->mthd);
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
 
-	nv_mask(gr, 0x418800, 0x00200000, 0x00200000);
-	nv_mask(gr, 0x41be10, 0x00800000, 0x00800000);
+	nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
+	nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
 }
 
 struct nvkm_oclass *
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index 91e4aac..252bcc3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -28,6 +28,7 @@
 static void
 gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	int idle_timeout_save;
 	int i;
@@ -36,8 +37,8 @@
 
 	gf100_gr_wait_idle(gr);
 
-	idle_timeout_save = nv_rd32(gr, 0x404154);
-	nv_wr32(gr, 0x404154, 0x00000000);
+	idle_timeout_save = nvkm_rd32(device, 0x404154);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->attrib(info);
 
@@ -49,17 +50,17 @@
 	gf100_grctx_generate_r406800(gr);
 
 	for (i = 0; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 
-	nv_wr32(gr, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 
 	gk104_grctx_generate_rop_active_fbps(gr);
 
-	nv_mask(gr, 0x5044b0, 0x8000000, 0x8000000);
+	nvkm_mask(device, 0x5044b0, 0x8000000, 0x8000000);
 
 	gf100_gr_wait_idle(gr);
 
-	nv_wr32(gr, 0x404154, idle_timeout_save);
+	nvkm_wr32(device, 0x404154, idle_timeout_save);
 	gf100_gr_wait_idle(gr);
 
 	gf100_gr_mthd(gr, gr->fuc_method);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index 0d908a1..9c361ee 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -934,19 +934,20 @@
 void
 gm107_grctx_generate_tpcid(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int gpc, tpc, id;
 
 	for (tpc = 0, id = 0; tpc < 4; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 			if (tpc < gr->tpc_nr[gpc]) {
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x698), id);
-				nv_wr32(gr, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x088), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
+				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
 				id++;
 			}
 
-			nv_wr32(gr, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
-			nv_wr32(gr, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
+			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
+			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
 		}
 	}
 }
@@ -954,6 +955,7 @@
 static void
 gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	int i;
 
@@ -963,7 +965,7 @@
 	gf100_gr_mmio(gr, oclass->tpc);
 	gf100_gr_mmio(gr, oclass->ppc);
 
-	nv_wr32(gr, 0x404154, 0x00000000);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->bundle(info);
 	oclass->pagepool(info);
@@ -975,23 +977,23 @@
 	gk104_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
 
-	nv_wr32(gr, 0x4064d0, 0x00000001);
+	nvkm_wr32(device, 0x4064d0, 0x00000001);
 	for (i = 1; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
-	nv_wr32(gr, 0x406500, 0x00000001);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000001);
 
-	nv_wr32(gr, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 
 	gk104_grctx_generate_rop_active_fbps(gr);
 
 	gf100_gr_icmd(gr, oclass->icmd);
-	nv_wr32(gr, 0x404154, 0x00000400);
+	nvkm_wr32(device, 0x404154, 0x00000400);
 	gf100_gr_mthd(gr, oclass->mthd);
 
-	nv_mask(gr, 0x419e00, 0x00808080, 0x00808080);
-	nv_mask(gr, 0x419ccc, 0x80000000, 0x80000000);
-	nv_mask(gr, 0x419f80, 0x80000000, 0x80000000);
-	nv_mask(gr, 0x419f88, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080);
+	nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000);
 }
 
 struct nvkm_oclass *
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c
index 93f38bd..f8c2432 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c
@@ -921,14 +921,15 @@
 void
 gm204_grctx_generate_tpcid(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int gpc, tpc, id;
 
 	for (tpc = 0, id = 0; tpc < 4; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 			if (tpc < gr->tpc_nr[gpc]) {
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x698), id);
-				nv_wr32(gr, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x088), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
+				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
 				id++;
 			}
 		}
@@ -938,14 +939,16 @@
 static void
 gm204_grctx_generate_rop_active_fbps(struct gf100_gr *gr)
 {
-	const u32 fbp_count = nv_rd32(gr, 0x12006c);
-	nv_mask(gr, 0x408850, 0x0000000f, fbp_count); /* zrop */
-	nv_mask(gr, 0x408958, 0x0000000f, fbp_count); /* crop */
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 fbp_count = nvkm_rd32(device, 0x12006c);
+	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
 }
 
 void
 gm204_grctx_generate_405b60(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
 	u32 dist[TPC_MAX / 4] = {};
 	u32 gpcs[GPC_MAX] = {};
@@ -969,14 +972,15 @@
 	}
 
 	for (i = 0; i < dist_nr; i++)
-		nv_wr32(gr, 0x405b60 + (i * 4), dist[i]);
+		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
 	for (i = 0; i < gr->gpc_nr; i++)
-		nv_wr32(gr, 0x405ba0 + (i * 4), gpcs[i]);
+		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
 }
 
 void
 gm204_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	u32 tmp;
 	int i;
@@ -987,7 +991,7 @@
 	gf100_gr_mmio(gr, oclass->tpc);
 	gf100_gr_mmio(gr, oclass->ppc);
 
-	nv_wr32(gr, 0x404154, 0x00000000);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->bundle(info);
 	oclass->pagepool(info);
@@ -999,25 +1003,25 @@
 	gk104_grctx_generate_r418bb8(gr);
 
 	for (i = 0; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
-	nv_wr32(gr, 0x406500, 0x00000000);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000000);
 
-	nv_wr32(gr, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 
 	gm204_grctx_generate_rop_active_fbps(gr);
 
 	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
 		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
-	nv_wr32(gr, 0x4041c4, tmp);
+	nvkm_wr32(device, 0x4041c4, tmp);
 
 	gm204_grctx_generate_405b60(gr);
 
 	gf100_gr_icmd(gr, oclass->icmd);
-	nv_wr32(gr, 0x404154, 0x00000800);
+	nvkm_wr32(device, 0x404154, 0x00000800);
 	gf100_gr_mthd(gr, oclass->mthd);
 
-	nv_mask(gr, 0x418e94, 0xffffffff, 0xc4230000);
-	nv_mask(gr, 0x418e4c, 0xffffffff, 0x70000000);
+	nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
+	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
 }
 
 struct nvkm_oclass *
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index c44b2e1..5f5affc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -24,19 +24,21 @@
 static void
 gm20b_grctx_generate_r406028(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 tpc_per_gpc = 0;
 	int i;
 
 	for (i = 0; i < gr->gpc_nr; i++)
 		tpc_per_gpc |= gr->tpc_nr[i] << (4 * i);
 
-	nv_wr32(gr, 0x406028, tpc_per_gpc);
-	nv_wr32(gr, 0x405870, tpc_per_gpc);
+	nvkm_wr32(device, 0x406028, tpc_per_gpc);
+	nvkm_wr32(device, 0x405870, tpc_per_gpc);
 }
 
 static void
 gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_grctx_oclass *oclass = (void *)nv_engine(gr)->cclass;
 	int idle_timeout_save;
 	int i, tmp;
@@ -45,8 +47,8 @@
 
 	gf100_gr_wait_idle(gr);
 
-	idle_timeout_save = nv_rd32(gr, 0x404154);
-	nv_wr32(gr, 0x404154, 0x00000000);
+	idle_timeout_save = nvkm_rd32(device, 0x404154);
+	nvkm_wr32(device, 0x404154, 0x00000000);
 
 	oclass->attrib(info);
 
@@ -57,22 +59,22 @@
 	gk104_grctx_generate_r418bb8(gr);
 
 	for (i = 0; i < 8; i++)
-		nv_wr32(gr, 0x4064d0 + (i * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 
-	nv_wr32(gr, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 
 	gk104_grctx_generate_rop_active_fbps(gr);
-	nv_wr32(gr, 0x408908, nv_rd32(gr, 0x410108) | 0x80000000);
+	nvkm_wr32(device, 0x408908, nvkm_rd32(device, 0x410108) | 0x80000000);
 
 	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
 		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
-	nv_wr32(gr, 0x4041c4, tmp);
+	nvkm_wr32(device, 0x4041c4, tmp);
 
 	gm204_grctx_generate_405b60(gr);
 
 	gf100_gr_wait_idle(gr);
 
-	nv_wr32(gr, 0x404154, idle_timeout_save);
+	nvkm_wr32(device, 0x404154, idle_timeout_save);
 	gf100_gr_wait_idle(gr);
 
 	gf100_gr_mthd(gr, gr->fuc_method);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c
index a3b0b36..0c71708 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c
@@ -683,9 +683,9 @@
 
 	nv40_grctx_generate(&ctx);
 
-	nv_wr32(device, 0x400324, 0);
+	nvkm_wr32(device, 0x400324, 0);
 	for (i = 0; i < ctx.ctxprog_len; i++)
-		nv_wr32(device, 0x400328, ctxprog[i]);
+		nvkm_wr32(device, 0x400328, ctxprog[i]);
 	*size = ctx.ctxvals_pos * 4;
 
 	kfree(ctxprog);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
index a9a4e0e..e76bf4a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
@@ -276,9 +276,9 @@
 		return -ENOMEM;
 	nv50_grctx_generate(&ctx);
 
-	nv_wr32(device, 0x400324, 0);
+	nvkm_wr32(device, 0x400324, 0);
 	for (i = 0; i < ctx.ctxprog_len; i++)
-		nv_wr32(device, 0x400328, ctxprog[i]);
+		nvkm_wr32(device, 0x400328, ctxprog[i]);
 	*size = ctx.ctxvals_pos * 4;
 	kfree(ctxprog);
 	return 0;
@@ -298,7 +298,7 @@
 	struct nvkm_device *device = ctx->device;
 	int i, j;
 	int offset, base;
-	u32 units = nv_rd32 (ctx->device, 0x1540);
+	u32 units = nvkm_rd32(device, 0x1540);
 
 	/* 0800: DISPATCH */
 	cp_ctx(ctx, 0x400808, 7);
@@ -1189,7 +1189,7 @@
 	int i;
 	int offset;
 	int size = 0;
-	u32 units = nv_rd32 (ctx->device, 0x1540);
+	u32 units = nvkm_rd32(device, 0x1540);
 
 	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
 	ctx->ctxvals_base = offset;
@@ -3272,7 +3272,7 @@
 	struct nvkm_device *device = ctx->device;
 	int i;
 	u32 offset;
-	u32 units = nv_rd32 (ctx->device, 0x1540);
+	u32 units = nvkm_rd32(device, 0x1540);
 	int size = 0;
 
 	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index c1b84a6..b692e8e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -43,15 +43,16 @@
 static void
 gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	if (gr->zbc_color[zbc].format) {
-		nv_wr32(gr, 0x405804, gr->zbc_color[zbc].ds[0]);
-		nv_wr32(gr, 0x405808, gr->zbc_color[zbc].ds[1]);
-		nv_wr32(gr, 0x40580c, gr->zbc_color[zbc].ds[2]);
-		nv_wr32(gr, 0x405810, gr->zbc_color[zbc].ds[3]);
+		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
+		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
+		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
+		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
 	}
-	nv_wr32(gr, 0x405814, gr->zbc_color[zbc].format);
-	nv_wr32(gr, 0x405820, zbc);
-	nv_wr32(gr, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
+	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
+	nvkm_wr32(device, 0x405820, zbc);
+	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
 }
 
 static int
@@ -93,11 +94,12 @@
 static void
 gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	if (gr->zbc_depth[zbc].format)
-		nv_wr32(gr, 0x405818, gr->zbc_depth[zbc].ds);
-	nv_wr32(gr, 0x40581c, gr->zbc_depth[zbc].format);
-	nv_wr32(gr, 0x405820, zbc);
-	nv_wr32(gr, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
+		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
+	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
+	nvkm_wr32(device, 0x405820, zbc);
+	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
 }
 
 static int
@@ -236,10 +238,11 @@
 			       void *pdata, u32 size)
 {
 	struct gf100_gr *gr = (void *)object->engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	if (size >= sizeof(u32)) {
 		u32 data = *(u32 *)pdata ? 0xffffffff : 0x00000000;
-		nv_wr32(gr, 0x419e44, data);
-		nv_wr32(gr, 0x419e4c, data);
+		nvkm_wr32(device, 0x419e44, data);
+		nvkm_wr32(device, 0x419e4c, data);
 		return 0;
 	}
 	return -EINVAL;
@@ -670,6 +673,7 @@
 int
 gf100_gr_wait_idle(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
 	bool gr_enabled, ctxsw_active, gr_busy;
 
@@ -678,11 +682,11 @@
 		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
 		 * up-to-date
 		 */
-		nv_rd32(gr, 0x400700);
+		nvkm_rd32(device, 0x400700);
 
-		gr_enabled = nv_rd32(gr, 0x200) & 0x1000;
-		ctxsw_active = nv_rd32(gr, 0x2640) & 0x8000;
-		gr_busy = nv_rd32(gr, 0x40060c) & 0x1;
+		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
+		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
+		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
 
 		if (!gr_enabled || (!gr_busy && !ctxsw_active))
 			return 0;
@@ -696,6 +700,7 @@
 void
 gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *pack;
 	const struct gf100_gr_init *init;
 
@@ -703,7 +708,7 @@
 		u32 next = init->addr + init->count * init->pitch;
 		u32 addr = init->addr;
 		while (addr < next) {
-			nv_wr32(gr, addr, init->data);
+			nvkm_wr32(device, addr, init->data);
 			addr += init->pitch;
 		}
 	}
@@ -712,23 +717,24 @@
 void
 gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *pack;
 	const struct gf100_gr_init *init;
 	u32 data = 0;
 
-	nv_wr32(gr, 0x400208, 0x80000000);
+	nvkm_wr32(device, 0x400208, 0x80000000);
 
 	pack_for_each_init(init, pack, p) {
 		u32 next = init->addr + init->count * init->pitch;
 		u32 addr = init->addr;
 
 		if ((pack == p && init == p->init) || data != init->data) {
-			nv_wr32(gr, 0x400204, init->data);
+			nvkm_wr32(device, 0x400204, init->data);
 			data = init->data;
 		}
 
 		while (addr < next) {
-			nv_wr32(gr, 0x400200, addr);
+			nvkm_wr32(device, 0x400200, addr);
 			/**
 			 * Wait for GR to go idle after submitting a
 			 * GO_IDLE bundle
@@ -740,12 +746,13 @@
 		}
 	}
 
-	nv_wr32(gr, 0x400208, 0x00000000);
+	nvkm_wr32(device, 0x400208, 0x00000000);
 }
 
 void
 gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *pack;
 	const struct gf100_gr_init *init;
 	u32 data = 0;
@@ -756,12 +763,12 @@
 		u32 addr = init->addr;
 
 		if ((pack == p && init == p->init) || data != init->data) {
-			nv_wr32(gr, 0x40448c, init->data);
+			nvkm_wr32(device, 0x40448c, init->data);
 			data = init->data;
 		}
 
 		while (addr < next) {
-			nv_wr32(gr, 0x404488, ctrl | (addr << 14));
+			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
 			addr += init->pitch;
 		}
 	}
@@ -808,13 +815,14 @@
 static void
 gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 trap[4];
 	int i;
 
-	trap[0] = nv_rd32(gr, GPC_UNIT(gpc, 0x0420));
-	trap[1] = nv_rd32(gr, GPC_UNIT(gpc, 0x0434));
-	trap[2] = nv_rd32(gr, GPC_UNIT(gpc, 0x0438));
-	trap[3] = nv_rd32(gr, GPC_UNIT(gpc, 0x043c));
+	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420));
+	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
+	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
+	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
 
 	nv_error(gr, "GPC%d/PROP trap:", gpc);
 	for (i = 0; i <= 29; ++i) {
@@ -828,7 +836,7 @@
 	nv_error(gr, "x = %u, y = %u, format = %x, storage type = %x\n",
 		 trap[1] & 0xffff, trap[1] >> 16, (trap[2] >> 8) & 0x3f,
 		 trap[3] & 0xff);
-	nv_wr32(gr, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
 }
 
 static const struct nvkm_enum gf100_mp_warp_error[] = {
@@ -853,8 +861,9 @@
 static void
 gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
 {
-	u32 werr = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x648));
-	u32 gerr = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x650));
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
+	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
 
 	nv_error(gr, "GPC%i/TPC%i/MP trap:", gpc, tpc);
 	nvkm_bitfield_print(gf100_mp_global_error, gerr);
@@ -864,19 +873,20 @@
 	}
 	pr_cont("\n");
 
-	nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
-	nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x650), gerr);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
 }
 
 static void
 gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
 {
-	u32 stat = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x0508));
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
 
 	if (stat & 0x00000001) {
-		u32 trap = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x0224));
+		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
 		nv_error(gr, "GPC%d/TPC%d/TEX: 0x%08x\n", gpc, tpc, trap);
-		nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
 		stat &= ~0x00000001;
 	}
 
@@ -886,16 +896,16 @@
 	}
 
 	if (stat & 0x00000004) {
-		u32 trap = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x0084));
+		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
 		nv_error(gr, "GPC%d/TPC%d/POLY: 0x%08x\n", gpc, tpc, trap);
-		nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
 		stat &= ~0x00000004;
 	}
 
 	if (stat & 0x00000008) {
-		u32 trap = nv_rd32(gr, TPC_UNIT(gpc, tpc, 0x048c));
+		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
 		nv_error(gr, "GPC%d/TPC%d/L1C: 0x%08x\n", gpc, tpc, trap);
-		nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
 		stat &= ~0x00000008;
 	}
 
@@ -907,7 +917,8 @@
 static void
 gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
 {
-	u32 stat = nv_rd32(gr, GPC_UNIT(gpc, 0x2c90));
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
 	int tpc;
 
 	if (stat & 0x00000001) {
@@ -916,23 +927,23 @@
 	}
 
 	if (stat & 0x00000002) {
-		u32 trap = nv_rd32(gr, GPC_UNIT(gpc, 0x0900));
+		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
 		nv_error(gr, "GPC%d/ZCULL: 0x%08x\n", gpc, trap);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
 		stat &= ~0x00000002;
 	}
 
 	if (stat & 0x00000004) {
-		u32 trap = nv_rd32(gr, GPC_UNIT(gpc, 0x1028));
+		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
 		nv_error(gr, "GPC%d/CCACHE: 0x%08x\n", gpc, trap);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
 		stat &= ~0x00000004;
 	}
 
 	if (stat & 0x00000008) {
-		u32 trap = nv_rd32(gr, GPC_UNIT(gpc, 0x0824));
+		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
 		nv_error(gr, "GPC%d/ESETUP: 0x%08x\n", gpc, trap);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
 		stat &= ~0x00000009;
 	}
 
@@ -940,7 +951,7 @@
 		u32 mask = 0x00010000 << tpc;
 		if (stat & mask) {
 			gf100_gr_trap_tpc(gr, gpc, tpc);
-			nv_wr32(gr, GPC_UNIT(gpc, 0x2c90), mask);
+			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
 			stat &= ~mask;
 		}
 	}
@@ -953,59 +964,60 @@
 static void
 gf100_gr_trap_intr(struct gf100_gr *gr)
 {
-	u32 trap = nv_rd32(gr, 0x400108);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 trap = nvkm_rd32(device, 0x400108);
 	int rop, gpc, i;
 
 	if (trap & 0x00000001) {
-		u32 stat = nv_rd32(gr, 0x404000);
+		u32 stat = nvkm_rd32(device, 0x404000);
 		nv_error(gr, "DISPATCH 0x%08x\n", stat);
-		nv_wr32(gr, 0x404000, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000001);
+		nvkm_wr32(device, 0x404000, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000001);
 		trap &= ~0x00000001;
 	}
 
 	if (trap & 0x00000002) {
-		u32 stat = nv_rd32(gr, 0x404600);
+		u32 stat = nvkm_rd32(device, 0x404600);
 		nv_error(gr, "M2MF 0x%08x\n", stat);
-		nv_wr32(gr, 0x404600, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000002);
+		nvkm_wr32(device, 0x404600, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000002);
 		trap &= ~0x00000002;
 	}
 
 	if (trap & 0x00000008) {
-		u32 stat = nv_rd32(gr, 0x408030);
+		u32 stat = nvkm_rd32(device, 0x408030);
 		nv_error(gr, "CCACHE 0x%08x\n", stat);
-		nv_wr32(gr, 0x408030, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000008);
+		nvkm_wr32(device, 0x408030, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000008);
 		trap &= ~0x00000008;
 	}
 
 	if (trap & 0x00000010) {
-		u32 stat = nv_rd32(gr, 0x405840);
+		u32 stat = nvkm_rd32(device, 0x405840);
 		nv_error(gr, "SHADER 0x%08x\n", stat);
-		nv_wr32(gr, 0x405840, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000010);
+		nvkm_wr32(device, 0x405840, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000010);
 		trap &= ~0x00000010;
 	}
 
 	if (trap & 0x00000040) {
-		u32 stat = nv_rd32(gr, 0x40601c);
+		u32 stat = nvkm_rd32(device, 0x40601c);
 		nv_error(gr, "UNK6 0x%08x\n", stat);
-		nv_wr32(gr, 0x40601c, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000040);
+		nvkm_wr32(device, 0x40601c, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000040);
 		trap &= ~0x00000040;
 	}
 
 	if (trap & 0x00000080) {
-		u32 stat = nv_rd32(gr, 0x404490);
+		u32 stat = nvkm_rd32(device, 0x404490);
 		nv_error(gr, "MACRO 0x%08x\n", stat);
-		nv_wr32(gr, 0x404490, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x00000080);
+		nvkm_wr32(device, 0x404490, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x00000080);
 		trap &= ~0x00000080;
 	}
 
 	if (trap & 0x00000100) {
-		u32 stat = nv_rd32(gr, 0x407020);
+		u32 stat = nvkm_rd32(device, 0x407020);
 
 		nv_error(gr, "SKED:");
 		for (i = 0; i <= 29; ++i) {
@@ -1017,61 +1029,63 @@
 		pr_cont("\n");
 
 		if (stat & 0x3fffffff)
-			nv_wr32(gr, 0x407020, 0x40000000);
-		nv_wr32(gr, 0x400108, 0x00000100);
+			nvkm_wr32(device, 0x407020, 0x40000000);
+		nvkm_wr32(device, 0x400108, 0x00000100);
 		trap &= ~0x00000100;
 	}
 
 	if (trap & 0x01000000) {
-		u32 stat = nv_rd32(gr, 0x400118);
+		u32 stat = nvkm_rd32(device, 0x400118);
 		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
 			u32 mask = 0x00000001 << gpc;
 			if (stat & mask) {
 				gf100_gr_trap_gpc(gr, gpc);
-				nv_wr32(gr, 0x400118, mask);
+				nvkm_wr32(device, 0x400118, mask);
 				stat &= ~mask;
 			}
 		}
-		nv_wr32(gr, 0x400108, 0x01000000);
+		nvkm_wr32(device, 0x400108, 0x01000000);
 		trap &= ~0x01000000;
 	}
 
 	if (trap & 0x02000000) {
 		for (rop = 0; rop < gr->rop_nr; rop++) {
-			u32 statz = nv_rd32(gr, ROP_UNIT(rop, 0x070));
-			u32 statc = nv_rd32(gr, ROP_UNIT(rop, 0x144));
+			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
+			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
 			nv_error(gr, "ROP%d 0x%08x 0x%08x\n",
 				 rop, statz, statc);
-			nv_wr32(gr, ROP_UNIT(rop, 0x070), 0xc0000000);
-			nv_wr32(gr, ROP_UNIT(rop, 0x144), 0xc0000000);
+			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
+			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
 		}
-		nv_wr32(gr, 0x400108, 0x02000000);
+		nvkm_wr32(device, 0x400108, 0x02000000);
 		trap &= ~0x02000000;
 	}
 
 	if (trap) {
 		nv_error(gr, "TRAP UNHANDLED 0x%08x\n", trap);
-		nv_wr32(gr, 0x400108, trap);
+		nvkm_wr32(device, 0x400108, trap);
 	}
 }
 
 static void
 gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	nv_error(gr, "%06x - done 0x%08x\n", base,
-		 nv_rd32(gr, base + 0x400));
+		 nvkm_rd32(device, base + 0x400));
 	nv_error(gr, "%06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base,
-		 nv_rd32(gr, base + 0x800), nv_rd32(gr, base + 0x804),
-		 nv_rd32(gr, base + 0x808), nv_rd32(gr, base + 0x80c));
+		 nvkm_rd32(device, base + 0x800), nvkm_rd32(device, base + 0x804),
+		 nvkm_rd32(device, base + 0x808), nvkm_rd32(device, base + 0x80c));
 	nv_error(gr, "%06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base,
-		 nv_rd32(gr, base + 0x810), nv_rd32(gr, base + 0x814),
-		 nv_rd32(gr, base + 0x818), nv_rd32(gr, base + 0x81c));
+		 nvkm_rd32(device, base + 0x810), nvkm_rd32(device, base + 0x814),
+		 nvkm_rd32(device, base + 0x818), nvkm_rd32(device, base + 0x81c));
 }
 
 void
 gf100_gr_ctxctl_debug(struct gf100_gr *gr)
 {
-	u32 gpcnr = nv_rd32(gr, 0x409604) & 0xffff;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
 	u32 gpc;
 
 	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
@@ -1082,22 +1096,23 @@
 static void
 gf100_gr_ctxctl_isr(struct gf100_gr *gr)
 {
-	u32 stat = nv_rd32(gr, 0x409c18);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, 0x409c18);
 
 	if (stat & 0x00000001) {
-		u32 code = nv_rd32(gr, 0x409814);
+		u32 code = nvkm_rd32(device, 0x409814);
 		if (code == E_BAD_FWMTHD) {
-			u32 class = nv_rd32(gr, 0x409808);
-			u32  addr = nv_rd32(gr, 0x40980c);
+			u32 class = nvkm_rd32(device, 0x409808);
+			u32  addr = nvkm_rd32(device, 0x40980c);
 			u32  subc = (addr & 0x00070000) >> 16;
 			u32  mthd = (addr & 0x00003ffc);
-			u32  data = nv_rd32(gr, 0x409810);
+			u32  data = nvkm_rd32(device, 0x409810);
 
 			nv_error(gr, "FECS MTHD subc %d class 0x%04x "
 				       "mthd 0x%04x data 0x%08x\n",
 				 subc, class, mthd, data);
 
-			nv_wr32(gr, 0x409c20, 0x00000001);
+			nvkm_wr32(device, 0x409c20, 0x00000001);
 			stat &= ~0x00000001;
 		} else {
 			nv_error(gr, "FECS ucode error %d\n", code);
@@ -1107,37 +1122,38 @@
 	if (stat & 0x00080000) {
 		nv_error(gr, "FECS watchdog timeout\n");
 		gf100_gr_ctxctl_debug(gr);
-		nv_wr32(gr, 0x409c20, 0x00080000);
+		nvkm_wr32(device, 0x409c20, 0x00080000);
 		stat &= ~0x00080000;
 	}
 
 	if (stat) {
 		nv_error(gr, "FECS 0x%08x\n", stat);
 		gf100_gr_ctxctl_debug(gr);
-		nv_wr32(gr, 0x409c20, stat);
+		nvkm_wr32(device, 0x409c20, stat);
 	}
 }
 
 static void
 gf100_gr_intr(struct nvkm_subdev *subdev)
 {
-	struct nvkm_fifo *fifo = nvkm_fifo(subdev);
+	struct gf100_gr *gr = (void *)subdev;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = device->fifo;
 	struct nvkm_engine *engine = nv_engine(subdev);
 	struct nvkm_object *engctx;
 	struct nvkm_handle *handle;
-	struct gf100_gr *gr = (void *)subdev;
-	u64 inst = nv_rd32(gr, 0x409b00) & 0x0fffffff;
-	u32 stat = nv_rd32(gr, 0x400100);
-	u32 addr = nv_rd32(gr, 0x400704);
+	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
+	u32 stat = nvkm_rd32(device, 0x400100);
+	u32 addr = nvkm_rd32(device, 0x400704);
 	u32 mthd = (addr & 0x00003ffc);
 	u32 subc = (addr & 0x00070000) >> 16;
-	u32 data = nv_rd32(gr, 0x400708);
-	u32 code = nv_rd32(gr, 0x400110);
+	u32 data = nvkm_rd32(device, 0x400708);
+	u32 code = nvkm_rd32(device, 0x400110);
 	u32 class;
 	int chid;
 
 	if (nv_device(gr)->card_type < NV_E0 || subc < 4)
-		class = nv_rd32(gr, 0x404200 + (subc * 4));
+		class = nvkm_rd32(device, 0x404200 + (subc * 4));
 	else
 		class = 0x0000;
 
@@ -1149,7 +1165,7 @@
 		 * notifier interrupt, only needed for cyclestats
 		 * can be safely ignored
 		 */
-		nv_wr32(gr, 0x400100, 0x00000001);
+		nvkm_wr32(device, 0x400100, 0x00000001);
 		stat &= ~0x00000001;
 	}
 
@@ -1162,7 +1178,7 @@
 				 subc, class, mthd, data);
 		}
 		nvkm_handle_put(handle);
-		nv_wr32(gr, 0x400100, 0x00000010);
+		nvkm_wr32(device, 0x400100, 0x00000010);
 		stat &= ~0x00000010;
 	}
 
@@ -1171,7 +1187,7 @@
 			 "ILLEGAL_CLASS ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n",
 			 chid, inst << 12, nvkm_client_name(engctx), subc,
 			 class, mthd, data);
-		nv_wr32(gr, 0x400100, 0x00000020);
+		nvkm_wr32(device, 0x400100, 0x00000020);
 		stat &= ~0x00000020;
 	}
 
@@ -1181,7 +1197,7 @@
 		pr_cont("] ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n",
 			chid, inst << 12, nvkm_client_name(engctx), subc,
 			class, mthd, data);
-		nv_wr32(gr, 0x400100, 0x00100000);
+		nvkm_wr32(device, 0x400100, 0x00100000);
 		stat &= ~0x00100000;
 	}
 
@@ -1189,22 +1205,22 @@
 		nv_error(gr, "TRAP ch %d [0x%010llx %s]\n", chid, inst << 12,
 			 nvkm_client_name(engctx));
 		gf100_gr_trap_intr(gr);
-		nv_wr32(gr, 0x400100, 0x00200000);
+		nvkm_wr32(device, 0x400100, 0x00200000);
 		stat &= ~0x00200000;
 	}
 
 	if (stat & 0x00080000) {
 		gf100_gr_ctxctl_isr(gr);
-		nv_wr32(gr, 0x400100, 0x00080000);
+		nvkm_wr32(device, 0x400100, 0x00080000);
 		stat &= ~0x00080000;
 	}
 
 	if (stat) {
 		nv_error(gr, "unknown stat 0x%08x\n", stat);
-		nv_wr32(gr, 0x400100, stat);
+		nvkm_wr32(device, 0x400100, stat);
 	}
 
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 	nvkm_engctx_put(engctx);
 }
 
@@ -1212,22 +1228,23 @@
 gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
 		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int i;
 
-	nv_wr32(gr, fuc_base + 0x01c0, 0x01000000);
+	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
 	for (i = 0; i < data->size / 4; i++)
-		nv_wr32(gr, fuc_base + 0x01c4, data->data[i]);
+		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
 
-	nv_wr32(gr, fuc_base + 0x0180, 0x01000000);
+	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
 	for (i = 0; i < code->size / 4; i++) {
 		if ((i & 0x3f) == 0)
-			nv_wr32(gr, fuc_base + 0x0188, i >> 6);
-		nv_wr32(gr, fuc_base + 0x0184, code->data[i]);
+			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
+		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
 	}
 
 	/* code must be padded to 0x40 words */
 	for (; i & 0x3f; i++)
-		nv_wr32(gr, fuc_base + 0x0184, 0);
+		nvkm_wr32(device, fuc_base + 0x0184, 0);
 }
 
 static void
@@ -1235,17 +1252,18 @@
 		     const struct gf100_gr_pack *pack,
 		     u32 falcon, u32 starstar, u32 base)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *iter;
 	const struct gf100_gr_init *init;
 	u32 addr = ~0, prev = ~0, xfer = 0;
 	u32 star, temp;
 
-	nv_wr32(gr, falcon + 0x01c0, 0x02000000 + starstar);
-	star = nv_rd32(gr, falcon + 0x01c4);
-	temp = nv_rd32(gr, falcon + 0x01c4);
+	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
+	star = nvkm_rd32(device, falcon + 0x01c4);
+	temp = nvkm_rd32(device, falcon + 0x01c4);
 	if (temp > star)
 		star = temp;
-	nv_wr32(gr, falcon + 0x01c0, 0x01000000 + star);
+	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
 
 	pack_for_each_init(init, iter, pack) {
 		u32 head = init->addr - base;
@@ -1254,7 +1272,7 @@
 			if (head != prev + 4 || xfer >= 32) {
 				if (xfer) {
 					u32 data = ((--xfer << 26) | addr);
-					nv_wr32(gr, falcon + 0x01c4, data);
+					nvkm_wr32(device, falcon + 0x01c4, data);
 					star += 4;
 				}
 				addr = head;
@@ -1266,14 +1284,15 @@
 		}
 	}
 
-	nv_wr32(gr, falcon + 0x01c4, (--xfer << 26) | addr);
-	nv_wr32(gr, falcon + 0x01c0, 0x01000004 + starstar);
-	nv_wr32(gr, falcon + 0x01c4, star + 4);
+	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
+	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
+	nvkm_wr32(device, falcon + 0x01c4, star + 4);
 }
 
 int
 gf100_gr_init_ctxctl(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct gf100_gr_oclass *oclass = (void *)nv_object(gr)->oclass;
 	struct gf100_grctx_oclass *cclass = (void *)nv_engine(gr)->cclass;
 	int i;
@@ -1288,73 +1307,73 @@
 		nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
 
 		/* start both of them running */
-		nv_wr32(gr, 0x409840, 0xffffffff);
-		nv_wr32(gr, 0x41a10c, 0x00000000);
-		nv_wr32(gr, 0x40910c, 0x00000000);
-		nv_wr32(gr, 0x41a100, 0x00000002);
-		nv_wr32(gr, 0x409100, 0x00000002);
+		nvkm_wr32(device, 0x409840, 0xffffffff);
+		nvkm_wr32(device, 0x41a10c, 0x00000000);
+		nvkm_wr32(device, 0x40910c, 0x00000000);
+		nvkm_wr32(device, 0x41a100, 0x00000002);
+		nvkm_wr32(device, 0x409100, 0x00000002);
 		if (!nv_wait(gr, 0x409800, 0x00000001, 0x00000001))
 			nv_warn(gr, "0x409800 wait failed\n");
 
-		nv_wr32(gr, 0x409840, 0xffffffff);
-		nv_wr32(gr, 0x409500, 0x7fffffff);
-		nv_wr32(gr, 0x409504, 0x00000021);
+		nvkm_wr32(device, 0x409840, 0xffffffff);
+		nvkm_wr32(device, 0x409500, 0x7fffffff);
+		nvkm_wr32(device, 0x409504, 0x00000021);
 
-		nv_wr32(gr, 0x409840, 0xffffffff);
-		nv_wr32(gr, 0x409500, 0x00000000);
-		nv_wr32(gr, 0x409504, 0x00000010);
+		nvkm_wr32(device, 0x409840, 0xffffffff);
+		nvkm_wr32(device, 0x409500, 0x00000000);
+		nvkm_wr32(device, 0x409504, 0x00000010);
 		if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 			nv_error(gr, "fuc09 req 0x10 timeout\n");
 			return -EBUSY;
 		}
-		gr->size = nv_rd32(gr, 0x409800);
+		gr->size = nvkm_rd32(device, 0x409800);
 
-		nv_wr32(gr, 0x409840, 0xffffffff);
-		nv_wr32(gr, 0x409500, 0x00000000);
-		nv_wr32(gr, 0x409504, 0x00000016);
+		nvkm_wr32(device, 0x409840, 0xffffffff);
+		nvkm_wr32(device, 0x409500, 0x00000000);
+		nvkm_wr32(device, 0x409504, 0x00000016);
 		if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 			nv_error(gr, "fuc09 req 0x16 timeout\n");
 			return -EBUSY;
 		}
 
-		nv_wr32(gr, 0x409840, 0xffffffff);
-		nv_wr32(gr, 0x409500, 0x00000000);
-		nv_wr32(gr, 0x409504, 0x00000025);
+		nvkm_wr32(device, 0x409840, 0xffffffff);
+		nvkm_wr32(device, 0x409500, 0x00000000);
+		nvkm_wr32(device, 0x409504, 0x00000025);
 		if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 			nv_error(gr, "fuc09 req 0x25 timeout\n");
 			return -EBUSY;
 		}
 
 		if (nv_device(gr)->chipset >= 0xe0) {
-			nv_wr32(gr, 0x409800, 0x00000000);
-			nv_wr32(gr, 0x409500, 0x00000001);
-			nv_wr32(gr, 0x409504, 0x00000030);
+			nvkm_wr32(device, 0x409800, 0x00000000);
+			nvkm_wr32(device, 0x409500, 0x00000001);
+			nvkm_wr32(device, 0x409504, 0x00000030);
 			if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 				nv_error(gr, "fuc09 req 0x30 timeout\n");
 				return -EBUSY;
 			}
 
-			nv_wr32(gr, 0x409810, 0xb00095c8);
-			nv_wr32(gr, 0x409800, 0x00000000);
-			nv_wr32(gr, 0x409500, 0x00000001);
-			nv_wr32(gr, 0x409504, 0x00000031);
+			nvkm_wr32(device, 0x409810, 0xb00095c8);
+			nvkm_wr32(device, 0x409800, 0x00000000);
+			nvkm_wr32(device, 0x409500, 0x00000001);
+			nvkm_wr32(device, 0x409504, 0x00000031);
 			if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 				nv_error(gr, "fuc09 req 0x31 timeout\n");
 				return -EBUSY;
 			}
 
-			nv_wr32(gr, 0x409810, 0x00080420);
-			nv_wr32(gr, 0x409800, 0x00000000);
-			nv_wr32(gr, 0x409500, 0x00000001);
-			nv_wr32(gr, 0x409504, 0x00000032);
+			nvkm_wr32(device, 0x409810, 0x00080420);
+			nvkm_wr32(device, 0x409800, 0x00000000);
+			nvkm_wr32(device, 0x409500, 0x00000001);
+			nvkm_wr32(device, 0x409504, 0x00000032);
 			if (!nv_wait_ne(gr, 0x409800, 0xffffffff, 0x00000000)) {
 				nv_error(gr, "fuc09 req 0x32 timeout\n");
 				return -EBUSY;
 			}
 
-			nv_wr32(gr, 0x409614, 0x00000070);
-			nv_wr32(gr, 0x409614, 0x00000770);
-			nv_wr32(gr, 0x40802c, 0x00000001);
+			nvkm_wr32(device, 0x409614, 0x00000070);
+			nvkm_wr32(device, 0x409614, 0x00000770);
+			nvkm_wr32(device, 0x40802c, 0x00000001);
 		}
 
 		if (gr->data == NULL) {
@@ -1373,27 +1392,27 @@
 
 	/* load HUB microcode */
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 0);
-	nv_wr32(gr, 0x4091c0, 0x01000000);
+	nvkm_wr32(device, 0x4091c0, 0x01000000);
 	for (i = 0; i < oclass->fecs.ucode->data.size / 4; i++)
-		nv_wr32(gr, 0x4091c4, oclass->fecs.ucode->data.data[i]);
+		nvkm_wr32(device, 0x4091c4, oclass->fecs.ucode->data.data[i]);
 
-	nv_wr32(gr, 0x409180, 0x01000000);
+	nvkm_wr32(device, 0x409180, 0x01000000);
 	for (i = 0; i < oclass->fecs.ucode->code.size / 4; i++) {
 		if ((i & 0x3f) == 0)
-			nv_wr32(gr, 0x409188, i >> 6);
-		nv_wr32(gr, 0x409184, oclass->fecs.ucode->code.data[i]);
+			nvkm_wr32(device, 0x409188, i >> 6);
+		nvkm_wr32(device, 0x409184, oclass->fecs.ucode->code.data[i]);
 	}
 
 	/* load GPC microcode */
-	nv_wr32(gr, 0x41a1c0, 0x01000000);
+	nvkm_wr32(device, 0x41a1c0, 0x01000000);
 	for (i = 0; i < oclass->gpccs.ucode->data.size / 4; i++)
-		nv_wr32(gr, 0x41a1c4, oclass->gpccs.ucode->data.data[i]);
+		nvkm_wr32(device, 0x41a1c4, oclass->gpccs.ucode->data.data[i]);
 
-	nv_wr32(gr, 0x41a180, 0x01000000);
+	nvkm_wr32(device, 0x41a180, 0x01000000);
 	for (i = 0; i < oclass->gpccs.ucode->code.size / 4; i++) {
 		if ((i & 0x3f) == 0)
-			nv_wr32(gr, 0x41a188, i >> 6);
-		nv_wr32(gr, 0x41a184, oclass->gpccs.ucode->code.data[i]);
+			nvkm_wr32(device, 0x41a188, i >> 6);
+		nvkm_wr32(device, 0x41a184, oclass->gpccs.ucode->code.data[i]);
 	}
 	nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
 
@@ -1404,15 +1423,15 @@
 	gf100_gr_init_csdata(gr, cclass->ppc, 0x41a000, 0x008, 0x41be00);
 
 	/* start HUB ucode running, it'll init the GPCs */
-	nv_wr32(gr, 0x40910c, 0x00000000);
-	nv_wr32(gr, 0x409100, 0x00000002);
+	nvkm_wr32(device, 0x40910c, 0x00000000);
+	nvkm_wr32(device, 0x409100, 0x00000002);
 	if (!nv_wait(gr, 0x409800, 0x80000000, 0x80000000)) {
 		nv_error(gr, "HUB_INIT timed out\n");
 		gf100_gr_ctxctl_debug(gr);
 		return -EBUSY;
 	}
 
-	gr->size = nv_rd32(gr, 0x409804);
+	gr->size = nvkm_rd32(device, 0x409804);
 	if (gr->data == NULL) {
 		int ret = gf100_grctx_generate(gr);
 		if (ret) {
@@ -1427,8 +1446,9 @@
 int
 gf100_gr_init(struct nvkm_object *object)
 {
-	struct gf100_gr_oclass *oclass = (void *)object->oclass;
 	struct gf100_gr *gr = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct gf100_gr_oclass *oclass = (void *)object->oclass;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
 	u32 data[TPC_MAX / 8] = {};
 	u8  tpcnr[GPC_MAX];
@@ -1439,14 +1459,14 @@
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, GPC_BCAST(0x0880), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x08a4), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0888), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x088c), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0890), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0894), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
 
 	gf100_gr_mmio(gr, oclass->mmio);
 
@@ -1460,76 +1480,76 @@
 		data[i / 8] |= tpc << ((i % 8) * 4);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x0980), data[0]);
-	nv_wr32(gr, GPC_BCAST(0x0984), data[1]);
-	nv_wr32(gr, GPC_BCAST(0x0988), data[2]);
-	nv_wr32(gr, GPC_BCAST(0x098c), data[3]);
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0914),
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
 			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
 			gr->tpc_total);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
 	if (nv_device(gr)->chipset != 0xd7)
-		nv_wr32(gr, GPC_BCAST(0x1bd4), magicgpc918);
+		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
 	else
-		nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918);
+		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
 
-	nv_wr32(gr, GPC_BCAST(0x08ac), nv_rd32(gr, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
 
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
 
-	nv_wr32(gr, 0x409c24, 0x000f0000);
-	nv_wr32(gr, 0x404000, 0xc0000000);
-	nv_wr32(gr, 0x404600, 0xc0000000);
-	nv_wr32(gr, 0x408030, 0xc0000000);
-	nv_wr32(gr, 0x40601c, 0xc0000000);
-	nv_wr32(gr, 0x404490, 0xc0000000);
-	nv_wr32(gr, 0x406018, 0xc0000000);
-	nv_wr32(gr, 0x405840, 0xc0000000);
-	nv_wr32(gr, 0x405844, 0x00ffffff);
-	nv_mask(gr, 0x419cc0, 0x00000008, 0x00000008);
-	nv_mask(gr, 0x419eb4, 0x00001000, 0x00001000);
+	nvkm_wr32(device, 0x409c24, 0x000f0000);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x40601c, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
 		}
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
 	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nv_wr32(gr, ROP_UNIT(rop, 0x144), 0xc0000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x070), 0xc0000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nv_wr32(gr, ROP_UNIT(rop, 0x208), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
 	}
 
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400118, 0xffffffff);
-	nv_wr32(gr, 0x400130, 0xffffffff);
-	nv_wr32(gr, 0x40011c, 0xffffffff);
-	nv_wr32(gr, 0x400134, 0xffffffff);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
 
-	nv_wr32(gr, 0x400054, 0x34ce3464);
+	nvkm_wr32(device, 0x400054, 0x34ce3464);
 
 	gf100_gr_zbc_init(gr);
 
@@ -1644,14 +1664,14 @@
 		nv_wo32(gr->unk4188b8, i, 0x00000010);
 	}
 
-	gr->rop_nr = (nv_rd32(gr, 0x409604) & 0x001f0000) >> 16;
-	gr->gpc_nr =  nv_rd32(gr, 0x409604) & 0x0000001f;
+	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
+	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
 	for (i = 0; i < gr->gpc_nr; i++) {
-		gr->tpc_nr[i]  = nv_rd32(gr, GPC_UNIT(i, 0x2608));
+		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
 		gr->tpc_total += gr->tpc_nr[i];
 		gr->ppc_nr[i]  = oclass->ppc_nr;
 		for (j = 0; j < gr->ppc_nr[i]; j++) {
-			u8 mask = nv_rd32(gr, GPC_UNIT(i, 0x0c30 + (j * 4)));
+			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
 			gr->ppc_tpc_nr[i][j] = hweight8(mask);
 		}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
index 9f83122..89bb101 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -197,7 +197,8 @@
 {
 	struct gf100_gr_oclass *oclass = (void *)object->oclass;
 	struct gf100_gr *gr = (void *)object;
-	struct nvkm_pmu *pmu = nvkm_pmu(gr);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_pmu *pmu = device->pmu;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
 	u32 data[TPC_MAX / 8] = {};
 	u8  tpcnr[GPC_MAX];
@@ -211,18 +212,18 @@
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, GPC_BCAST(0x0880), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x08a4), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0888), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x088c), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0890), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0894), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
 
 	gf100_gr_mmio(gr, oclass->mmio);
 
-	nv_wr32(gr, GPC_UNIT(0, 0x3018), 0x00000001);
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
 
 	memset(data, 0x00, sizeof(data));
 	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
@@ -235,75 +236,75 @@
 		data[i / 8] |= tpc << ((i % 8) * 4);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x0980), data[0]);
-	nv_wr32(gr, GPC_BCAST(0x0984), data[1]);
-	nv_wr32(gr, GPC_BCAST(0x0988), data[2]);
-	nv_wr32(gr, GPC_BCAST(0x098c), data[3]);
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0914),
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
 			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
 			gr->tpc_total);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918);
-	nv_wr32(gr, GPC_BCAST(0x08ac), nv_rd32(gr, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
 
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
 
-	nv_wr32(gr, 0x409ffc, 0x00000000);
-	nv_wr32(gr, 0x409c14, 0x00003e3e);
-	nv_wr32(gr, 0x409c24, 0x000f0001);
-	nv_wr32(gr, 0x404000, 0xc0000000);
-	nv_wr32(gr, 0x404600, 0xc0000000);
-	nv_wr32(gr, 0x408030, 0xc0000000);
-	nv_wr32(gr, 0x404490, 0xc0000000);
-	nv_wr32(gr, 0x406018, 0xc0000000);
-	nv_wr32(gr, 0x407020, 0x40000000);
-	nv_wr32(gr, 0x405840, 0xc0000000);
-	nv_wr32(gr, 0x405844, 0x00ffffff);
-	nv_mask(gr, 0x419cc0, 0x00000008, 0x00000008);
-	nv_mask(gr, 0x419eb4, 0x00001000, 0x00001000);
+	nvkm_wr32(device, 0x409ffc, 0x00000000);
+	nvkm_wr32(device, 0x409c14, 0x00003e3e);
+	nvkm_wr32(device, 0x409c24, 0x000f0001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x3038), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x3038), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
 		}
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
 	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nv_wr32(gr, ROP_UNIT(rop, 0x144), 0xc0000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x070), 0xc0000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nv_wr32(gr, ROP_UNIT(rop, 0x208), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
 	}
 
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400118, 0xffffffff);
-	nv_wr32(gr, 0x400130, 0xffffffff);
-	nv_wr32(gr, 0x40011c, 0xffffffff);
-	nv_wr32(gr, 0x400134, 0xffffffff);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
 
-	nv_wr32(gr, 0x400054, 0x34ce3464);
+	nvkm_wr32(device, 0x400054, 0x34ce3464);
 
 	gf100_gr_zbc_init(gr);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 9816303..12b34c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -236,8 +236,9 @@
 static void
 gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
 {
-	nv_wr32(gr, 0x419e44, 0x1ffffe);
-	nv_wr32(gr, 0x419e4c, 0x7f);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, 0x419e44, 0x1ffffe);
+	nvkm_wr32(device, 0x419e4c, 0x7f);
 }
 
 int
@@ -245,6 +246,7 @@
 {
 	struct gk20a_gr_oclass *oclass = (void *)object->oclass;
 	struct gf100_gr *gr = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
 	u32 data[TPC_MAX / 8] = {};
 	u8  tpcnr[GPC_MAX];
@@ -256,7 +258,7 @@
 		return ret;
 
 	/* Clear SCC RAM */
-	nv_wr32(gr, 0x40802c, 0x1);
+	nvkm_wr32(device, 0x40802c, 0x1);
 
 	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
 
@@ -269,14 +271,14 @@
 		return ret;
 
 	/* MMU debug buffer */
-	nv_wr32(gr, 0x100cc8, gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, 0x100ccc, gr->unk4188b8->addr >> 8);
+	nvkm_wr32(device, 0x100cc8, gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, 0x100ccc, gr->unk4188b8->addr >> 8);
 
 	if (oclass->init_gpc_mmu)
 		oclass->init_gpc_mmu(gr);
 
 	/* Set the PE as stream master */
-	nv_mask(gr, 0x503018, 0x1, 0x1);
+	nvkm_mask(device, 0x503018, 0x1, 0x1);
 
 	/* Zcull init */
 	memset(data, 0x00, sizeof(data));
@@ -290,49 +292,49 @@
 		data[i / 8] |= tpc << ((i % 8) * 4);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x0980), data[0]);
-	nv_wr32(gr, GPC_BCAST(0x0984), data[1]);
-	nv_wr32(gr, GPC_BCAST(0x0988), data[2]);
-	nv_wr32(gr, GPC_BCAST(0x098c), data[3]);
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0914),
-			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-			gr->tpc_total);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+			  gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
 
 	/* Enable FIFO access */
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
 	/* Enable interrupts */
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
 
 	/* Enable FECS error interrupts */
-	nv_wr32(gr, 0x409c24, 0x000f0000);
+	nvkm_wr32(device, 0x409c24, 0x000f0000);
 
 	/* Enable hardware warning exceptions */
-	nv_wr32(gr, 0x404000, 0xc0000000);
-	nv_wr32(gr, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
 
 	if (oclass->set_hww_esr_report_mask)
 		oclass->set_hww_esr_report_mask(gr);
 
 	/* Enable TPC exceptions per GPC */
-	nv_wr32(gr, 0x419d0c, 0x2);
-	nv_wr32(gr, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);
+	nvkm_wr32(device, 0x419d0c, 0x2);
+	nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);
 
 	/* Reset and enable all exceptions */
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400118, 0xffffffff);
-	nv_wr32(gr, 0x400130, 0xffffffff);
-	nv_wr32(gr, 0x40011c, 0xffffffff);
-	nv_wr32(gr, 0x400134, 0xffffffff);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
 
 	gf100_gr_zbc_init(gr);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index 5e9560f..1e451a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -304,7 +304,8 @@
 		{ 0x419af0, 0x419af4 },
 		{ 0x419af8, 0x419afc },
 	};
-	struct nvkm_bios *bios = nvkm_bios(gr);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_bios *bios = device->bios;
 	struct nvbios_P0260E infoE;
 	struct nvbios_P0260X infoX;
 	int E = -1, X;
@@ -312,9 +313,9 @@
 
 	while (nvbios_P0260Ep(bios, ++E, &ver, &hdr, &infoE)) {
 		if (X = -1, E < ARRAY_SIZE(regs)) {
-			nv_wr32(gr, regs[E].ctrl, infoE.data);
+			nvkm_wr32(device, regs[E].ctrl, infoE.data);
 			while (nvbios_P0260Xp(bios, ++X, &ver, &hdr, &infoX))
-				nv_wr32(gr, regs[E].data, infoX.data);
+				nvkm_wr32(device, regs[E].data, infoX.data);
 		}
 	}
 }
@@ -324,6 +325,7 @@
 {
 	struct gf100_gr_oclass *oclass = (void *)object->oclass;
 	struct gf100_gr *gr = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
 	u32 data[TPC_MAX / 8] = {};
 	u8  tpcnr[GPC_MAX];
@@ -334,17 +336,17 @@
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, GPC_BCAST(0x0880), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0890), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x0894), 0x00000000);
-	nv_wr32(gr, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
+	nvkm_wr32(device, GPC_BCAST(0x08b4), gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, GPC_BCAST(0x08b8), gr->unk4188b8->addr >> 8);
 
 	gf100_gr_mmio(gr, oclass->mmio);
 
 	gm107_gr_init_bios(gr);
 
-	nv_wr32(gr, GPC_UNIT(0, 0x3018), 0x00000001);
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
 
 	memset(data, 0x00, sizeof(data));
 	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
@@ -357,75 +359,75 @@
 		data[i / 8] |= tpc << ((i % 8) * 4);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x0980), data[0]);
-	nv_wr32(gr, GPC_BCAST(0x0984), data[1]);
-	nv_wr32(gr, GPC_BCAST(0x0988), data[2]);
-	nv_wr32(gr, GPC_BCAST(0x098c), data[3]);
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0914),
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
 			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
 			gr->tpc_total);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918);
-	nv_wr32(gr, GPC_BCAST(0x08ac), nv_rd32(gr, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
 
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
-	nv_wr32(gr, 0x400124, 0x00000002);
-	nv_wr32(gr, 0x409c24, 0x000e0000);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000e0000);
 
-	nv_wr32(gr, 0x404000, 0xc0000000);
-	nv_wr32(gr, 0x404600, 0xc0000000);
-	nv_wr32(gr, 0x408030, 0xc0000000);
-	nv_wr32(gr, 0x404490, 0xc0000000);
-	nv_wr32(gr, 0x406018, 0xc0000000);
-	nv_wr32(gr, 0x407020, 0x40000000);
-	nv_wr32(gr, 0x405840, 0xc0000000);
-	nv_wr32(gr, 0x405844, 0x00ffffff);
-	nv_mask(gr, 0x419cc0, 0x00000008, 0x00000008);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < 2 /* gr->ppc_nr[gpc] */; ppc++)
-			nv_wr32(gr, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
 		}
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
 	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nv_wr32(gr, ROP_UNIT(rop, 0x144), 0x40000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x070), 0x40000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nv_wr32(gr, ROP_UNIT(rop, 0x208), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
 	}
 
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400118, 0xffffffff);
-	nv_wr32(gr, 0x400130, 0xffffffff);
-	nv_wr32(gr, 0x40011c, 0xffffffff);
-	nv_wr32(gr, 0x400134, 0xffffffff);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
 
-	nv_wr32(gr, 0x400054, 0x2c350f63);
+	nvkm_wr32(device, 0x400054, 0x2c350f63);
 
 	gf100_gr_zbc_init(gr);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
index 4cc60ed..c3d2343 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
@@ -253,6 +253,7 @@
 {
 	struct gf100_gr_oclass *oclass = (void *)object->oclass;
 	struct gf100_gr *gr = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
 	u32 data[TPC_MAX / 8] = {};
 	u8  tpcnr[GPC_MAX];
@@ -264,24 +265,24 @@
 	if (ret)
 		return ret;
 
-	tmp = nv_rd32(gr, 0x100c80); /*XXX: mask? */
-	nv_wr32(gr, 0x418880, 0x00001000 | (tmp & 0x00000fff));
-	nv_wr32(gr, 0x418890, 0x00000000);
-	nv_wr32(gr, 0x418894, 0x00000000);
-	nv_wr32(gr, 0x4188b4, gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, 0x4188b8, gr->unk4188b8->addr >> 8);
-	nv_mask(gr, 0x4188b0, 0x00040000, 0x00040000);
+	tmp = nvkm_rd32(device, 0x100c80); /*XXX: mask? */
+	nvkm_wr32(device, 0x418880, 0x00001000 | (tmp & 0x00000fff));
+	nvkm_wr32(device, 0x418890, 0x00000000);
+	nvkm_wr32(device, 0x418894, 0x00000000);
+	nvkm_wr32(device, 0x4188b4, gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, 0x4188b8, gr->unk4188b8->addr >> 8);
+	nvkm_mask(device, 0x4188b0, 0x00040000, 0x00040000);
 
 	/*XXX: belongs in fb */
-	nv_wr32(gr, 0x100cc8, gr->unk4188b4->addr >> 8);
-	nv_wr32(gr, 0x100ccc, gr->unk4188b8->addr >> 8);
-	nv_mask(gr, 0x100cc4, 0x00040000, 0x00040000);
+	nvkm_wr32(device, 0x100cc8, gr->unk4188b4->addr >> 8);
+	nvkm_wr32(device, 0x100ccc, gr->unk4188b8->addr >> 8);
+	nvkm_mask(device, 0x100cc4, 0x00040000, 0x00040000);
 
 	gf100_gr_mmio(gr, oclass->mmio);
 
 	gm107_gr_init_bios(gr);
 
-	nv_wr32(gr, GPC_UNIT(0, 0x3018), 0x00000001);
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
 
 	memset(data, 0x00, sizeof(data));
 	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
@@ -294,76 +295,76 @@
 		data[i / 8] |= tpc << ((i % 8) * 4);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x0980), data[0]);
-	nv_wr32(gr, GPC_BCAST(0x0984), data[1]);
-	nv_wr32(gr, GPC_BCAST(0x0988), data[2]);
-	nv_wr32(gr, GPC_BCAST(0x098c), data[3]);
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0914),
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
 			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
 			gr->tpc_total);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918);
-	nv_wr32(gr, GPC_BCAST(0x08ac), nv_rd32(gr, 0x100800));
-	nv_wr32(gr, GPC_BCAST(0x033c), nv_rd32(gr, 0x100804));
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
 
-	nv_wr32(gr, 0x400500, 0x00010001);
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
-	nv_wr32(gr, 0x400124, 0x00000002);
-	nv_wr32(gr, 0x409c24, 0x000e0000);
-	nv_wr32(gr, 0x405848, 0xc0000000);
-	nv_wr32(gr, 0x40584c, 0x00000001);
-	nv_wr32(gr, 0x404000, 0xc0000000);
-	nv_wr32(gr, 0x404600, 0xc0000000);
-	nv_wr32(gr, 0x408030, 0xc0000000);
-	nv_wr32(gr, 0x404490, 0xc0000000);
-	nv_wr32(gr, 0x406018, 0xc0000000);
-	nv_wr32(gr, 0x407020, 0x40000000);
-	nv_wr32(gr, 0x405840, 0xc0000000);
-	nv_wr32(gr, 0x405844, 0x00ffffff);
-	nv_mask(gr, 0x419cc0, 0x00000008, 0x00000008);
+	nvkm_wr32(device, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000e0000);
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_wr32(device, 0x40584c, 0x00000001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++)
-			nv_wr32(gr, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nv_wr32(gr, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
 		}
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nv_wr32(gr, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
 	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nv_wr32(gr, ROP_UNIT(rop, 0x144), 0x40000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x070), 0x40000000);
-		nv_wr32(gr, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nv_wr32(gr, ROP_UNIT(rop, 0x208), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
 	}
 
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400118, 0xffffffff);
-	nv_wr32(gr, 0x400130, 0xffffffff);
-	nv_wr32(gr, 0x40011c, 0xffffffff);
-	nv_wr32(gr, 0x400134, 0xffffffff);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
 
-	nv_wr32(gr, 0x400054, 0x2c350f63);
+	nvkm_wr32(device, 0x400054, 0x2c350f63);
 
 	gf100_gr_zbc_init(gr);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index 6b9c84f..719ebfb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -37,32 +37,34 @@
 static void
 gm20b_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 val;
 
 	/* TODO this needs to be removed once secure boot works */
 	if (1) {
-		nv_wr32(gr, 0x100ce4, 0xffffffff);
+		nvkm_wr32(device, 0x100ce4, 0xffffffff);
 	}
 
 	/* TODO update once secure boot works */
-	val = nv_rd32(gr, 0x100c80);
+	val = nvkm_rd32(device, 0x100c80);
 	val &= 0xf000087f;
-	nv_wr32(gr, 0x418880, val);
-	nv_wr32(gr, 0x418890, 0);
-	nv_wr32(gr, 0x418894, 0);
+	nvkm_wr32(device, 0x418880, val);
+	nvkm_wr32(device, 0x418890, 0);
+	nvkm_wr32(device, 0x418894, 0);
 
-	nv_wr32(gr, 0x4188b0, nv_rd32(gr, 0x100cc4));
-	nv_wr32(gr, 0x4188b4, nv_rd32(gr, 0x100cc8));
-	nv_wr32(gr, 0x4188b8, nv_rd32(gr, 0x100ccc));
+	nvkm_wr32(device, 0x4188b0, nvkm_rd32(device, 0x100cc4));
+	nvkm_wr32(device, 0x4188b4, nvkm_rd32(device, 0x100cc8));
+	nvkm_wr32(device, 0x4188b8, nvkm_rd32(device, 0x100ccc));
 
-	nv_wr32(gr, 0x4188ac, nv_rd32(gr, 0x100800));
+	nvkm_wr32(device, 0x4188ac, nvkm_rd32(device, 0x100800));
 }
 
 static void
 gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
 {
-	nv_wr32(gr, 0x419e44, 0xdffffe);
-	nv_wr32(gr, 0x419e4c, 0x5);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, 0x419e44, 0xdffffe);
+	nvkm_wr32(device, 0x419e4c, 0x5);
 }
 
 struct nvkm_oclass *
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
index e161abe..d1792ef 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
@@ -446,7 +446,8 @@
 nv04_gr_set_ctx1(struct nvkm_object *object, u32 mask, u32 value)
 {
 	struct nv04_gr *gr = (void *)object->engine;
-	int subc = (nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR) >> 13) & 0x7;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int subc = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 13) & 0x7;
 	u32 tmp;
 
 	tmp  = nv_ro32(object, 0x00);
@@ -454,8 +455,8 @@
 	tmp |= value;
 	nv_wo32(object, 0x00, tmp);
 
-	nv_wr32(gr, NV04_PGRAPH_CTX_SWITCH1, tmp);
-	nv_wr32(gr, NV04_PGRAPH_CTX_CACHE1 + (subc<<2), tmp);
+	nvkm_wr32(device, NV04_PGRAPH_CTX_SWITCH1, tmp);
+	nvkm_wr32(device, NV04_PGRAPH_CTX_CACHE1 + (subc<<2), tmp);
 }
 
 static void
@@ -528,6 +529,7 @@
 			   void *args, u32 size)
 {
 	struct nv04_gr *gr = (void *)object->engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 data = *(u32 *)args;
 	u32 min = data & 0xffff, max;
 	u32 w = data >> 16;
@@ -539,8 +541,8 @@
 		w |= 0xffff0000;
 	max = min + w;
 	max &= 0x3ffff;
-	nv_wr32(gr, 0x40053c, min);
-	nv_wr32(gr, 0x400544, max);
+	nvkm_wr32(device, 0x40053c, min);
+	nvkm_wr32(device, 0x400544, max);
 	return 0;
 }
 
@@ -549,6 +551,7 @@
 			   void *args, u32 size)
 {
 	struct nv04_gr *gr = (void *)object->engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 data = *(u32 *)args;
 	u32 min = data & 0xffff, max;
 	u32 w = data >> 16;
@@ -560,8 +563,8 @@
 		w |= 0xffff0000;
 	max = min + w;
 	max &= 0x3ffff;
-	nv_wr32(gr, 0x400540, min);
-	nv_wr32(gr, 0x400548, max);
+	nvkm_wr32(device, 0x400540, min);
+	nvkm_wr32(device, 0x400548, max);
 	return 0;
 }
 
@@ -1033,9 +1036,10 @@
 static struct nv04_gr_chan *
 nv04_gr_channel(struct nv04_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nv04_gr_chan *chan = NULL;
-	if (nv_rd32(gr, NV04_PGRAPH_CTX_CONTROL) & 0x00010000) {
-		int chid = nv_rd32(gr, NV04_PGRAPH_CTX_USER) >> 24;
+	if (nvkm_rd32(device, NV04_PGRAPH_CTX_CONTROL) & 0x00010000) {
+		int chid = nvkm_rd32(device, NV04_PGRAPH_CTX_USER) >> 24;
 		if (chid < ARRAY_SIZE(gr->chan))
 			chan = gr->chan[chid];
 	}
@@ -1046,14 +1050,15 @@
 nv04_gr_load_context(struct nv04_gr_chan *chan, int chid)
 {
 	struct nv04_gr *gr = nv04_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nv04_gr_ctx_regs); i++)
-		nv_wr32(gr, nv04_gr_ctx_regs[i], chan->nv04[i]);
+		nvkm_wr32(device, nv04_gr_ctx_regs[i], chan->nv04[i]);
 
-	nv_wr32(gr, NV04_PGRAPH_CTX_CONTROL, 0x10010100);
-	nv_mask(gr, NV04_PGRAPH_CTX_USER, 0xff000000, chid << 24);
-	nv_mask(gr, NV04_PGRAPH_FFINTFC_ST2, 0xfff00000, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL, 0x10010100);
+	nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, chid << 24);
+	nvkm_mask(device, NV04_PGRAPH_FFINTFC_ST2, 0xfff00000, 0x00000000);
 	return 0;
 }
 
@@ -1061,19 +1066,21 @@
 nv04_gr_unload_context(struct nv04_gr_chan *chan)
 {
 	struct nv04_gr *gr = nv04_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nv04_gr_ctx_regs); i++)
-		chan->nv04[i] = nv_rd32(gr, nv04_gr_ctx_regs[i]);
+		chan->nv04[i] = nvkm_rd32(device, nv04_gr_ctx_regs[i]);
 
-	nv_wr32(gr, NV04_PGRAPH_CTX_CONTROL, 0x10000000);
-	nv_mask(gr, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000);
+	nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL, 0x10000000);
+	nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000);
 	return 0;
 }
 
 static void
 nv04_gr_context_switch(struct nv04_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nv04_gr_chan *prev = NULL;
 	struct nv04_gr_chan *next = NULL;
 	unsigned long flags;
@@ -1088,7 +1095,7 @@
 		nv04_gr_unload_context(prev);
 
 	/* load context for next channel */
-	chid = (nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR) >> 24) & 0x0f;
+	chid = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 24) & 0x0f;
 	next = gr->chan[chid];
 	if (next)
 		nv04_gr_load_context(next, chid);
@@ -1161,13 +1168,14 @@
 {
 	struct nv04_gr *gr = (void *)object->engine;
 	struct nv04_gr_chan *chan = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	unsigned long flags;
 
 	spin_lock_irqsave(&gr->lock, flags);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
 	if (nv04_gr_channel(gr) == chan)
 		nv04_gr_unload_context(chan);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
 	spin_unlock_irqrestore(&gr->lock, flags);
 
 	return nvkm_object_fini(&chan->base, suspend);
@@ -1192,6 +1200,7 @@
 nv04_gr_idle(void *obj)
 {
 	struct nvkm_gr *gr = nvkm_gr(obj);
+	struct nvkm_device *device = gr->engine.subdev.device;
 	u32 mask = 0xffffffff;
 
 	if (nv_device(obj)->card_type == NV_40)
@@ -1199,7 +1208,7 @@
 
 	if (!nv_wait(gr, NV04_PGRAPH_STATUS, mask, 0)) {
 		nv_error(gr, "idle timed out with status 0x%08x\n",
-			 nv_rd32(gr, NV04_PGRAPH_STATUS));
+			 nvkm_rd32(device, NV04_PGRAPH_STATUS));
 		return false;
 	}
 
@@ -1252,16 +1261,17 @@
 	struct nv04_gr_chan *chan = NULL;
 	struct nvkm_namedb *namedb = NULL;
 	struct nvkm_handle *handle = NULL;
-	u32 stat = nv_rd32(gr, NV03_PGRAPH_INTR);
-	u32 nsource = nv_rd32(gr, NV03_PGRAPH_NSOURCE);
-	u32 nstatus = nv_rd32(gr, NV03_PGRAPH_NSTATUS);
-	u32 addr = nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR);
+	u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE);
+	u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS);
+	u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR);
 	u32 chid = (addr & 0x0f000000) >> 24;
 	u32 subc = (addr & 0x0000e000) >> 13;
 	u32 mthd = (addr & 0x00001ffc);
-	u32 data = nv_rd32(gr, NV04_PGRAPH_TRAPPED_DATA);
-	u32 class = nv_rd32(gr, 0x400180 + subc * 4) & 0xff;
-	u32 inst = (nv_rd32(gr, 0x40016c) & 0xffff) << 4;
+	u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA);
+	u32 class = nvkm_rd32(device, 0x400180 + subc * 4) & 0xff;
+	u32 inst = (nvkm_rd32(device, 0x40016c) & 0xffff) << 4;
 	u32 show = stat;
 	unsigned long flags;
 
@@ -1280,14 +1290,14 @@
 	}
 
 	if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
-		nv_wr32(gr, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
+		nvkm_wr32(device, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
 		stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
 		show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
 		nv04_gr_context_switch(gr);
 	}
 
-	nv_wr32(gr, NV03_PGRAPH_INTR, stat);
-	nv_wr32(gr, NV04_PGRAPH_FIFO, 0x00000001);
+	nvkm_wr32(device, NV03_PGRAPH_INTR, stat);
+	nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
 		nv_error(gr, "%s", "");
@@ -1332,6 +1342,7 @@
 {
 	struct nvkm_engine *engine = nv_engine(object);
 	struct nv04_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int ret;
 
 	ret = nvkm_gr_init(&gr->base);
@@ -1339,33 +1350,33 @@
 		return ret;
 
 	/* Enable PGRAPH interrupts */
-	nv_wr32(gr, NV03_PGRAPH_INTR, 0xFFFFFFFF);
-	nv_wr32(gr, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV04_PGRAPH_VALID1, 0);
-	nv_wr32(gr, NV04_PGRAPH_VALID2, 0);
-	/*nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x000001FF);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x1231c000);
+	nvkm_wr32(device, NV04_PGRAPH_VALID1, 0);
+	nvkm_wr32(device, NV04_PGRAPH_VALID2, 0);
+	/*nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x000001FF);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x1231c000);
 	/*1231C000 blob, 001 haiku*/
 	/*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_1, 0x72111100);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x72111100);
 	/*0x72111100 blob , 01 haiku*/
-	/*nv_wr32(gr, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_2, 0x11d5f071);
+	/*nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x11d5f071);
 	/*haiku same*/
 
-	/*nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0xfad4ff31);*/
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0xf0d4ff31);
+	/*nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xfad4ff31);*/
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xf0d4ff31);
 	/*haiku and blob 10d4*/
 
-	nv_wr32(gr, NV04_PGRAPH_STATE        , 0xFFFFFFFF);
-	nv_wr32(gr, NV04_PGRAPH_CTX_CONTROL  , 0x10000100);
-	nv_mask(gr, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000);
+	nvkm_wr32(device, NV04_PGRAPH_STATE        , 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL  , 0x10000100);
+	nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000);
 
 	/* These don't belong here, they're part of a per-channel context */
-	nv_wr32(gr, NV04_PGRAPH_PATTERN_SHAPE, 0x00000000);
-	nv_wr32(gr, NV04_PGRAPH_BETA_AND     , 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_PATTERN_SHAPE, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_BETA_AND     , 0xFFFFFFFF);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
index af33514..6b3ee95 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
@@ -414,17 +414,17 @@
 #define PIPE_SAVE(gr, state, addr)					\
 	do {								\
 		int __i;						\
-		nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
 		for (__i = 0; __i < ARRAY_SIZE(state); __i++)		\
-			state[__i] = nv_rd32(gr, NV10_PGRAPH_PIPE_DATA); \
+			state[__i] = nvkm_rd32(device, NV10_PGRAPH_PIPE_DATA); \
 	} while (0)
 
 #define PIPE_RESTORE(gr, state, addr)					\
 	do {								\
 		int __i;						\
-		nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
 		for (__i = 0; __i < ARRAY_SIZE(state); __i++)		\
-			nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, state[__i]); \
+			nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, state[__i]); \
 	} while (0)
 
 static struct nvkm_oclass
@@ -480,6 +480,7 @@
 	struct nv10_gr_chan *chan = (void *)object->parent;
 	struct nv10_gr *gr = nv10_gr(chan);
 	struct pipe_state *pipe = &chan->pipe_state;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 pipe_0x0040[1], pipe_0x64c0[8], pipe_0x6a80[3], pipe_0x6ab0[3];
 	u32 xfmode0, xfmode1;
 	u32 data = *(u32 *)args;
@@ -499,8 +500,8 @@
 
 	nv04_gr_idle(gr);
 
-	xfmode0 = nv_rd32(gr, NV10_PGRAPH_XFMODE0);
-	xfmode1 = nv_rd32(gr, NV10_PGRAPH_XFMODE1);
+	xfmode0 = nvkm_rd32(device, NV10_PGRAPH_XFMODE0);
+	xfmode1 = nvkm_rd32(device, NV10_PGRAPH_XFMODE1);
 
 	PIPE_SAVE(gr, pipe->pipe_0x4400, 0x4400);
 	PIPE_SAVE(gr, pipe_0x64c0, 0x64c0);
@@ -509,24 +510,24 @@
 
 	nv04_gr_idle(gr);
 
-	nv_wr32(gr, NV10_PGRAPH_XFMODE0, 0x10000000);
-	nv_wr32(gr, NV10_PGRAPH_XFMODE1, 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE0, 0x10000000);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE1, 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0);
 	for (i = 0; i < 4; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
 	for (i = 0; i < 4; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0);
 	for (i = 0; i < 3; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80);
 	for (i = 0; i < 3; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040);
-	nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000008);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000008);
 
 	PIPE_RESTORE(gr, pipe->pipe_0x0200, 0x0200);
 
@@ -534,16 +535,16 @@
 
 	PIPE_RESTORE(gr, pipe_0x0040, 0x0040);
 
-	nv_wr32(gr, NV10_PGRAPH_XFMODE0, xfmode0);
-	nv_wr32(gr, NV10_PGRAPH_XFMODE1, xfmode1);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE0, xfmode0);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE1, xfmode1);
 
 	PIPE_RESTORE(gr, pipe_0x64c0, 0x64c0);
 	PIPE_RESTORE(gr, pipe_0x6ab0, 0x6ab0);
 	PIPE_RESTORE(gr, pipe_0x6a80, 0x6a80);
 	PIPE_RESTORE(gr, pipe->pipe_0x4400, 0x4400);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x000000c0);
-	nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000000c0);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000);
 
 	nv04_gr_idle(gr);
 
@@ -556,11 +557,12 @@
 {
 	struct nv10_gr_chan *chan = (void *)object->parent;
 	struct nv10_gr *gr = nv10_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 
 	nv04_gr_idle(gr);
 
-	nv_mask(gr, NV10_PGRAPH_DEBUG_4, 0x00000100, 0x00000100);
-	nv_mask(gr, 0x4006b0, 0x08000000, 0x08000000);
+	nvkm_mask(device, NV10_PGRAPH_DEBUG_4, 0x00000100, 0x00000100);
+	nvkm_mask(device, 0x4006b0, 0x08000000, 0x08000000);
 	return 0;
 }
 
@@ -604,9 +606,10 @@
 static struct nv10_gr_chan *
 nv10_gr_channel(struct nv10_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nv10_gr_chan *chan = NULL;
-	if (nv_rd32(gr, 0x400144) & 0x00010000) {
-		int chid = nv_rd32(gr, 0x400148) >> 24;
+	if (nvkm_rd32(device, 0x400144) & 0x00010000) {
+		int chid = nvkm_rd32(device, 0x400148) >> 24;
 		if (chid < ARRAY_SIZE(gr->chan))
 			chan = gr->chan[chid];
 	}
@@ -618,6 +621,7 @@
 {
 	struct nv10_gr *gr = nv10_gr(chan);
 	struct pipe_state *pipe = &chan->pipe_state;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 
 	PIPE_SAVE(gr, pipe->pipe_0x4400, 0x4400);
 	PIPE_SAVE(gr, pipe->pipe_0x0200, 0x0200);
@@ -636,39 +640,40 @@
 {
 	struct nv10_gr *gr = nv10_gr(chan);
 	struct pipe_state *pipe = &chan->pipe_state;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 xfmode0, xfmode1;
 	int i;
 
 	nv04_gr_idle(gr);
 	/* XXX check haiku comments */
-	xfmode0 = nv_rd32(gr, NV10_PGRAPH_XFMODE0);
-	xfmode1 = nv_rd32(gr, NV10_PGRAPH_XFMODE1);
-	nv_wr32(gr, NV10_PGRAPH_XFMODE0, 0x10000000);
-	nv_wr32(gr, NV10_PGRAPH_XFMODE1, 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0);
+	xfmode0 = nvkm_rd32(device, NV10_PGRAPH_XFMODE0);
+	xfmode1 = nvkm_rd32(device, NV10_PGRAPH_XFMODE1);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE0, 0x10000000);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE1, 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0);
 	for (i = 0; i < 4; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
 	for (i = 0; i < 4; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0);
 	for (i = 0; i < 3; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80);
 	for (i = 0; i < 3; i++)
-		nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+		nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000);
 
-	nv_wr32(gr, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040);
-	nv_wr32(gr, NV10_PGRAPH_PIPE_DATA, 0x00000008);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040);
+	nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000008);
 
 
 	PIPE_RESTORE(gr, pipe->pipe_0x0200, 0x0200);
 	nv04_gr_idle(gr);
 
 	/* restore XFMODE */
-	nv_wr32(gr, NV10_PGRAPH_XFMODE0, xfmode0);
-	nv_wr32(gr, NV10_PGRAPH_XFMODE1, xfmode1);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE0, xfmode0);
+	nvkm_wr32(device, NV10_PGRAPH_XFMODE1, xfmode1);
 	PIPE_RESTORE(gr, pipe->pipe_0x6400, 0x6400);
 	PIPE_RESTORE(gr, pipe->pipe_0x6800, 0x6800);
 	PIPE_RESTORE(gr, pipe->pipe_0x6c00, 0x6c00);
@@ -864,6 +869,7 @@
 nv10_gr_load_dma_vtxbuf(struct nv10_gr_chan *chan, int chid, u32 inst)
 {
 	struct nv10_gr *gr = nv10_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 st2, st2_dl, st2_dh, fifo_ptr, fifo[0x60/4];
 	u32 ctx_user, ctx_switch[5];
 	int i, subchan = -1;
@@ -875,7 +881,7 @@
 
 	/* Look for a celsius object */
 	for (i = 0; i < 8; i++) {
-		int class = nv_rd32(gr, NV10_PGRAPH_CTX_CACHE(i, 0)) & 0xfff;
+		int class = nvkm_rd32(device, NV10_PGRAPH_CTX_CACHE(i, 0)) & 0xfff;
 
 		if (class == 0x56 || class == 0x96 || class == 0x99) {
 			subchan = i;
@@ -887,73 +893,74 @@
 		return;
 
 	/* Save the current ctx object */
-	ctx_user = nv_rd32(gr, NV10_PGRAPH_CTX_USER);
+	ctx_user = nvkm_rd32(device, NV10_PGRAPH_CTX_USER);
 	for (i = 0; i < 5; i++)
-		ctx_switch[i] = nv_rd32(gr, NV10_PGRAPH_CTX_SWITCH(i));
+		ctx_switch[i] = nvkm_rd32(device, NV10_PGRAPH_CTX_SWITCH(i));
 
 	/* Save the FIFO state */
-	st2 = nv_rd32(gr, NV10_PGRAPH_FFINTFC_ST2);
-	st2_dl = nv_rd32(gr, NV10_PGRAPH_FFINTFC_ST2_DL);
-	st2_dh = nv_rd32(gr, NV10_PGRAPH_FFINTFC_ST2_DH);
-	fifo_ptr = nv_rd32(gr, NV10_PGRAPH_FFINTFC_FIFO_PTR);
+	st2 = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2);
+	st2_dl = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2_DL);
+	st2_dh = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2_DH);
+	fifo_ptr = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR);
 
 	for (i = 0; i < ARRAY_SIZE(fifo); i++)
-		fifo[i] = nv_rd32(gr, 0x4007a0 + 4 * i);
+		fifo[i] = nvkm_rd32(device, 0x4007a0 + 4 * i);
 
 	/* Switch to the celsius subchannel */
 	for (i = 0; i < 5; i++)
-		nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(i),
-			nv_rd32(gr, NV10_PGRAPH_CTX_CACHE(subchan, i)));
-	nv_mask(gr, NV10_PGRAPH_CTX_USER, 0xe000, subchan << 13);
+		nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(i),
+			nvkm_rd32(device, NV10_PGRAPH_CTX_CACHE(subchan, i)));
+	nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xe000, subchan << 13);
 
 	/* Inject NV10TCL_DMA_VTXBUF */
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_FIFO_PTR, 0);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2,
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR, 0);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2,
 		0x2c000000 | chid << 20 | subchan << 16 | 0x18c);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2_DL, inst);
-	nv_mask(gr, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DL, inst);
+	nvkm_mask(device, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
 
 	/* Restore the FIFO state */
 	for (i = 0; i < ARRAY_SIZE(fifo); i++)
-		nv_wr32(gr, 0x4007a0 + 4 * i, fifo[i]);
+		nvkm_wr32(device, 0x4007a0 + 4 * i, fifo[i]);
 
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_FIFO_PTR, fifo_ptr);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2, st2);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2_DL, st2_dl);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2_DH, st2_dh);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR, fifo_ptr);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2, st2);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DL, st2_dl);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DH, st2_dh);
 
 	/* Restore the current ctx object */
 	for (i = 0; i < 5; i++)
-		nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(i), ctx_switch[i]);
-	nv_wr32(gr, NV10_PGRAPH_CTX_USER, ctx_user);
+		nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(i), ctx_switch[i]);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_USER, ctx_user);
 }
 
 static int
 nv10_gr_load_context(struct nv10_gr_chan *chan, int chid)
 {
 	struct nv10_gr *gr = nv10_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 inst;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nv10_gr_ctx_regs); i++)
-		nv_wr32(gr, nv10_gr_ctx_regs[i], chan->nv10[i]);
+		nvkm_wr32(device, nv10_gr_ctx_regs[i], chan->nv10[i]);
 
 	if (nv_device(gr)->card_type >= NV_11 &&
 	    nv_device(gr)->chipset >= 0x17) {
 		for (i = 0; i < ARRAY_SIZE(nv17_gr_ctx_regs); i++)
-			nv_wr32(gr, nv17_gr_ctx_regs[i], chan->nv17[i]);
+			nvkm_wr32(device, nv17_gr_ctx_regs[i], chan->nv17[i]);
 	}
 
 	nv10_gr_load_pipe(chan);
 
-	inst = nv_rd32(gr, NV10_PGRAPH_GLOBALSTATE1) & 0xffff;
+	inst = nvkm_rd32(device, NV10_PGRAPH_GLOBALSTATE1) & 0xffff;
 	nv10_gr_load_dma_vtxbuf(chan, chid, inst);
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10010100);
-	nv_mask(gr, NV10_PGRAPH_CTX_USER, 0xff000000, chid << 24);
-	nv_mask(gr, NV10_PGRAPH_FFINTFC_ST2, 0x30000000, 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10010100);
+	nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, chid << 24);
+	nvkm_mask(device, NV10_PGRAPH_FFINTFC_ST2, 0x30000000, 0x00000000);
 	return 0;
 }
 
@@ -961,27 +968,29 @@
 nv10_gr_unload_context(struct nv10_gr_chan *chan)
 {
 	struct nv10_gr *gr = nv10_gr(chan);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nv10_gr_ctx_regs); i++)
-		chan->nv10[i] = nv_rd32(gr, nv10_gr_ctx_regs[i]);
+		chan->nv10[i] = nvkm_rd32(device, nv10_gr_ctx_regs[i]);
 
 	if (nv_device(gr)->card_type >= NV_11 &&
 	    nv_device(gr)->chipset >= 0x17) {
 		for (i = 0; i < ARRAY_SIZE(nv17_gr_ctx_regs); i++)
-			chan->nv17[i] = nv_rd32(gr, nv17_gr_ctx_regs[i]);
+			chan->nv17[i] = nvkm_rd32(device, nv17_gr_ctx_regs[i]);
 	}
 
 	nv10_gr_save_pipe(chan);
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
-	nv_mask(gr, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
+	nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000);
 	return 0;
 }
 
 static void
 nv10_gr_context_switch(struct nv10_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nv10_gr_chan *prev = NULL;
 	struct nv10_gr_chan *next = NULL;
 	unsigned long flags;
@@ -996,7 +1005,7 @@
 		nv10_gr_unload_context(prev);
 
 	/* load context for next channel */
-	chid = (nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f;
+	chid = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f;
 	next = gr->chan[chid];
 	if (next)
 		nv10_gr_load_context(next, chid);
@@ -1024,6 +1033,7 @@
 	struct nvkm_fifo_chan *fifo = (void *)parent;
 	struct nv10_gr *gr = (void *)engine;
 	struct nv10_gr_chan *chan;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	unsigned long flags;
 	int ret;
 
@@ -1052,8 +1062,8 @@
 	    nv_device(gr)->chipset >= 0x17) {
 		/* is it really needed ??? */
 		NV17_WRITE_CTX(NV10_PGRAPH_DEBUG_4,
-					nv_rd32(gr, NV10_PGRAPH_DEBUG_4));
-		NV17_WRITE_CTX(0x004006b0, nv_rd32(gr, 0x004006b0));
+					nvkm_rd32(device, NV10_PGRAPH_DEBUG_4));
+		NV17_WRITE_CTX(0x004006b0, nvkm_rd32(device, 0x004006b0));
 		NV17_WRITE_CTX(0x00400eac, 0x0fff0000);
 		NV17_WRITE_CTX(0x00400eb0, 0x0fff0000);
 		NV17_WRITE_CTX(0x00400ec0, 0x00000080);
@@ -1088,13 +1098,14 @@
 {
 	struct nv10_gr *gr = (void *)object->engine;
 	struct nv10_gr_chan *chan = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	unsigned long flags;
 
 	spin_lock_irqsave(&gr->lock, flags);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000);
 	if (nv10_gr_channel(gr) == chan)
 		nv10_gr_unload_context(chan);
-	nv_mask(gr, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
+	nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001);
 	spin_unlock_irqrestore(&gr->lock, flags);
 
 	return nvkm_object_fini(&chan->base, suspend);
@@ -1118,17 +1129,18 @@
 static void
 nv10_gr_tile_prog(struct nvkm_engine *engine, int i)
 {
-	struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i];
-	struct nvkm_fifo *fifo = nvkm_fifo(engine);
 	struct nv10_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = device->fifo;
+	struct nvkm_fb_tile *tile = &device->fb->tile.region[i];
 	unsigned long flags;
 
 	fifo->pause(fifo, &flags);
 	nv04_gr_idle(gr);
 
-	nv_wr32(gr, NV10_PGRAPH_TLIMIT(i), tile->limit);
-	nv_wr32(gr, NV10_PGRAPH_TSIZE(i), tile->pitch);
-	nv_wr32(gr, NV10_PGRAPH_TILE(i), tile->addr);
+	nvkm_wr32(device, NV10_PGRAPH_TLIMIT(i), tile->limit);
+	nvkm_wr32(device, NV10_PGRAPH_TSIZE(i), tile->pitch);
+	nvkm_wr32(device, NV10_PGRAPH_TILE(i), tile->addr);
 
 	fifo->start(fifo, &flags);
 }
@@ -1154,15 +1166,16 @@
 	struct nv10_gr_chan *chan = NULL;
 	struct nvkm_namedb *namedb = NULL;
 	struct nvkm_handle *handle = NULL;
-	u32 stat = nv_rd32(gr, NV03_PGRAPH_INTR);
-	u32 nsource = nv_rd32(gr, NV03_PGRAPH_NSOURCE);
-	u32 nstatus = nv_rd32(gr, NV03_PGRAPH_NSTATUS);
-	u32 addr = nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR);
+	u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE);
+	u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS);
+	u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR);
 	u32 chid = (addr & 0x01f00000) >> 20;
 	u32 subc = (addr & 0x00070000) >> 16;
 	u32 mthd = (addr & 0x00001ffc);
-	u32 data = nv_rd32(gr, NV04_PGRAPH_TRAPPED_DATA);
-	u32 class = nv_rd32(gr, 0x400160 + subc * 4) & 0xfff;
+	u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA);
+	u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xfff;
 	u32 show = stat;
 	unsigned long flags;
 
@@ -1181,14 +1194,14 @@
 	}
 
 	if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
-		nv_wr32(gr, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
+		nvkm_wr32(device, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
 		stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
 		show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
 		nv10_gr_context_switch(gr);
 	}
 
-	nv_wr32(gr, NV03_PGRAPH_INTR, stat);
-	nv_wr32(gr, NV04_PGRAPH_FIFO, 0x00000001);
+	nvkm_wr32(device, NV03_PGRAPH_INTR, stat);
+	nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
 		nv_error(gr, "%s", "");
@@ -1249,49 +1262,50 @@
 nv10_gr_init(struct nvkm_object *object)
 {
 	struct nvkm_engine *engine = nv_engine(object);
-	struct nvkm_fb *fb = nvkm_fb(object);
 	struct nv10_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fb *fb = device->fb;
 	int ret, i;
 
 	ret = nvkm_gr_init(&gr->base);
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
-	nv_wr32(gr, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x00000000);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_1, 0x00118700);
-	/* nv_wr32(gr, NV04_PGRAPH_DEBUG_2, 0x24E00810); */ /* 0x25f92ad9 */
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_2, 0x25f92ad9);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0x55DE0830 | (1 << 29) | (1 << 31));
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x00118700);
+	/* nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x24E00810); */ /* 0x25f92ad9 */
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x25f92ad9);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0x55DE0830 | (1 << 29) | (1 << 31));
 
 	if (nv_device(gr)->card_type >= NV_11 &&
 	    nv_device(gr)->chipset >= 0x17) {
-		nv_wr32(gr, NV10_PGRAPH_DEBUG_4, 0x1f000000);
-		nv_wr32(gr, 0x400a10, 0x03ff3fb6);
-		nv_wr32(gr, 0x400838, 0x002f8684);
-		nv_wr32(gr, 0x40083c, 0x00115f3f);
-		nv_wr32(gr, 0x4006b0, 0x40000020);
+		nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x1f000000);
+		nvkm_wr32(device, 0x400a10, 0x03ff3fb6);
+		nvkm_wr32(device, 0x400838, 0x002f8684);
+		nvkm_wr32(device, 0x40083c, 0x00115f3f);
+		nvkm_wr32(device, 0x4006b0, 0x40000020);
 	} else {
-		nv_wr32(gr, NV10_PGRAPH_DEBUG_4, 0x00000000);
+		nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00000000);
 	}
 
 	/* Turn all the tiling regions off. */
 	for (i = 0; i < fb->tile.regions; i++)
 		engine->tile_prog(engine, i);
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(0), 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(1), 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(2), 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(3), 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_CTX_SWITCH(4), 0x00000000);
-	nv_wr32(gr, NV10_PGRAPH_STATE, 0xFFFFFFFF);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(0), 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(1), 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(2), 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(3), 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(4), 0x00000000);
+	nvkm_wr32(device, NV10_PGRAPH_STATE, 0xFFFFFFFF);
 
-	nv_mask(gr, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000);
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
-	nv_wr32(gr, NV10_PGRAPH_FFINTFC_ST2, 0x08000000);
+	nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
+	nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2, 0x08000000);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
index 3e54285..8e264f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
@@ -118,19 +118,20 @@
 {
 	struct nv20_gr *gr = (void *)object->engine;
 	struct nv20_gr_chan *chan = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int chid = -1;
 
-	nv_mask(gr, 0x400720, 0x00000001, 0x00000000);
-	if (nv_rd32(gr, 0x400144) & 0x00010000)
-		chid = (nv_rd32(gr, 0x400148) & 0x1f000000) >> 24;
+	nvkm_mask(device, 0x400720, 0x00000001, 0x00000000);
+	if (nvkm_rd32(device, 0x400144) & 0x00010000)
+		chid = (nvkm_rd32(device, 0x400148) & 0x1f000000) >> 24;
 	if (chan->chid == chid) {
-		nv_wr32(gr, 0x400784, nv_gpuobj(chan)->addr >> 4);
-		nv_wr32(gr, 0x400788, 0x00000002);
+		nvkm_wr32(device, 0x400784, nv_gpuobj(chan)->addr >> 4);
+		nvkm_wr32(device, 0x400788, 0x00000002);
 		nv_wait(gr, 0x400700, 0xffffffff, 0x00000000);
-		nv_wr32(gr, 0x400144, 0x10000000);
-		nv_mask(gr, 0x400148, 0xff000000, 0x1f000000);
+		nvkm_wr32(device, 0x400144, 0x10000000);
+		nvkm_mask(device, 0x400148, 0xff000000, 0x1f000000);
 	}
-	nv_mask(gr, 0x400720, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x400720, 0x00000001, 0x00000001);
 
 	nv_wo32(gr->ctxtab, chan->chid * 4, 0x00000000);
 	return nvkm_gr_context_fini(&chan->base, suspend);
@@ -156,29 +157,30 @@
 void
 nv20_gr_tile_prog(struct nvkm_engine *engine, int i)
 {
-	struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i];
-	struct nvkm_fifo *fifo = nvkm_fifo(engine);
 	struct nv20_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = device->fifo;
+	struct nvkm_fb_tile *tile = &device->fb->tile.region[i];
 	unsigned long flags;
 
 	fifo->pause(fifo, &flags);
 	nv04_gr_idle(gr);
 
-	nv_wr32(gr, NV20_PGRAPH_TLIMIT(i), tile->limit);
-	nv_wr32(gr, NV20_PGRAPH_TSIZE(i), tile->pitch);
-	nv_wr32(gr, NV20_PGRAPH_TILE(i), tile->addr);
+	nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit);
+	nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch);
+	nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr);
 
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA, tile->limit);
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA, tile->pitch);
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA, tile->addr);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->limit);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->pitch);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->addr);
 
 	if (nv_device(engine)->chipset != 0x34) {
-		nv_wr32(gr, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA, tile->zcomp);
+		nvkm_wr32(device, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->zcomp);
 	}
 
 	fifo->start(fifo, &flags);
@@ -191,15 +193,16 @@
 	struct nvkm_object *engctx;
 	struct nvkm_handle *handle;
 	struct nv20_gr *gr = (void *)subdev;
-	u32 stat = nv_rd32(gr, NV03_PGRAPH_INTR);
-	u32 nsource = nv_rd32(gr, NV03_PGRAPH_NSOURCE);
-	u32 nstatus = nv_rd32(gr, NV03_PGRAPH_NSTATUS);
-	u32 addr = nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR);
+	u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE);
+	u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS);
+	u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR);
 	u32 chid = (addr & 0x01f00000) >> 20;
 	u32 subc = (addr & 0x00070000) >> 16;
 	u32 mthd = (addr & 0x00001ffc);
-	u32 data = nv_rd32(gr, NV04_PGRAPH_TRAPPED_DATA);
-	u32 class = nv_rd32(gr, 0x400160 + subc * 4) & 0xfff;
+	u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA);
+	u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xfff;
 	u32 show = stat;
 
 	engctx = nvkm_engctx_get(engine, chid);
@@ -212,8 +215,8 @@
 		}
 	}
 
-	nv_wr32(gr, NV03_PGRAPH_INTR, stat);
-	nv_wr32(gr, NV04_PGRAPH_FIFO, 0x00000001);
+	nvkm_wr32(device, NV03_PGRAPH_INTR, stat);
+	nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
 		nv_error(gr, "%s", "");
@@ -271,7 +274,8 @@
 {
 	struct nvkm_engine *engine = nv_engine(object);
 	struct nv20_gr *gr = (void *)engine;
-	struct nvkm_fb *fb = nvkm_fb(object);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fb *fb = device->fb;
 	u32 tmp, vramsz;
 	int ret, i;
 
@@ -279,87 +283,87 @@
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, NV20_PGRAPH_CHANNEL_CTX_TABLE, gr->ctxtab->addr >> 4);
+	nvkm_wr32(device, NV20_PGRAPH_CHANNEL_CTX_TABLE, gr->ctxtab->addr >> 4);
 
 	if (nv_device(gr)->chipset == 0x20) {
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x003d0000);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x003d0000);
 		for (i = 0; i < 15; i++)
-			nv_wr32(gr, NV10_PGRAPH_RDI_DATA, 0x00000000);
+			nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, 0x00000000);
 		nv_wait(gr, 0x400700, 0xffffffff, 0x00000000);
 	} else {
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x02c80000);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x02c80000);
 		for (i = 0; i < 32; i++)
-			nv_wr32(gr, NV10_PGRAPH_RDI_DATA, 0x00000000);
+			nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, 0x00000000);
 		nv_wait(gr, 0x400700, 0xffffffff, 0x00000000);
 	}
 
-	nv_wr32(gr, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
-	nv_wr32(gr, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x00000000);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_1, 0x00118700);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0xF3CE0475); /* 0x4 = auto ctx switch */
-	nv_wr32(gr, NV10_PGRAPH_DEBUG_4, 0x00000000);
-	nv_wr32(gr, 0x40009C           , 0x00000040);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x00118700);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xF3CE0475); /* 0x4 = auto ctx switch */
+	nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00000000);
+	nvkm_wr32(device, 0x40009C           , 0x00000040);
 
 	if (nv_device(gr)->chipset >= 0x25) {
-		nv_wr32(gr, 0x400890, 0x00a8cfff);
-		nv_wr32(gr, 0x400610, 0x304B1FB6);
-		nv_wr32(gr, 0x400B80, 0x1cbd3883);
-		nv_wr32(gr, 0x400B84, 0x44000000);
-		nv_wr32(gr, 0x400098, 0x40000080);
-		nv_wr32(gr, 0x400B88, 0x000000ff);
+		nvkm_wr32(device, 0x400890, 0x00a8cfff);
+		nvkm_wr32(device, 0x400610, 0x304B1FB6);
+		nvkm_wr32(device, 0x400B80, 0x1cbd3883);
+		nvkm_wr32(device, 0x400B84, 0x44000000);
+		nvkm_wr32(device, 0x400098, 0x40000080);
+		nvkm_wr32(device, 0x400B88, 0x000000ff);
 
 	} else {
-		nv_wr32(gr, 0x400880, 0x0008c7df);
-		nv_wr32(gr, 0x400094, 0x00000005);
-		nv_wr32(gr, 0x400B80, 0x45eae20e);
-		nv_wr32(gr, 0x400B84, 0x24000000);
-		nv_wr32(gr, 0x400098, 0x00000040);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00E00038);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00000030);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00E10038);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00000030);
+		nvkm_wr32(device, 0x400880, 0x0008c7df);
+		nvkm_wr32(device, 0x400094, 0x00000005);
+		nvkm_wr32(device, 0x400B80, 0x45eae20e);
+		nvkm_wr32(device, 0x400B84, 0x24000000);
+		nvkm_wr32(device, 0x400098, 0x00000040);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E00038);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000030);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E10038);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000030);
 	}
 
 	/* Turn all the tiling regions off. */
 	for (i = 0; i < fb->tile.regions; i++)
 		engine->tile_prog(engine, i);
 
-	nv_wr32(gr, 0x4009a0, nv_rd32(gr, 0x100324));
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA000C);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA, nv_rd32(gr, 0x100324));
+	nvkm_wr32(device, 0x4009a0, nvkm_rd32(device, 0x100324));
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA000C);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, nvkm_rd32(device, 0x100324));
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
-	nv_wr32(gr, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
+	nvkm_wr32(device, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
 
-	tmp = nv_rd32(gr, NV10_PGRAPH_SURFACE) & 0x0007ff00;
-	nv_wr32(gr, NV10_PGRAPH_SURFACE, tmp);
-	tmp = nv_rd32(gr, NV10_PGRAPH_SURFACE) | 0x00020100;
-	nv_wr32(gr, NV10_PGRAPH_SURFACE, tmp);
+	tmp = nvkm_rd32(device, NV10_PGRAPH_SURFACE) & 0x0007ff00;
+	nvkm_wr32(device, NV10_PGRAPH_SURFACE, tmp);
+	tmp = nvkm_rd32(device, NV10_PGRAPH_SURFACE) | 0x00020100;
+	nvkm_wr32(device, NV10_PGRAPH_SURFACE, tmp);
 
 	/* begin RAM config */
 	vramsz = nv_device_resource_len(nv_device(gr), 1) - 1;
-	nv_wr32(gr, 0x4009A4, nv_rd32(gr, 0x100200));
-	nv_wr32(gr, 0x4009A8, nv_rd32(gr, 0x100204));
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0000);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA , nv_rd32(gr, 0x100200));
-	nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0004);
-	nv_wr32(gr, NV10_PGRAPH_RDI_DATA , nv_rd32(gr, 0x100204));
-	nv_wr32(gr, 0x400820, 0);
-	nv_wr32(gr, 0x400824, 0);
-	nv_wr32(gr, 0x400864, vramsz - 1);
-	nv_wr32(gr, 0x400868, vramsz - 1);
+	nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200));
+	nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204));
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0000);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , nvkm_rd32(device, 0x100200));
+	nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0004);
+	nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , nvkm_rd32(device, 0x100204));
+	nvkm_wr32(device, 0x400820, 0);
+	nvkm_wr32(device, 0x400824, 0);
+	nvkm_wr32(device, 0x400864, vramsz - 1);
+	nvkm_wr32(device, 0x400868, vramsz - 1);
 
 	/* interesting.. the below overwrites some of the tile setup above.. */
-	nv_wr32(gr, 0x400B20, 0x00000000);
-	nv_wr32(gr, 0x400B04, 0xFFFFFFFF);
+	nvkm_wr32(device, 0x400B20, 0x00000000);
+	nvkm_wr32(device, 0x400B04, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV03_PGRAPH_ABS_UCLIP_XMIN, 0);
-	nv_wr32(gr, NV03_PGRAPH_ABS_UCLIP_YMIN, 0);
-	nv_wr32(gr, NV03_PGRAPH_ABS_UCLIP_XMAX, 0x7fff);
-	nv_wr32(gr, NV03_PGRAPH_ABS_UCLIP_YMAX, 0x7fff);
+	nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_XMIN, 0);
+	nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_YMIN, 0);
+	nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_XMAX, 0x7fff);
+	nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_YMAX, 0x7fff);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
index dea1cb9..8be77b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
@@ -153,67 +153,68 @@
 {
 	struct nvkm_engine *engine = nv_engine(object);
 	struct nv20_gr *gr = (void *)engine;
-	struct nvkm_fb *fb = nvkm_fb(object);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fb *fb = device->fb;
 	int ret, i;
 
 	ret = nvkm_gr_init(&gr->base);
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, NV20_PGRAPH_CHANNEL_CTX_TABLE, gr->ctxtab->addr >> 4);
+	nvkm_wr32(device, NV20_PGRAPH_CHANNEL_CTX_TABLE, gr->ctxtab->addr >> 4);
 
-	nv_wr32(gr, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
-	nv_wr32(gr, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x00000000);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_1, 0x401287c0);
-	nv_wr32(gr, 0x400890, 0x01b463ff);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0xf2de0475);
-	nv_wr32(gr, NV10_PGRAPH_DEBUG_4, 0x00008000);
-	nv_wr32(gr, NV04_PGRAPH_LIMIT_VIOL_PIX, 0xf04bdff6);
-	nv_wr32(gr, 0x400B80, 0x1003d888);
-	nv_wr32(gr, 0x400B84, 0x0c000000);
-	nv_wr32(gr, 0x400098, 0x00000000);
-	nv_wr32(gr, 0x40009C, 0x0005ad00);
-	nv_wr32(gr, 0x400B88, 0x62ff00ff); /* suspiciously like PGRAPH_DEBUG_2 */
-	nv_wr32(gr, 0x4000a0, 0x00000000);
-	nv_wr32(gr, 0x4000a4, 0x00000008);
-	nv_wr32(gr, 0x4008a8, 0xb784a400);
-	nv_wr32(gr, 0x400ba0, 0x002f8685);
-	nv_wr32(gr, 0x400ba4, 0x00231f3f);
-	nv_wr32(gr, 0x4008a4, 0x40000020);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x401287c0);
+	nvkm_wr32(device, 0x400890, 0x01b463ff);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xf2de0475);
+	nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00008000);
+	nvkm_wr32(device, NV04_PGRAPH_LIMIT_VIOL_PIX, 0xf04bdff6);
+	nvkm_wr32(device, 0x400B80, 0x1003d888);
+	nvkm_wr32(device, 0x400B84, 0x0c000000);
+	nvkm_wr32(device, 0x400098, 0x00000000);
+	nvkm_wr32(device, 0x40009C, 0x0005ad00);
+	nvkm_wr32(device, 0x400B88, 0x62ff00ff); /* suspiciously like PGRAPH_DEBUG_2 */
+	nvkm_wr32(device, 0x4000a0, 0x00000000);
+	nvkm_wr32(device, 0x4000a4, 0x00000008);
+	nvkm_wr32(device, 0x4008a8, 0xb784a400);
+	nvkm_wr32(device, 0x400ba0, 0x002f8685);
+	nvkm_wr32(device, 0x400ba4, 0x00231f3f);
+	nvkm_wr32(device, 0x4008a4, 0x40000020);
 
 	if (nv_device(gr)->chipset == 0x34) {
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0004);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00200201);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0008);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00000008);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00EA0000);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00000032);
-		nv_wr32(gr, NV10_PGRAPH_RDI_INDEX, 0x00E00004);
-		nv_wr32(gr, NV10_PGRAPH_RDI_DATA , 0x00000002);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0004);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00200201);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0008);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000008);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0000);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000032);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E00004);
+		nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000002);
 	}
 
-	nv_wr32(gr, 0x4000c0, 0x00000016);
+	nvkm_wr32(device, 0x4000c0, 0x00000016);
 
 	/* Turn all the tiling regions off. */
 	for (i = 0; i < fb->tile.regions; i++)
 		engine->tile_prog(engine, i);
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
-	nv_wr32(gr, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
-	nv_wr32(gr, 0x0040075c             , 0x00000001);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
+	nvkm_wr32(device, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
+	nvkm_wr32(device, 0x0040075c             , 0x00000001);
 
 	/* begin RAM config */
 	/* vramsz = pci_resource_len(gr->dev->pdev, 1) - 1; */
-	nv_wr32(gr, 0x4009A4, nv_rd32(gr, 0x100200));
-	nv_wr32(gr, 0x4009A8, nv_rd32(gr, 0x100204));
+	nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200));
+	nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204));
 	if (nv_device(gr)->chipset != 0x34) {
-		nv_wr32(gr, 0x400750, 0x00EA0000);
-		nv_wr32(gr, 0x400754, nv_rd32(gr, 0x100200));
-		nv_wr32(gr, 0x400750, 0x00EA0004);
-		nv_wr32(gr, 0x400754, nv_rd32(gr, 0x100204));
+		nvkm_wr32(device, 0x400750, 0x00EA0000);
+		nvkm_wr32(device, 0x400754, nvkm_rd32(device, 0x100200));
+		nvkm_wr32(device, 0x400750, 0x00EA0004);
+		nvkm_wr32(device, 0x400754, nvkm_rd32(device, 0x100204));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index 3c2df9d..edcaa65 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -42,7 +42,7 @@
 static u64
 nv40_gr_units(struct nvkm_gr *gr)
 {
-	return nv_rd32(gr, 0x1540);
+	return nvkm_rd32(gr->engine.subdev.device, 0x1540);
 }
 
 /*******************************************************************************
@@ -155,31 +155,32 @@
 {
 	struct nv40_gr *gr = (void *)object->engine;
 	struct nv40_gr_chan *chan = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 inst = 0x01000000 | nv_gpuobj(chan)->addr >> 4;
 	int ret = 0;
 
-	nv_mask(gr, 0x400720, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x400720, 0x00000001, 0x00000000);
 
-	if (nv_rd32(gr, 0x40032c) == inst) {
+	if (nvkm_rd32(device, 0x40032c) == inst) {
 		if (suspend) {
-			nv_wr32(gr, 0x400720, 0x00000000);
-			nv_wr32(gr, 0x400784, inst);
-			nv_mask(gr, 0x400310, 0x00000020, 0x00000020);
-			nv_mask(gr, 0x400304, 0x00000001, 0x00000001);
+			nvkm_wr32(device, 0x400720, 0x00000000);
+			nvkm_wr32(device, 0x400784, inst);
+			nvkm_mask(device, 0x400310, 0x00000020, 0x00000020);
+			nvkm_mask(device, 0x400304, 0x00000001, 0x00000001);
 			if (!nv_wait(gr, 0x400300, 0x00000001, 0x00000000)) {
-				u32 insn = nv_rd32(gr, 0x400308);
+				u32 insn = nvkm_rd32(device, 0x400308);
 				nv_warn(gr, "ctxprog timeout 0x%08x\n", insn);
 				ret = -EBUSY;
 			}
 		}
 
-		nv_mask(gr, 0x40032c, 0x01000000, 0x00000000);
+		nvkm_mask(device, 0x40032c, 0x01000000, 0x00000000);
 	}
 
-	if (nv_rd32(gr, 0x400330) == inst)
-		nv_mask(gr, 0x400330, 0x01000000, 0x00000000);
+	if (nvkm_rd32(device, 0x400330) == inst)
+		nvkm_mask(device, 0x400330, 0x01000000, 0x00000000);
 
-	nv_mask(gr, 0x400720, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x400720, 0x00000001, 0x00000001);
 	return ret;
 }
 
@@ -203,9 +204,10 @@
 static void
 nv40_gr_tile_prog(struct nvkm_engine *engine, int i)
 {
-	struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i];
-	struct nvkm_fifo *fifo = nvkm_fifo(engine);
 	struct nv40_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = device->fifo;
+	struct nvkm_fb_tile *tile = &device->fb->tile.region[i];
 	unsigned long flags;
 
 	fifo->pause(fifo, &flags);
@@ -218,23 +220,23 @@
 	case 0x43:
 	case 0x45:
 	case 0x4e:
-		nv_wr32(gr, NV20_PGRAPH_TSIZE(i), tile->pitch);
-		nv_wr32(gr, NV20_PGRAPH_TLIMIT(i), tile->limit);
-		nv_wr32(gr, NV20_PGRAPH_TILE(i), tile->addr);
-		nv_wr32(gr, NV40_PGRAPH_TSIZE1(i), tile->pitch);
-		nv_wr32(gr, NV40_PGRAPH_TLIMIT1(i), tile->limit);
-		nv_wr32(gr, NV40_PGRAPH_TILE1(i), tile->addr);
+		nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch);
+		nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit);
+		nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr);
+		nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch);
+		nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit);
+		nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr);
 		switch (nv_device(gr)->chipset) {
 		case 0x40:
 		case 0x45:
-			nv_wr32(gr, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
-			nv_wr32(gr, NV40_PGRAPH_ZCOMP1(i), tile->zcomp);
+			nvkm_wr32(device, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
+			nvkm_wr32(device, NV40_PGRAPH_ZCOMP1(i), tile->zcomp);
 			break;
 		case 0x41:
 		case 0x42:
 		case 0x43:
-			nv_wr32(gr, NV41_PGRAPH_ZCOMP0(i), tile->zcomp);
-			nv_wr32(gr, NV41_PGRAPH_ZCOMP1(i), tile->zcomp);
+			nvkm_wr32(device, NV41_PGRAPH_ZCOMP0(i), tile->zcomp);
+			nvkm_wr32(device, NV41_PGRAPH_ZCOMP1(i), tile->zcomp);
 			break;
 		default:
 			break;
@@ -242,9 +244,9 @@
 		break;
 	case 0x44:
 	case 0x4a:
-		nv_wr32(gr, NV20_PGRAPH_TSIZE(i), tile->pitch);
-		nv_wr32(gr, NV20_PGRAPH_TLIMIT(i), tile->limit);
-		nv_wr32(gr, NV20_PGRAPH_TILE(i), tile->addr);
+		nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch);
+		nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit);
+		nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr);
 		break;
 	case 0x46:
 	case 0x4c:
@@ -254,18 +256,18 @@
 	case 0x63:
 	case 0x67:
 	case 0x68:
-		nv_wr32(gr, NV47_PGRAPH_TSIZE(i), tile->pitch);
-		nv_wr32(gr, NV47_PGRAPH_TLIMIT(i), tile->limit);
-		nv_wr32(gr, NV47_PGRAPH_TILE(i), tile->addr);
-		nv_wr32(gr, NV40_PGRAPH_TSIZE1(i), tile->pitch);
-		nv_wr32(gr, NV40_PGRAPH_TLIMIT1(i), tile->limit);
-		nv_wr32(gr, NV40_PGRAPH_TILE1(i), tile->addr);
+		nvkm_wr32(device, NV47_PGRAPH_TSIZE(i), tile->pitch);
+		nvkm_wr32(device, NV47_PGRAPH_TLIMIT(i), tile->limit);
+		nvkm_wr32(device, NV47_PGRAPH_TILE(i), tile->addr);
+		nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch);
+		nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit);
+		nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr);
 		switch (nv_device(gr)->chipset) {
 		case 0x47:
 		case 0x49:
 		case 0x4b:
-			nv_wr32(gr, NV47_PGRAPH_ZCOMP0(i), tile->zcomp);
-			nv_wr32(gr, NV47_PGRAPH_ZCOMP1(i), tile->zcomp);
+			nvkm_wr32(device, NV47_PGRAPH_ZCOMP0(i), tile->zcomp);
+			nvkm_wr32(device, NV47_PGRAPH_ZCOMP1(i), tile->zcomp);
 			break;
 		default:
 			break;
@@ -286,15 +288,16 @@
 	struct nvkm_object *engctx;
 	struct nvkm_handle *handle = NULL;
 	struct nv40_gr *gr = (void *)subdev;
-	u32 stat = nv_rd32(gr, NV03_PGRAPH_INTR);
-	u32 nsource = nv_rd32(gr, NV03_PGRAPH_NSOURCE);
-	u32 nstatus = nv_rd32(gr, NV03_PGRAPH_NSTATUS);
-	u32 inst = nv_rd32(gr, 0x40032c) & 0x000fffff;
-	u32 addr = nv_rd32(gr, NV04_PGRAPH_TRAPPED_ADDR);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR);
+	u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE);
+	u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS);
+	u32 inst = nvkm_rd32(device, 0x40032c) & 0x000fffff;
+	u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR);
 	u32 subc = (addr & 0x00070000) >> 16;
 	u32 mthd = (addr & 0x00001ffc);
-	u32 data = nv_rd32(gr, NV04_PGRAPH_TRAPPED_DATA);
-	u32 class = nv_rd32(gr, 0x400160 + subc * 4) & 0xffff;
+	u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA);
+	u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xffff;
 	u32 show = stat;
 	int chid;
 
@@ -310,12 +313,12 @@
 		}
 
 		if (nsource & NV03_PGRAPH_NSOURCE_DMA_VTX_PROTECTION) {
-			nv_mask(gr, 0x402000, 0, 0);
+			nvkm_mask(device, 0x402000, 0, 0);
 		}
 	}
 
-	nv_wr32(gr, NV03_PGRAPH_INTR, stat);
-	nv_wr32(gr, NV04_PGRAPH_FIFO, 0x00000001);
+	nvkm_wr32(device, NV03_PGRAPH_INTR, stat);
+	nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
 		nv_error(gr, "%s", "");
@@ -364,8 +367,9 @@
 nv40_gr_init(struct nvkm_object *object)
 {
 	struct nvkm_engine *engine = nv_engine(object);
-	struct nvkm_fb *fb = nvkm_fb(object);
 	struct nv40_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fb *fb = device->fb;
 	int ret, i, j;
 	u32 vramsz;
 
@@ -379,89 +383,89 @@
 		return ret;
 
 	/* No context present currently */
-	nv_wr32(gr, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
+	nvkm_wr32(device, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
 
-	nv_wr32(gr, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
-	nv_wr32(gr, NV40_PGRAPH_INTR_EN, 0xFFFFFFFF);
+	nvkm_wr32(device, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
+	nvkm_wr32(device, NV40_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_0, 0x00000000);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_1, 0x401287c0);
-	nv_wr32(gr, NV04_PGRAPH_DEBUG_3, 0xe0de8055);
-	nv_wr32(gr, NV10_PGRAPH_DEBUG_4, 0x00008000);
-	nv_wr32(gr, NV04_PGRAPH_LIMIT_VIOL_PIX, 0x00be3c5f);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x401287c0);
+	nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xe0de8055);
+	nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00008000);
+	nvkm_wr32(device, NV04_PGRAPH_LIMIT_VIOL_PIX, 0x00be3c5f);
 
-	nv_wr32(gr, NV10_PGRAPH_CTX_CONTROL, 0x10010100);
-	nv_wr32(gr, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
+	nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10010100);
+	nvkm_wr32(device, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
 
-	j = nv_rd32(gr, 0x1540) & 0xff;
+	j = nvkm_rd32(device, 0x1540) & 0xff;
 	if (j) {
 		for (i = 0; !(j & 1); j >>= 1, i++)
 			;
-		nv_wr32(gr, 0x405000, i);
+		nvkm_wr32(device, 0x405000, i);
 	}
 
 	if (nv_device(gr)->chipset == 0x40) {
-		nv_wr32(gr, 0x4009b0, 0x83280fff);
-		nv_wr32(gr, 0x4009b4, 0x000000a0);
+		nvkm_wr32(device, 0x4009b0, 0x83280fff);
+		nvkm_wr32(device, 0x4009b4, 0x000000a0);
 	} else {
-		nv_wr32(gr, 0x400820, 0x83280eff);
-		nv_wr32(gr, 0x400824, 0x000000a0);
+		nvkm_wr32(device, 0x400820, 0x83280eff);
+		nvkm_wr32(device, 0x400824, 0x000000a0);
 	}
 
 	switch (nv_device(gr)->chipset) {
 	case 0x40:
 	case 0x45:
-		nv_wr32(gr, 0x4009b8, 0x0078e366);
-		nv_wr32(gr, 0x4009bc, 0x0000014c);
+		nvkm_wr32(device, 0x4009b8, 0x0078e366);
+		nvkm_wr32(device, 0x4009bc, 0x0000014c);
 		break;
 	case 0x41:
 	case 0x42: /* pciid also 0x00Cx */
 	/* case 0x0120: XXX (pciid) */
-		nv_wr32(gr, 0x400828, 0x007596ff);
-		nv_wr32(gr, 0x40082c, 0x00000108);
+		nvkm_wr32(device, 0x400828, 0x007596ff);
+		nvkm_wr32(device, 0x40082c, 0x00000108);
 		break;
 	case 0x43:
-		nv_wr32(gr, 0x400828, 0x0072cb77);
-		nv_wr32(gr, 0x40082c, 0x00000108);
+		nvkm_wr32(device, 0x400828, 0x0072cb77);
+		nvkm_wr32(device, 0x40082c, 0x00000108);
 		break;
 	case 0x44:
 	case 0x46: /* G72 */
 	case 0x4a:
 	case 0x4c: /* G7x-based C51 */
 	case 0x4e:
-		nv_wr32(gr, 0x400860, 0);
-		nv_wr32(gr, 0x400864, 0);
+		nvkm_wr32(device, 0x400860, 0);
+		nvkm_wr32(device, 0x400864, 0);
 		break;
 	case 0x47: /* G70 */
 	case 0x49: /* G71 */
 	case 0x4b: /* G73 */
-		nv_wr32(gr, 0x400828, 0x07830610);
-		nv_wr32(gr, 0x40082c, 0x0000016A);
+		nvkm_wr32(device, 0x400828, 0x07830610);
+		nvkm_wr32(device, 0x40082c, 0x0000016A);
 		break;
 	default:
 		break;
 	}
 
-	nv_wr32(gr, 0x400b38, 0x2ffff800);
-	nv_wr32(gr, 0x400b3c, 0x00006000);
+	nvkm_wr32(device, 0x400b38, 0x2ffff800);
+	nvkm_wr32(device, 0x400b3c, 0x00006000);
 
 	/* Tiling related stuff. */
 	switch (nv_device(gr)->chipset) {
 	case 0x44:
 	case 0x4a:
-		nv_wr32(gr, 0x400bc4, 0x1003d888);
-		nv_wr32(gr, 0x400bbc, 0xb7a7b500);
+		nvkm_wr32(device, 0x400bc4, 0x1003d888);
+		nvkm_wr32(device, 0x400bbc, 0xb7a7b500);
 		break;
 	case 0x46:
-		nv_wr32(gr, 0x400bc4, 0x0000e024);
-		nv_wr32(gr, 0x400bbc, 0xb7a7b520);
+		nvkm_wr32(device, 0x400bc4, 0x0000e024);
+		nvkm_wr32(device, 0x400bbc, 0xb7a7b520);
 		break;
 	case 0x4c:
 	case 0x4e:
 	case 0x67:
-		nv_wr32(gr, 0x400bc4, 0x1003d888);
-		nv_wr32(gr, 0x400bbc, 0xb7a7b540);
+		nvkm_wr32(device, 0x400bc4, 0x1003d888);
+		nvkm_wr32(device, 0x400bbc, 0xb7a7b540);
 		break;
 	default:
 		break;
@@ -475,14 +479,14 @@
 	vramsz = nv_device_resource_len(nv_device(gr), 1) - 1;
 	switch (nv_device(gr)->chipset) {
 	case 0x40:
-		nv_wr32(gr, 0x4009A4, nv_rd32(gr, 0x100200));
-		nv_wr32(gr, 0x4009A8, nv_rd32(gr, 0x100204));
-		nv_wr32(gr, 0x4069A4, nv_rd32(gr, 0x100200));
-		nv_wr32(gr, 0x4069A8, nv_rd32(gr, 0x100204));
-		nv_wr32(gr, 0x400820, 0);
-		nv_wr32(gr, 0x400824, 0);
-		nv_wr32(gr, 0x400864, vramsz);
-		nv_wr32(gr, 0x400868, vramsz);
+		nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200));
+		nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204));
+		nvkm_wr32(device, 0x4069A4, nvkm_rd32(device, 0x100200));
+		nvkm_wr32(device, 0x4069A8, nvkm_rd32(device, 0x100204));
+		nvkm_wr32(device, 0x400820, 0);
+		nvkm_wr32(device, 0x400824, 0);
+		nvkm_wr32(device, 0x400864, vramsz);
+		nvkm_wr32(device, 0x400868, vramsz);
 		break;
 	default:
 		switch (nv_device(gr)->chipset) {
@@ -493,20 +497,20 @@
 		case 0x4e:
 		case 0x44:
 		case 0x4a:
-			nv_wr32(gr, 0x4009F0, nv_rd32(gr, 0x100200));
-			nv_wr32(gr, 0x4009F4, nv_rd32(gr, 0x100204));
+			nvkm_wr32(device, 0x4009F0, nvkm_rd32(device, 0x100200));
+			nvkm_wr32(device, 0x4009F4, nvkm_rd32(device, 0x100204));
 			break;
 		default:
-			nv_wr32(gr, 0x400DF0, nv_rd32(gr, 0x100200));
-			nv_wr32(gr, 0x400DF4, nv_rd32(gr, 0x100204));
+			nvkm_wr32(device, 0x400DF0, nvkm_rd32(device, 0x100200));
+			nvkm_wr32(device, 0x400DF4, nvkm_rd32(device, 0x100204));
 			break;
 		}
-		nv_wr32(gr, 0x4069F0, nv_rd32(gr, 0x100200));
-		nv_wr32(gr, 0x4069F4, nv_rd32(gr, 0x100204));
-		nv_wr32(gr, 0x400840, 0);
-		nv_wr32(gr, 0x400844, 0);
-		nv_wr32(gr, 0x4008A0, vramsz);
-		nv_wr32(gr, 0x4008A4, vramsz);
+		nvkm_wr32(device, 0x4069F0, nvkm_rd32(device, 0x100200));
+		nvkm_wr32(device, 0x4069F4, nvkm_rd32(device, 0x100204));
+		nvkm_wr32(device, 0x400840, 0);
+		nvkm_wr32(device, 0x400844, 0);
+		nvkm_wr32(device, 0x4008A0, vramsz);
+		nvkm_wr32(device, 0x4008A4, vramsz);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index 70be675..ade34d8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -41,7 +41,7 @@
 static u64
 nv50_gr_units(struct nvkm_gr *gr)
 {
-	return nv_rd32(gr, 0x1540);
+	return nvkm_rd32(gr->engine.subdev.device, 0x1540);
 }
 
 /*******************************************************************************
@@ -235,31 +235,32 @@
 static int
 g84_gr_tlb_flush(struct nvkm_engine *engine)
 {
-	struct nvkm_timer *tmr = nvkm_timer(engine);
 	struct nv50_gr *gr = (void *)engine;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_timer *tmr = device->timer;
 	bool idle, timeout = false;
 	unsigned long flags;
 	u64 start;
 	u32 tmp;
 
 	spin_lock_irqsave(&gr->lock, flags);
-	nv_mask(gr, 0x400500, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x400500, 0x00000001, 0x00000000);
 
 	start = tmr->read(tmr);
 	do {
 		idle = true;
 
-		for (tmp = nv_rd32(gr, 0x400380); tmp && idle; tmp >>= 3) {
+		for (tmp = nvkm_rd32(device, 0x400380); tmp && idle; tmp >>= 3) {
 			if ((tmp & 7) == 1)
 				idle = false;
 		}
 
-		for (tmp = nv_rd32(gr, 0x400384); tmp && idle; tmp >>= 3) {
+		for (tmp = nvkm_rd32(device, 0x400384); tmp && idle; tmp >>= 3) {
 			if ((tmp & 7) == 1)
 				idle = false;
 		}
 
-		for (tmp = nv_rd32(gr, 0x400388); tmp && idle; tmp >>= 3) {
+		for (tmp = nvkm_rd32(device, 0x400388); tmp && idle; tmp >>= 3) {
 			if ((tmp & 7) == 1)
 				idle = false;
 		}
@@ -269,24 +270,24 @@
 	if (timeout) {
 		nv_error(gr, "PGRAPH TLB flush idle timeout fail\n");
 
-		tmp = nv_rd32(gr, 0x400700);
+		tmp = nvkm_rd32(device, 0x400700);
 		nv_error(gr, "PGRAPH_STATUS  : 0x%08x", tmp);
 		nvkm_bitfield_print(nv50_gr_status, tmp);
 		pr_cont("\n");
 
 		nvkm_gr_vstatus_print(gr, 0, nv50_gr_vstatus_0,
-				       nv_rd32(gr, 0x400380));
+				       nvkm_rd32(device, 0x400380));
 		nvkm_gr_vstatus_print(gr, 1, nv50_gr_vstatus_1,
-				       nv_rd32(gr, 0x400384));
+				       nvkm_rd32(device, 0x400384));
 		nvkm_gr_vstatus_print(gr, 2, nv50_gr_vstatus_2,
-				       nv_rd32(gr, 0x400388));
+				       nvkm_rd32(device, 0x400388));
 	}
 
 
-	nv_wr32(gr, 0x100c80, 0x00000001);
+	nvkm_wr32(device, 0x100c80, 0x00000001);
 	if (!nv_wait(gr, 0x100c80, 0x00000001, 0x00000000))
 		nv_error(gr, "vm flush timeout\n");
-	nv_mask(gr, 0x400500, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x400500, 0x00000001, 0x00000001);
 	spin_unlock_irqrestore(&gr->lock, flags);
 	return timeout ? -EBUSY : 0;
 }
@@ -427,13 +428,14 @@
 nv50_gr_prop_trap(struct nv50_gr *gr,
 		    u32 ustatus_addr, u32 ustatus, u32 tp)
 {
-	u32 e0c = nv_rd32(gr, ustatus_addr + 0x04);
-	u32 e10 = nv_rd32(gr, ustatus_addr + 0x08);
-	u32 e14 = nv_rd32(gr, ustatus_addr + 0x0c);
-	u32 e18 = nv_rd32(gr, ustatus_addr + 0x10);
-	u32 e1c = nv_rd32(gr, ustatus_addr + 0x14);
-	u32 e20 = nv_rd32(gr, ustatus_addr + 0x18);
-	u32 e24 = nv_rd32(gr, ustatus_addr + 0x1c);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 e0c = nvkm_rd32(device, ustatus_addr + 0x04);
+	u32 e10 = nvkm_rd32(device, ustatus_addr + 0x08);
+	u32 e14 = nvkm_rd32(device, ustatus_addr + 0x0c);
+	u32 e18 = nvkm_rd32(device, ustatus_addr + 0x10);
+	u32 e1c = nvkm_rd32(device, ustatus_addr + 0x14);
+	u32 e20 = nvkm_rd32(device, ustatus_addr + 0x18);
+	u32 e24 = nvkm_rd32(device, ustatus_addr + 0x1c);
 
 	/* CUDA memory: l[], g[] or stack. */
 	if (ustatus & 0x00000080) {
@@ -465,7 +467,8 @@
 static void
 nv50_gr_mp_trap(struct nv50_gr *gr, int tpid, int display)
 {
-	u32 units = nv_rd32(gr, 0x1540);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 units = nvkm_rd32(device, 0x1540);
 	u32 addr, mp10, status, pc, oplow, ophigh;
 	int i;
 	int mps = 0;
@@ -476,15 +479,15 @@
 			addr = 0x408200 + (tpid << 12) + (i << 7);
 		else
 			addr = 0x408100 + (tpid << 11) + (i << 7);
-		mp10 = nv_rd32(gr, addr + 0x10);
-		status = nv_rd32(gr, addr + 0x14);
+		mp10 = nvkm_rd32(device, addr + 0x10);
+		status = nvkm_rd32(device, addr + 0x14);
 		if (!status)
 			continue;
 		if (display) {
-			nv_rd32(gr, addr + 0x20);
-			pc = nv_rd32(gr, addr + 0x24);
-			oplow = nv_rd32(gr, addr + 0x70);
-			ophigh = nv_rd32(gr, addr + 0x74);
+			nvkm_rd32(device, addr + 0x20);
+			pc = nvkm_rd32(device, addr + 0x24);
+			oplow = nvkm_rd32(device, addr + 0x70);
+			ophigh = nvkm_rd32(device, addr + 0x74);
 			nv_error(gr, "TRAP_MP_EXEC - "
 					"TP %d MP %d:", tpid, i);
 			nvkm_bitfield_print(nv50_mp_exec_errors, status);
@@ -492,8 +495,8 @@
 					pc&0xffffff, pc >> 24,
 					oplow, ophigh);
 		}
-		nv_wr32(gr, addr + 0x10, mp10);
-		nv_wr32(gr, addr + 0x14, 0);
+		nvkm_wr32(device, addr + 0x10, mp10);
+		nvkm_wr32(device, addr + 0x14, 0);
 		mps++;
 	}
 	if (!mps && display)
@@ -505,8 +508,9 @@
 nv50_gr_tp_trap(struct nv50_gr *gr, int type, u32 ustatus_old,
 		  u32 ustatus_new, int display, const char *name)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 units = nvkm_rd32(device, 0x1540);
 	int tps = 0;
-	u32 units = nv_rd32(gr, 0x1540);
 	int i, r;
 	u32 ustatus_addr, ustatus;
 	for (i = 0; i < 16; i++) {
@@ -516,7 +520,7 @@
 			ustatus_addr = ustatus_old + (i << 12);
 		else
 			ustatus_addr = ustatus_new + (i << 11);
-		ustatus = nv_rd32(gr, ustatus_addr) & 0x7fffffff;
+		ustatus = nvkm_rd32(device, ustatus_addr) & 0x7fffffff;
 		if (!ustatus)
 			continue;
 		tps++;
@@ -526,7 +530,7 @@
 				nv_error(gr, "magic set %d:\n", i);
 				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
 					nv_error(gr, "\t0x%08x: 0x%08x\n", r,
-						nv_rd32(gr, r));
+						nvkm_rd32(device, r));
 				if (ustatus) {
 					nv_error(gr, "%s - TP%d:", name, i);
 					nvkm_bitfield_print(nv50_tex_traps,
@@ -559,7 +563,7 @@
 			if (display)
 				nv_error(gr, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus);
 		}
-		nv_wr32(gr, ustatus_addr, 0xc0000000);
+		nvkm_wr32(device, ustatus_addr, 0xc0000000);
 	}
 
 	if (!tps && display)
@@ -570,7 +574,8 @@
 nv50_gr_trap_handler(struct nv50_gr *gr, u32 display,
 		     int chid, u64 inst, struct nvkm_object *engctx)
 {
-	u32 status = nv_rd32(gr, 0x400108);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 status = nvkm_rd32(device, 0x400108);
 	u32 ustatus;
 
 	if (!status && display) {
@@ -582,22 +587,22 @@
 	 * COND, QUERY. If you get a trap from it, the command is still stuck
 	 * in DISPATCH and you need to do something about it. */
 	if (status & 0x001) {
-		ustatus = nv_rd32(gr, 0x400804) & 0x7fffffff;
+		ustatus = nvkm_rd32(device, 0x400804) & 0x7fffffff;
 		if (!ustatus && display) {
 			nv_error(gr, "TRAP_DISPATCH - no ustatus?\n");
 		}
 
-		nv_wr32(gr, 0x400500, 0x00000000);
+		nvkm_wr32(device, 0x400500, 0x00000000);
 
 		/* Known to be triggered by screwed up NOTIFY and COND... */
 		if (ustatus & 0x00000001) {
-			u32 addr = nv_rd32(gr, 0x400808);
+			u32 addr = nvkm_rd32(device, 0x400808);
 			u32 subc = (addr & 0x00070000) >> 16;
 			u32 mthd = (addr & 0x00001ffc);
-			u32 datal = nv_rd32(gr, 0x40080c);
-			u32 datah = nv_rd32(gr, 0x400810);
-			u32 class = nv_rd32(gr, 0x400814);
-			u32 r848 = nv_rd32(gr, 0x400848);
+			u32 datal = nvkm_rd32(device, 0x40080c);
+			u32 datah = nvkm_rd32(device, 0x400810);
+			u32 class = nvkm_rd32(device, 0x400814);
+			u32 r848 = nvkm_rd32(device, 0x400848);
 
 			nv_error(gr, "TRAP DISPATCH_FAULT\n");
 			if (display && (addr & 0x80000000)) {
@@ -611,18 +616,18 @@
 				nv_error(gr, "no stuck command?\n");
 			}
 
-			nv_wr32(gr, 0x400808, 0);
-			nv_wr32(gr, 0x4008e8, nv_rd32(gr, 0x4008e8) & 3);
-			nv_wr32(gr, 0x400848, 0);
+			nvkm_wr32(device, 0x400808, 0);
+			nvkm_wr32(device, 0x4008e8, nvkm_rd32(device, 0x4008e8) & 3);
+			nvkm_wr32(device, 0x400848, 0);
 			ustatus &= ~0x00000001;
 		}
 
 		if (ustatus & 0x00000002) {
-			u32 addr = nv_rd32(gr, 0x40084c);
+			u32 addr = nvkm_rd32(device, 0x40084c);
 			u32 subc = (addr & 0x00070000) >> 16;
 			u32 mthd = (addr & 0x00001ffc);
-			u32 data = nv_rd32(gr, 0x40085c);
-			u32 class = nv_rd32(gr, 0x400814);
+			u32 data = nvkm_rd32(device, 0x40085c);
+			u32 class = nvkm_rd32(device, 0x400814);
 
 			nv_error(gr, "TRAP DISPATCH_QUERY\n");
 			if (display && (addr & 0x80000000)) {
@@ -636,7 +641,7 @@
 				nv_error(gr, "no stuck command?\n");
 			}
 
-			nv_wr32(gr, 0x40084c, 0);
+			nvkm_wr32(device, 0x40084c, 0);
 			ustatus &= ~0x00000002;
 		}
 
@@ -645,8 +650,8 @@
 				      "0x%08x)\n", ustatus);
 		}
 
-		nv_wr32(gr, 0x400804, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x001);
+		nvkm_wr32(device, 0x400804, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x001);
 		status &= ~0x001;
 		if (!status)
 			return 0;
@@ -654,81 +659,81 @@
 
 	/* M2MF: Memory to memory copy engine. */
 	if (status & 0x002) {
-		u32 ustatus = nv_rd32(gr, 0x406800) & 0x7fffffff;
+		u32 ustatus = nvkm_rd32(device, 0x406800) & 0x7fffffff;
 		if (display) {
 			nv_error(gr, "TRAP_M2MF");
 			nvkm_bitfield_print(nv50_gr_trap_m2mf, ustatus);
 			pr_cont("\n");
 			nv_error(gr, "TRAP_M2MF %08x %08x %08x %08x\n",
-				nv_rd32(gr, 0x406804), nv_rd32(gr, 0x406808),
-				nv_rd32(gr, 0x40680c), nv_rd32(gr, 0x406810));
+				nvkm_rd32(device, 0x406804), nvkm_rd32(device, 0x406808),
+				nvkm_rd32(device, 0x40680c), nvkm_rd32(device, 0x406810));
 
 		}
 
 		/* No sane way found yet -- just reset the bugger. */
-		nv_wr32(gr, 0x400040, 2);
-		nv_wr32(gr, 0x400040, 0);
-		nv_wr32(gr, 0x406800, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x002);
+		nvkm_wr32(device, 0x400040, 2);
+		nvkm_wr32(device, 0x400040, 0);
+		nvkm_wr32(device, 0x406800, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x002);
 		status &= ~0x002;
 	}
 
 	/* VFETCH: Fetches data from vertex buffers. */
 	if (status & 0x004) {
-		u32 ustatus = nv_rd32(gr, 0x400c04) & 0x7fffffff;
+		u32 ustatus = nvkm_rd32(device, 0x400c04) & 0x7fffffff;
 		if (display) {
 			nv_error(gr, "TRAP_VFETCH");
 			nvkm_bitfield_print(nv50_gr_trap_vfetch, ustatus);
 			pr_cont("\n");
 			nv_error(gr, "TRAP_VFETCH %08x %08x %08x %08x\n",
-				nv_rd32(gr, 0x400c00), nv_rd32(gr, 0x400c08),
-				nv_rd32(gr, 0x400c0c), nv_rd32(gr, 0x400c10));
+				nvkm_rd32(device, 0x400c00), nvkm_rd32(device, 0x400c08),
+				nvkm_rd32(device, 0x400c0c), nvkm_rd32(device, 0x400c10));
 		}
 
-		nv_wr32(gr, 0x400c04, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x004);
+		nvkm_wr32(device, 0x400c04, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x004);
 		status &= ~0x004;
 	}
 
 	/* STRMOUT: DirectX streamout / OpenGL transform feedback. */
 	if (status & 0x008) {
-		ustatus = nv_rd32(gr, 0x401800) & 0x7fffffff;
+		ustatus = nvkm_rd32(device, 0x401800) & 0x7fffffff;
 		if (display) {
 			nv_error(gr, "TRAP_STRMOUT");
 			nvkm_bitfield_print(nv50_gr_trap_strmout, ustatus);
 			pr_cont("\n");
 			nv_error(gr, "TRAP_STRMOUT %08x %08x %08x %08x\n",
-				nv_rd32(gr, 0x401804), nv_rd32(gr, 0x401808),
-				nv_rd32(gr, 0x40180c), nv_rd32(gr, 0x401810));
+				nvkm_rd32(device, 0x401804), nvkm_rd32(device, 0x401808),
+				nvkm_rd32(device, 0x40180c), nvkm_rd32(device, 0x401810));
 
 		}
 
 		/* No sane way found yet -- just reset the bugger. */
-		nv_wr32(gr, 0x400040, 0x80);
-		nv_wr32(gr, 0x400040, 0);
-		nv_wr32(gr, 0x401800, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x008);
+		nvkm_wr32(device, 0x400040, 0x80);
+		nvkm_wr32(device, 0x400040, 0);
+		nvkm_wr32(device, 0x401800, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x008);
 		status &= ~0x008;
 	}
 
 	/* CCACHE: Handles code and c[] caches and fills them. */
 	if (status & 0x010) {
-		ustatus = nv_rd32(gr, 0x405018) & 0x7fffffff;
+		ustatus = nvkm_rd32(device, 0x405018) & 0x7fffffff;
 		if (display) {
 			nv_error(gr, "TRAP_CCACHE");
 			nvkm_bitfield_print(nv50_gr_trap_ccache, ustatus);
 			pr_cont("\n");
 			nv_error(gr, "TRAP_CCACHE %08x %08x %08x %08x"
 				     " %08x %08x %08x\n",
-				nv_rd32(gr, 0x405000), nv_rd32(gr, 0x405004),
-				nv_rd32(gr, 0x405008), nv_rd32(gr, 0x40500c),
-				nv_rd32(gr, 0x405010), nv_rd32(gr, 0x405014),
-				nv_rd32(gr, 0x40501c));
+				nvkm_rd32(device, 0x405000), nvkm_rd32(device, 0x405004),
+				nvkm_rd32(device, 0x405008), nvkm_rd32(device, 0x40500c),
+				nvkm_rd32(device, 0x405010), nvkm_rd32(device, 0x405014),
+				nvkm_rd32(device, 0x40501c));
 
 		}
 
-		nv_wr32(gr, 0x405018, 0xc0000000);
-		nv_wr32(gr, 0x400108, 0x010);
+		nvkm_wr32(device, 0x405018, 0xc0000000);
+		nvkm_wr32(device, 0x400108, 0x010);
 		status &= ~0x010;
 	}
 
@@ -736,10 +741,10 @@
 	 * remaining, so try to handle it anyway. Perhaps related to that
 	 * unknown DMA slot on tesla? */
 	if (status & 0x20) {
-		ustatus = nv_rd32(gr, 0x402000) & 0x7fffffff;
+		ustatus = nvkm_rd32(device, 0x402000) & 0x7fffffff;
 		if (display)
 			nv_error(gr, "TRAP_UNKC04 0x%08x\n", ustatus);
-		nv_wr32(gr, 0x402000, 0xc0000000);
+		nvkm_wr32(device, 0x402000, 0xc0000000);
 		/* no status modifiction on purpose */
 	}
 
@@ -747,7 +752,7 @@
 	if (status & 0x040) {
 		nv50_gr_tp_trap(gr, 6, 0x408900, 0x408600, display,
 				    "TRAP_TEXTURE");
-		nv_wr32(gr, 0x400108, 0x040);
+		nvkm_wr32(device, 0x400108, 0x040);
 		status &= ~0x040;
 	}
 
@@ -755,7 +760,7 @@
 	if (status & 0x080) {
 		nv50_gr_tp_trap(gr, 7, 0x408314, 0x40831c, display,
 				    "TRAP_MP");
-		nv_wr32(gr, 0x400108, 0x080);
+		nvkm_wr32(device, 0x400108, 0x080);
 		status &= ~0x080;
 	}
 
@@ -764,14 +769,14 @@
 	if (status & 0x100) {
 		nv50_gr_tp_trap(gr, 8, 0x408e08, 0x408708, display,
 				    "TRAP_PROP");
-		nv_wr32(gr, 0x400108, 0x100);
+		nvkm_wr32(device, 0x400108, 0x100);
 		status &= ~0x100;
 	}
 
 	if (status) {
 		if (display)
 			nv_error(gr, "TRAP: unknown 0x%08x\n", status);
-		nv_wr32(gr, 0x400108, status);
+		nvkm_wr32(device, 0x400108, status);
 	}
 
 	return 1;
@@ -780,18 +785,19 @@
 static void
 nv50_gr_intr(struct nvkm_subdev *subdev)
 {
-	struct nvkm_fifo *fifo = nvkm_fifo(subdev);
+	struct nv50_gr *gr = (void *)subdev;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = device->fifo;
 	struct nvkm_engine *engine = nv_engine(subdev);
 	struct nvkm_object *engctx;
 	struct nvkm_handle *handle = NULL;
-	struct nv50_gr *gr = (void *)subdev;
-	u32 stat = nv_rd32(gr, 0x400100);
-	u32 inst = nv_rd32(gr, 0x40032c) & 0x0fffffff;
-	u32 addr = nv_rd32(gr, 0x400704);
+	u32 stat = nvkm_rd32(device, 0x400100);
+	u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff;
+	u32 addr = nvkm_rd32(device, 0x400704);
 	u32 subc = (addr & 0x00070000) >> 16;
 	u32 mthd = (addr & 0x00001ffc);
-	u32 data = nv_rd32(gr, 0x400708);
-	u32 class = nv_rd32(gr, 0x400814);
+	u32 data = nvkm_rd32(device, 0x400708);
+	u32 class = nvkm_rd32(device, 0x400814);
 	u32 show = stat, show_bitfield = stat;
 	int chid;
 
@@ -806,7 +812,7 @@
 	}
 
 	if (show & 0x00100000) {
-		u32 ecode = nv_rd32(gr, 0x400110);
+		u32 ecode = nvkm_rd32(device, 0x400110);
 		nv_error(gr, "DATA_ERROR ");
 		nvkm_enum_print(nv50_data_error_names, ecode);
 		pr_cont("\n");
@@ -820,8 +826,8 @@
 		show_bitfield &= ~0x00200000;
 	}
 
-	nv_wr32(gr, 0x400100, stat);
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, stat);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
 	if (show) {
 		show &= show_bitfield;
@@ -836,8 +842,8 @@
 			 subc, class, mthd, data);
 	}
 
-	if (nv_rd32(gr, 0x400824) & (1 << 31))
-		nv_wr32(gr, 0x400824, nv_rd32(gr, 0x400824) & ~(1 << 31));
+	if (nvkm_rd32(device, 0x400824) & (1 << 31))
+		nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31));
 
 	nvkm_engctx_put(engctx);
 }
@@ -902,6 +908,7 @@
 nv50_gr_init(struct nvkm_object *object)
 {
 	struct nv50_gr *gr = (void *)object;
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int ret, units, i;
 
 	ret = nvkm_gr_init(&gr->base);
@@ -909,66 +916,66 @@
 		return ret;
 
 	/* NV_PGRAPH_DEBUG_3_HW_CTX_SWITCH_ENABLED */
-	nv_wr32(gr, 0x40008c, 0x00000004);
+	nvkm_wr32(device, 0x40008c, 0x00000004);
 
 	/* reset/enable traps and interrupts */
-	nv_wr32(gr, 0x400804, 0xc0000000);
-	nv_wr32(gr, 0x406800, 0xc0000000);
-	nv_wr32(gr, 0x400c04, 0xc0000000);
-	nv_wr32(gr, 0x401800, 0xc0000000);
-	nv_wr32(gr, 0x405018, 0xc0000000);
-	nv_wr32(gr, 0x402000, 0xc0000000);
+	nvkm_wr32(device, 0x400804, 0xc0000000);
+	nvkm_wr32(device, 0x406800, 0xc0000000);
+	nvkm_wr32(device, 0x400c04, 0xc0000000);
+	nvkm_wr32(device, 0x401800, 0xc0000000);
+	nvkm_wr32(device, 0x405018, 0xc0000000);
+	nvkm_wr32(device, 0x402000, 0xc0000000);
 
-	units = nv_rd32(gr, 0x001540);
+	units = nvkm_rd32(device, 0x001540);
 	for (i = 0; i < 16; i++) {
 		if (!(units & (1 << i)))
 			continue;
 
 		if (nv_device(gr)->chipset < 0xa0) {
-			nv_wr32(gr, 0x408900 + (i << 12), 0xc0000000);
-			nv_wr32(gr, 0x408e08 + (i << 12), 0xc0000000);
-			nv_wr32(gr, 0x408314 + (i << 12), 0xc0000000);
+			nvkm_wr32(device, 0x408900 + (i << 12), 0xc0000000);
+			nvkm_wr32(device, 0x408e08 + (i << 12), 0xc0000000);
+			nvkm_wr32(device, 0x408314 + (i << 12), 0xc0000000);
 		} else {
-			nv_wr32(gr, 0x408600 + (i << 11), 0xc0000000);
-			nv_wr32(gr, 0x408708 + (i << 11), 0xc0000000);
-			nv_wr32(gr, 0x40831c + (i << 11), 0xc0000000);
+			nvkm_wr32(device, 0x408600 + (i << 11), 0xc0000000);
+			nvkm_wr32(device, 0x408708 + (i << 11), 0xc0000000);
+			nvkm_wr32(device, 0x40831c + (i << 11), 0xc0000000);
 		}
 	}
 
-	nv_wr32(gr, 0x400108, 0xffffffff);
-	nv_wr32(gr, 0x400138, 0xffffffff);
-	nv_wr32(gr, 0x400100, 0xffffffff);
-	nv_wr32(gr, 0x40013c, 0xffffffff);
-	nv_wr32(gr, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400500, 0x00010001);
 
 	/* upload context program, initialise ctxctl defaults */
 	ret = nv50_grctx_init(nv_device(gr), &gr->size);
 	if (ret)
 		return ret;
 
-	nv_wr32(gr, 0x400824, 0x00000000);
-	nv_wr32(gr, 0x400828, 0x00000000);
-	nv_wr32(gr, 0x40082c, 0x00000000);
-	nv_wr32(gr, 0x400830, 0x00000000);
-	nv_wr32(gr, 0x40032c, 0x00000000);
-	nv_wr32(gr, 0x400330, 0x00000000);
+	nvkm_wr32(device, 0x400824, 0x00000000);
+	nvkm_wr32(device, 0x400828, 0x00000000);
+	nvkm_wr32(device, 0x40082c, 0x00000000);
+	nvkm_wr32(device, 0x400830, 0x00000000);
+	nvkm_wr32(device, 0x40032c, 0x00000000);
+	nvkm_wr32(device, 0x400330, 0x00000000);
 
 	/* some unknown zcull magic */
 	switch (nv_device(gr)->chipset & 0xf0) {
 	case 0x50:
 	case 0x80:
 	case 0x90:
-		nv_wr32(gr, 0x402ca8, 0x00000800);
+		nvkm_wr32(device, 0x402ca8, 0x00000800);
 		break;
 	case 0xa0:
 	default:
 		if (nv_device(gr)->chipset == 0xa0 ||
 		    nv_device(gr)->chipset == 0xaa ||
 		    nv_device(gr)->chipset == 0xac) {
-			nv_wr32(gr, 0x402ca8, 0x00000802);
+			nvkm_wr32(device, 0x402ca8, 0x00000802);
 		} else {
-			nv_wr32(gr, 0x402cc0, 0x00000000);
-			nv_wr32(gr, 0x402ca8, 0x00000002);
+			nvkm_wr32(device, 0x402cc0, 0x00000000);
+			nvkm_wr32(device, 0x402ca8, 0x00000002);
 		}
 
 		break;
@@ -976,10 +983,10 @@
 
 	/* zero out zcull regions */
 	for (i = 0; i < 8; i++) {
-		nv_wr32(gr, 0x402c20 + (i * 0x10), 0x00000000);
-		nv_wr32(gr, 0x402c24 + (i * 0x10), 0x00000000);
-		nv_wr32(gr, 0x402c28 + (i * 0x10), 0x00000000);
-		nv_wr32(gr, 0x402c2c + (i * 0x10), 0x00000000);
+		nvkm_wr32(device, 0x402c20 + (i * 0x10), 0x00000000);
+		nvkm_wr32(device, 0x402c24 + (i * 0x10), 0x00000000);
+		nvkm_wr32(device, 0x402c28 + (i * 0x10), 0x00000000);
+		nvkm_wr32(device, 0x402c2c + (i * 0x10), 0x00000000);
 	}
 	return 0;
 }