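drm/nvc0/gr: update fuc source to assemble with latest envyas

Prefix every label reference (bra/call targets, data symbols used in
D[] address operands, mov immediates and .b16 tables) and every
.section name with '#' in nvc0_graph.fuc, nvc0_grgpc.fuc and
nvc0_grhub.fuc.  The added '#' is the only change on each modified
line; no instruction, register or constant is otherwise touched.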

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.fuc b/drivers/gpu/drm/nouveau/nvc0_graph.fuc
index 2a4b6dc..e6b2288 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.fuc
@@ -71,9 +71,9 @@
 	ld b32 $r9 D[$r13 + 0x4]	// PUT
 	xor $r8 8
 	cmpu b32 $r8 $r9
-	bra ne queue_put_next
+	bra ne #queue_put_next
 		mov $r15 E_CMD_OVERFLOW
-		call error
+		call #error
 		ret
 
 	// store cmd/data on queue
@@ -104,7 +104,7 @@
 	ld b32 $r8 D[$r13 + 0x0]	// GET
 	ld b32 $r9 D[$r13 + 0x4]	// PUT
 	cmpu b32 $r8 $r9
-	bra e queue_get_done
+	bra e #queue_get_done
 		// fetch first cmd/data pair
 		and $r9 $r8 7
 		shl b32 $r9 3
@@ -135,9 +135,9 @@
 	nv_rd32_wait:
 		iord $r12 I[$r11 + 0x000]
 		xbit $r12 $r12 31
-		bra ne nv_rd32_wait
+		bra ne #nv_rd32_wait
 	mov $r10 6			// DONE_MMIO_RD
-	call wait_doneo
+	call #wait_doneo
 	iord $r15 I[$r11 + 0x100]	// MMIO_RDVAL
 	ret
 
@@ -157,7 +157,7 @@
 	nv_wr32_wait:
 		iord $r12 I[$r11 + 0x000]
 		xbit $r12 $r12 31
-		bra ne nv_wr32_wait
+		bra ne #nv_wr32_wait
 	ret
 
 // (re)set watchdog timer
@@ -193,7 +193,7 @@
 		shl b32 $r8 6
 		iord $r8 I[$r8 + 0x000]	// DONE
 		xbit $r8 $r8 $r10
-		bra $2 wait_done_$1
+		bra $2 #wait_done_$1
 	trace_clr(T_WAIT)
 	ret
 ')
@@ -216,7 +216,7 @@
 		add b32 $r9 $r8
 		add b32 $r14 4
 		cmpu b32 $r14 $r15
-		bra ne nv_mmctx_size_loop
+		bra ne #nv_mmctx_size_loop
 	mov b32 $r15 $r9
 	ret
 
@@ -238,12 +238,12 @@
 	shl b32 $r8 6
 	clear b32 $r9
 	or $r11 $r11
-	bra e mmctx_base_disabled
+	bra e #mmctx_base_disabled
 		iowr I[$r8 + 0x000] $r11	// MMCTX_BASE
 		bset $r9 0			// BASE_EN
 	mmctx_base_disabled:
 	or $r14 $r14
-	bra e mmctx_multi_disabled
+	bra e #mmctx_multi_disabled
 		iowr I[$r8 + 0x200] $r14 	// MMCTX_MULTI_STRIDE
 		iowr I[$r8 + 0x300] $r15 	// MMCTX_MULTI_MASK
 		bset $r9 1			// MULTI_EN
@@ -264,7 +264,7 @@
 		mmctx_wait_free:
 			iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
 			and $r14 0x1f
-			bra e mmctx_wait_free
+			bra e #mmctx_wait_free
 
 		// queue up an entry
 		ld b32 $r14 D[$r12]
@@ -272,19 +272,19 @@
 		iowr I[$r8 + 0x300] $r14
 		add b32 $r12 4
 		cmpu b32 $r12 $r13
-		bra ne mmctx_exec_loop
+		bra ne #mmctx_exec_loop
 
 	xbit $r11 $r10 2
-	bra ne mmctx_stop
+	bra ne #mmctx_stop
 		// wait for queue to empty
 		mmctx_fini_wait:
 			iord $r11 I[$r8 + 0x000]	// MMCTX_CTRL
 			and $r11 0x1f
 			cmpu b32 $r11 0x10
-			bra ne mmctx_fini_wait
+			bra ne #mmctx_fini_wait
 		mov $r10 2				// DONE_MMCTX
-		call wait_donez
-		bra mmctx_done
+		call #wait_donez
+		bra #mmctx_done
 	mmctx_stop:
 		xbit $r11 $r10 0
 		shl b32 $r11 16			// DIR
@@ -295,7 +295,7 @@
 			// wait for STOP_TRIGGER to clear
 			iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
 			xbit $r11 $r11 18
-			bra ne mmctx_stop_wait
+			bra ne #mmctx_stop_wait
 	mmctx_done:
 	trace_clr(T_MMCTX)
 	ret
@@ -305,7 +305,7 @@
 strand_wait:
 	push $r10
 	mov $r10 2
-	call wait_donez
+	call #wait_donez
 	pop $r10
 	ret
 
@@ -316,7 +316,7 @@
 	sethi $r8 0x20000
 	mov $r9 0xc
 	iowr I[$r8] $r9
-	call strand_wait
+	call #strand_wait
 	ret
 
 // unknown - call after issuing strand commands
@@ -326,7 +326,7 @@
 	sethi $r8 0x20000
 	mov $r9 0xd
 	iowr I[$r8] $r9
-	call strand_wait
+	call #strand_wait
 	ret
 
 // Selects strand set?!
@@ -341,11 +341,11 @@
 	iowr I[$r10 + 0x000] $r12		// 0x93c = 0xf
 	mov $r12 0xb
 	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xb
-	call strand_wait
+	call #strand_wait
 	iowr I[$r10 + 0x000] $r14		// 0x93c = <id>
 	mov $r12 0xa
 	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xa
-	call strand_wait
+	call #strand_wait
 	ret
 
 // Initialise strand context data
@@ -357,22 +357,22 @@
 //
 strand_ctx_init:
 	trace_set(T_STRINIT)
-	call strand_pre
+	call #strand_pre
 	mov $r14 3
-	call strand_set
+	call #strand_set
 	mov $r10 0x46fc
 	sethi $r10 0x20000
 	add b32 $r11 $r10 0x400
 	iowr I[$r10 + 0x100] $r0	// STRAND_FIRST_GENE = 0
 	mov $r12 1
 	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_FIRST_GENE
-	call strand_wait
+	call #strand_wait
 	sub b32 $r12 $r0 1
 	iowr I[$r10 + 0x000] $r12	// STRAND_GENE_CNT = 0xffffffff
 	mov $r12 2
 	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_GENE_CNT
-	call strand_wait
-	call strand_post
+	call #strand_wait
+	call #strand_post
 
 	// read the size of each strand, poke the context offset of
 	// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
@@ -391,7 +391,7 @@
 		add b32 $r14 $r10
 		add b32 $r8 4
 		sub b32 $r9 1
-		bra ne ctx_init_strand_loop
+		bra ne #ctx_init_strand_loop
 
 	shl b32 $r14 8
 	sub b32 $r15 $r14 $r15
diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
index 06f5e26..a9e93c8 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
@@ -32,7 +32,7 @@
  * - watchdog timer around ctx operations
  */
 
-.section nvc0_grgpc_data
+.section #nvc0_grgpc_data
 include(`nvc0_graph.fuc')
 gpc_id:			.b32 0
 gpc_mmio_list_head:	.b32 0
@@ -48,40 +48,40 @@
 // chipset descriptions
 chipsets:
 .b8  0xc0 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc0_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc0_tpc_mmio_tail
 .b8  0xc1 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc1_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc1_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc1_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc1_tpc_mmio_tail
 .b8  0xc3 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc3_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
 .b8  0xc4 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc3_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
 .b8  0xc8 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc0_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc0_tpc_mmio_tail
 .b8  0xce 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvc3_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
 .b8  0xcf 0 0 0
-.b16 nvc0_gpc_mmio_head
-.b16 nvc0_gpc_mmio_tail
-.b16 nvc0_tpc_mmio_head
-.b16 nvcf_tpc_mmio_tail
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvcf_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
@@ -147,8 +147,8 @@
 nvc1_tpc_mmio_tail:
 
 
-.section nvc0_grgpc_code
-bra init
+.section #nvc0_grgpc_code
+bra #init
 define(`include_code')
 include(`nvc0_graph.fuc')
 
@@ -160,10 +160,10 @@
 	push $r14
 	mov $r14 -0x67ec 	// 0x9814
 	sethi $r14 0x400000
-	call nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
+	call #nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
 	add b32 $r14 0x41c
 	mov $r15 1
-	call nv_wr32		// HUB_CTXCTL_INTR_UP_SET
+	call #nv_wr32		// HUB_CTXCTL_INTR_UP_SET
 	pop $r14
 	ret
 
@@ -190,7 +190,7 @@
 	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE
 
 	// setup i0 handler, and route all interrupts to it
-	mov $r1 ih
+	mov $r1 #ih
 	mov $iv0 $r1
 	mov $r1 0x400
 	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH
@@ -210,24 +210,24 @@
 	and $r2 0x1f
 	shl b32 $r3 $r2
 	sub b32 $r3 1
-	st b32 D[$r0 + tpc_count] $r2
-	st b32 D[$r0 + tpc_mask] $r3
+	st b32 D[$r0 + #tpc_count] $r2
+	st b32 D[$r0 + #tpc_mask] $r3
 	add b32 $r1 0x400
 	iord $r2 I[$r1 + 0x000]		// MYINDEX
-	st b32 D[$r0 + gpc_id] $r2
+	st b32 D[$r0 + #gpc_id] $r2
 
 	// find context data for this chipset
 	mov $r2 0x800
 	shl b32 $r2 6
 	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r1 chipsets - 12
+	mov $r1 #chipsets - 12
 	init_find_chipset:
 		add b32 $r1 12
 		ld b32 $r3 D[$r1 + 0x00]
 		cmpu b32 $r3 $r2
-		bra e init_context
+		bra e #init_context
 		cmpu b32 $r3 0
-		bra ne init_find_chipset
+		bra ne #init_find_chipset
 		// unknown chipset
 		ret
 
@@ -253,19 +253,19 @@
 	clear b32 $r15
 	ld b16 $r14 D[$r1 + 4]
 	ld b16 $r15 D[$r1 + 6]
-	st b16 D[$r0 + gpc_mmio_list_head] $r14
-	st b16 D[$r0 + gpc_mmio_list_tail] $r15
-	call mmctx_size
+	st b16 D[$r0 + #gpc_mmio_list_head] $r14
+	st b16 D[$r0 + #gpc_mmio_list_tail] $r15
+	call #mmctx_size
 	add b32 $r2 $r15
 	add b32 $r3 $r15
 
 	// calculate per-TPC mmio context size, store the list pointers
 	ld b16 $r14 D[$r1 + 8]
 	ld b16 $r15 D[$r1 + 10]
-	st b16 D[$r0 + tpc_mmio_list_head] $r14
-	st b16 D[$r0 + tpc_mmio_list_tail] $r15
-	call mmctx_size
-	ld b32 $r14 D[$r0 + tpc_count]
+	st b16 D[$r0 + #tpc_mmio_list_head] $r14
+	st b16 D[$r0 + #tpc_mmio_list_tail] $r15
+	call #mmctx_size
+	ld b32 $r14 D[$r0 + #tpc_count]
 	mulu $r14 $r15
 	add b32 $r2 $r14
 	add b32 $r3 $r14
@@ -283,7 +283,7 @@
 
 	// calculate size of strand context data
 	mov b32 $r15 $r2
-	call strand_ctx_init
+	call #strand_ctx_init
 	add b32 $r3 $r15
 
 	// save context size, and tell HUB we're done
@@ -301,13 +301,13 @@
 main:
 	bset $flags $p0
 	sleep $p0
-	mov $r13 cmd_queue
-	call queue_get
-	bra $p1 main
+	mov $r13 #cmd_queue
+	call #queue_get
+	bra $p1 #main
 
 	// 0x0000-0x0003 are all context transfers
 	cmpu b32 $r14 0x04
-	bra nc main_not_ctx_xfer
+	bra nc #main_not_ctx_xfer
 		// fetch $flags and mask off $p1/$p2
 		mov $r1 $flags
 		mov $r2 0x0006
@@ -318,14 +318,14 @@
 		or $r1 $r14
 		mov $flags $r1
 		// transfer context data
-		call ctx_xfer
-		bra main
+		call #ctx_xfer
+		bra #main
 
 	main_not_ctx_xfer:
 	shl b32 $r15 $r14 16
 	or $r15 E_BAD_COMMAND
-	call error
-	bra main
+	call #error
+	bra #main
 
 // interrupt handler
 ih:
@@ -342,13 +342,13 @@
 	// incoming fifo command?
 	iord $r10 I[$r0 + 0x200]	// INTR
 	and $r11 $r10 0x00000004
-	bra e ih_no_fifo
+	bra e #ih_no_fifo
 		// queue incoming fifo command for later processing
 		mov $r11 0x1900
-		mov $r13 cmd_queue
+		mov $r13 #cmd_queue
 		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
 		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call queue_put
+		call #queue_put
 		add b32 $r11 0x400
 		mov $r14 1
 		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
@@ -374,11 +374,11 @@
 //
 hub_barrier_done:
 	mov $r15 1
-	ld b32 $r14 D[$r0 + gpc_id]
+	ld b32 $r14 D[$r0 + #gpc_id]
 	shl b32 $r15 $r14
 	mov $r14 -0x6be8 	// 0x409418 - HUB_BAR_SET
 	sethi $r14 0x400000
-	call nv_wr32
+	call #nv_wr32
 	ret
 
 // Disables various things, waits a bit, and re-enables them..
@@ -395,7 +395,7 @@
 	mov $r15 8
 	ctx_redswitch_delay:
 		sub b32 $r15 1
-		bra ne ctx_redswitch_delay
+		bra ne #ctx_redswitch_delay
 	mov $r15 0xa20
 	iowr I[$r14] $r15	// GPC_RED_SWITCH = UNK11, ENABLE, POWER
 	ret
@@ -413,8 +413,8 @@
 	mov $r1 0xa04
 	shl b32 $r1 6
 	iowr I[$r1 + 0x000] $r15// MEM_BASE
-	bra not $p1 ctx_xfer_not_load
-		call ctx_redswitch
+	bra not $p1 #ctx_xfer_not_load
+		call #ctx_redswitch
 	ctx_xfer_not_load:
 
 	// strands
@@ -422,7 +422,7 @@
 	sethi $r1 0x20000
 	mov $r2 0xc
 	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call strand_wait
+	call #strand_wait
 	mov $r2 0x47fc
 	sethi $r2 0x20000
 	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
@@ -435,46 +435,46 @@
 	or $r10 2		// first
 	mov $r11 0x0000
 	sethi $r11 0x500000
-	ld b32 $r12 D[$r0 + gpc_id]
+	ld b32 $r12 D[$r0 + #gpc_id]
 	shl b32 $r12 15
 	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn
-	ld b32 $r12 D[$r0 + gpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + gpc_mmio_list_tail]
+	ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
+	ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
 	mov $r14 0		// not multi
-	call mmctx_xfer
+	call #mmctx_xfer
 
 	// per-TPC mmio context
 	xbit $r10 $flags $p1	// direction
 	or $r10 4		// last
 	mov $r11 0x4000
 	sethi $r11 0x500000	// base = NV_PGRAPH_GPC0_TPC0
-	ld b32 $r12 D[$r0 + gpc_id]
+	ld b32 $r12 D[$r0 + #gpc_id]
 	shl b32 $r12 15
 	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_TPC0
-	ld b32 $r12 D[$r0 + tpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + tpc_mmio_list_tail]
-	ld b32 $r15 D[$r0 + tpc_mask]
+	ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
+	ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
+	ld b32 $r15 D[$r0 + #tpc_mask]
 	mov $r14 0x800		// stride = 0x800
-	call mmctx_xfer
+	call #mmctx_xfer
 
 	// wait for strands to finish
-	call strand_wait
+	call #strand_wait
 
 	// if load, or a save without a load following, do some
 	// unknown stuff that's done after finishing a block of
 	// strand commands
-	bra $p1 ctx_xfer_post
-	bra not $p2 ctx_xfer_done
+	bra $p1 #ctx_xfer_post
+	bra not $p2 #ctx_xfer_done
 	ctx_xfer_post:
 		mov $r1 0x4afc
 		sethi $r1 0x20000
 		mov $r2 0xd
 		iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0d
-		call strand_wait
+		call #strand_wait
 
 	// mark completion in HUB's barrier
 	ctx_xfer_done:
-	call hub_barrier_done
+	call #hub_barrier_done
 	ret
 
 .align 256
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
index e4f8c7e..3ea3196 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
@@ -27,7 +27,7 @@
  *    m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
  */
 
-.section nvc0_grhub_data
+.section #nvc0_grhub_data
 include(`nvc0_graph.fuc')
 gpc_count:		.b32 0
 rop_count:		.b32 0
@@ -39,26 +39,26 @@
 
 chipsets:
 .b8  0xc0 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0xc1 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc1_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc1_hub_mmio_tail
 .b8  0xc3 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0xc4 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0xc8 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0xce 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0xcf 0 0 0
-.b16 nvc0_hub_mmio_head
-.b16 nvc0_hub_mmio_tail
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
 .b8  0 0 0 0
 
 nvc0_hub_mmio_head:
@@ -113,8 +113,8 @@
 .align 256
 xfer_data: 		.b32 0
 
-.section nvc0_grhub_code
-bra init
+.section #nvc0_grhub_code
+bra #init
 define(`include_code')
 include(`nvc0_graph.fuc')
 
@@ -157,7 +157,7 @@
 	iowr I[$r1 + 0x000] $r2	// FIFO_ENABLE
 
 	// setup i0 handler, and route all interrupts to it
-	mov $r1 ih
+	mov $r1 #ih
 	mov $iv0 $r1
 	mov $r1 0x400
 	iowr I[$r1 + 0x300] $r0	// INTR_DISPATCH
@@ -201,11 +201,11 @@
 	// fetch enabled GPC/ROP counts
 	mov $r14 -0x69fc	// 0x409604
 	sethi $r14 0x400000
-	call nv_rd32
+	call #nv_rd32
 	extr $r1 $r15 16:20
-	st b32 D[$r0 + rop_count] $r1
+	st b32 D[$r0 + #rop_count] $r1
 	and $r15 0x1f
-	st b32 D[$r0 + gpc_count] $r15
+	st b32 D[$r0 + #gpc_count] $r15
 
 	// set BAR_REQMASK to GPC mask
 	mov $r1 1
@@ -220,14 +220,14 @@
 	mov $r2 0x800
 	shl b32 $r2 6
 	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r15 chipsets - 8
+	mov $r15 #chipsets - 8
 	init_find_chipset:
 		add b32 $r15 8
 		ld b32 $r3 D[$r15 + 0x00]
 		cmpu b32 $r3 $r2
-		bra e init_context
+		bra e #init_context
 		cmpu b32 $r3 0
-		bra ne init_find_chipset
+		bra ne #init_find_chipset
 		// unknown chipset
 		ret
 
@@ -239,9 +239,9 @@
 	ld b16 $r14 D[$r15 + 4]
 	ld b16 $r15 D[$r15 + 6]
 	sethi $r14 0
-	st b32 D[$r0 + hub_mmio_list_head] $r14
-	st b32 D[$r0 + hub_mmio_list_tail] $r15
-	call mmctx_size
+	st b32 D[$r0 + #hub_mmio_list_head] $r14
+	st b32 D[$r0 + #hub_mmio_list_tail] $r15
+	call #mmctx_size
 
 	// set mmctx base addresses now so we don't have to do it later,
 	// they don't (currently) ever change
@@ -260,7 +260,7 @@
 	add b32 $r1 1
 	shl b32 $r1 8
 	mov b32 $r15 $r1
-	call strand_ctx_init
+	call #strand_ctx_init
 	add b32 $r1 $r15
 
 	// initialise each GPC in sequence by passing in the offset of its
@@ -271,40 +271,40 @@
 	// when it has completed, and return the size of its context data
 	// in GPCn_CC_SCRATCH[1]
 	//
-	ld b32 $r3 D[$r0 + gpc_count]
+	ld b32 $r3 D[$r0 + #gpc_count]
 	mov $r4 0x2000
 	sethi $r4 0x500000
 	init_gpc:
 		// setup, and start GPC ucode running
 		add b32 $r14 $r4 0x804
 		mov b32 $r15 $r1
-		call nv_wr32			// CC_SCRATCH[1] = ctx offset
+		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
 		add b32 $r14 $r4 0x800
 		mov b32 $r15 $r2
-		call nv_wr32			// CC_SCRATCH[0] = chipset
+		call #nv_wr32			// CC_SCRATCH[0] = chipset
 		add b32 $r14 $r4 0x10c
 		clear b32 $r15
-		call nv_wr32
+		call #nv_wr32
 		add b32 $r14 $r4 0x104
-		call nv_wr32			// ENTRY
+		call #nv_wr32			// ENTRY
 		add b32 $r14 $r4 0x100
 		mov $r15 2			// CTRL_START_TRIGGER
-		call nv_wr32			// CTRL
+		call #nv_wr32			// CTRL
 
 		// wait for it to complete, and adjust context size
 		add b32 $r14 $r4 0x800
 		init_gpc_wait:
-			call nv_rd32
+			call #nv_rd32
 			xbit $r15 $r15 31
-			bra e init_gpc_wait
+			bra e #init_gpc_wait
 		add b32 $r14 $r4 0x804
-		call nv_rd32
+		call #nv_rd32
 		add b32 $r1 $r15
 
 		// next!
 		add b32 $r4 0x8000
 		sub b32 $r3 1
-		bra ne init_gpc
+		bra ne #init_gpc
 
 	// save context size, and tell host we're ready
 	mov $r2 0x800
@@ -322,13 +322,13 @@
 	// sleep until we have something to do
 	bset $flags $p0
 	sleep $p0
-	mov $r13 cmd_queue
-	call queue_get
-	bra $p1 main
+	mov $r13 #cmd_queue
+	call #queue_get
+	bra $p1 #main
 
 	// context switch, requested by GPU?
 	cmpu b32 $r14 0x4001
-	bra ne main_not_ctx_switch
+	bra ne #main_not_ctx_switch
 		trace_set(T_AUTO)
 		mov $r1 0xb00
 		shl b32 $r1 6
@@ -336,39 +336,39 @@
 		iord $r1 I[$r1 + 0x000]		// CHAN_CUR
 
 		xbit $r3 $r1 31
-		bra e chsw_no_prev
+		bra e #chsw_no_prev
 			xbit $r3 $r2 31
-			bra e chsw_prev_no_next
+			bra e #chsw_prev_no_next
 				push $r2
 				mov b32 $r2 $r1
 				trace_set(T_SAVE)
 				bclr $flags $p1
 				bset $flags $p2
-				call ctx_xfer
+				call #ctx_xfer
 				trace_clr(T_SAVE);
 				pop $r2
 				trace_set(T_LOAD);
 				bset $flags $p1
-				call ctx_xfer
+				call #ctx_xfer
 				trace_clr(T_LOAD);
-				bra chsw_done
+				bra #chsw_done
 			chsw_prev_no_next:
 				push $r2
 				mov b32 $r2 $r1
 				bclr $flags $p1
 				bclr $flags $p2
-				call ctx_xfer
+				call #ctx_xfer
 				pop $r2
 				mov $r1 0xb00
 				shl b32 $r1 6
 				iowr I[$r1] $r2
-				bra chsw_done
+				bra #chsw_done
 		chsw_no_prev:
 			xbit $r3 $r2 31
-			bra e chsw_done
+			bra e #chsw_done
 				bset $flags $p1
 				bclr $flags $p2
-				call ctx_xfer
+				call #ctx_xfer
 
 		// ack the context switch request
 		chsw_done:
@@ -377,32 +377,32 @@
 		mov $r2 1
 		iowr I[$r1 + 0x000] $r2		// 0x409b0c
 		trace_clr(T_AUTO)
-		bra main
+		bra #main
 
 	// request to set current channel? (*not* a context switch)
 	main_not_ctx_switch:
 	cmpu b32 $r14 0x0001
-	bra ne main_not_ctx_chan
+	bra ne #main_not_ctx_chan
 		mov b32 $r2 $r15
-		call ctx_chan
-		bra main_done
+		call #ctx_chan
+		bra #main_done
 
 	// request to store current channel context?
 	main_not_ctx_chan:
 	cmpu b32 $r14 0x0002
-	bra ne main_not_ctx_save
+	bra ne #main_not_ctx_save
 		trace_set(T_SAVE)
 		bclr $flags $p1
 		bclr $flags $p2
-		call ctx_xfer
+		call #ctx_xfer
 		trace_clr(T_SAVE)
-		bra main_done
+		bra #main_done
 
 	main_not_ctx_save:
 		shl b32 $r15 $r14 16
 		or $r15 E_BAD_COMMAND
-		call error
-		bra main
+		call #error
+		bra #main
 
 	main_done:
 	mov $r1 0x820
@@ -410,7 +410,7 @@
 	clear b32 $r2
 	bset $r2 31
 	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
-	bra main
+	bra #main
 
 // interrupt handler
 ih:
@@ -427,13 +427,13 @@
 	// incoming fifo command?
 	iord $r10 I[$r0 + 0x200]	// INTR
 	and $r11 $r10 0x00000004
-	bra e ih_no_fifo
+	bra e #ih_no_fifo
 		// queue incoming fifo command for later processing
 		mov $r11 0x1900
-		mov $r13 cmd_queue
+		mov $r13 #cmd_queue
 		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
 		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call queue_put
+		call #queue_put
 		add b32 $r11 0x400
 		mov $r14 1
 		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
@@ -441,18 +441,18 @@
 	// context switch request?
 	ih_no_fifo:
 	and $r11 $r10 0x00000100
-	bra e ih_no_ctxsw
+	bra e #ih_no_ctxsw
 		// enqueue a context switch for later processing
-		mov $r13 cmd_queue
+		mov $r13 #cmd_queue
 		mov $r14 0x4001
-		call queue_put
+		call #queue_put
 
 	// anything we didn't handle, bring it to the host's attention
 	ih_no_ctxsw:
 	mov $r11 0x104
 	not b32 $r11
 	and $r11 $r10 $r11
-	bra e ih_no_other
+	bra e #ih_no_other
 		mov $r10 0xc1c
 		shl b32 $r10 6
 		iowr I[$r10] $r11	// INTR_UP_SET
@@ -478,11 +478,11 @@
 	mov $r14 0x4160
 	sethi $r14 0x400000
 	mov $r15 1
-	call nv_wr32
+	call #nv_wr32
 	ctx_4160s_wait:
-		call nv_rd32
+		call #nv_rd32
 		xbit $r15 $r15 4
-		bra e ctx_4160s_wait
+		bra e #ctx_4160s_wait
 	ret
 
 // Without clearing again at end of xfer, some things cause PGRAPH
@@ -492,7 +492,7 @@
 	mov $r14 0x4160
 	sethi $r14 0x400000
 	clear b32 $r15
-	call nv_wr32
+	call #nv_wr32
 	ret
 
 // Again, not real sure
@@ -503,7 +503,7 @@
 	mov $r14 0x4170
 	sethi $r14 0x400000
 	or $r15 0x10
-	call nv_wr32
+	call #nv_wr32
 	ret
 
 // Waits for a ctx_4170s() call to complete
@@ -511,9 +511,9 @@
 ctx_4170w:
 	mov $r14 0x4170
 	sethi $r14 0x400000
-	call nv_rd32
+	call #nv_rd32
 	and $r15 0x10
-	bra ne ctx_4170w
+	bra ne #ctx_4170w
 	ret
 
 // Disables various things, waits a bit, and re-enables them..
@@ -530,7 +530,7 @@
 	mov $r15 8
 	ctx_redswitch_delay:
 		sub b32 $r15 1
-		bra ne ctx_redswitch_delay
+		bra ne #ctx_redswitch_delay
 	mov $r15 0x770
 	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
 	ret
@@ -546,10 +546,10 @@
 	iowr I[$r14] $r15	// HUB(0x86c) = val
 	mov $r14 -0x75ec
 	sethi $r14 0x400000
-	call nv_wr32		// ROP(0xa14) = val
+	call #nv_wr32		// ROP(0xa14) = val
 	mov $r14 -0x5794
 	sethi $r14 0x410000
-	call nv_wr32		// GPC(0x86c) = val
+	call #nv_wr32		// GPC(0x86c) = val
 	ret
 
 // ctx_load - load's a channel's ctxctl data, and selects its vm
@@ -561,7 +561,7 @@
 
 	// switch to channel, somewhat magic in parts..
 	mov $r10 12		// DONE_UNK12
-	call wait_donez
+	call #wait_donez
 	mov $r1 0xa24
 	shl b32 $r1 6
 	iowr I[$r1 + 0x000] $r0	// 0x409a24
@@ -576,7 +576,7 @@
 	ctx_chan_wait_0:
 		iord $r4 I[$r1 + 0x100]
 		and $r4 0x1f
-		bra ne ctx_chan_wait_0
+		bra ne #ctx_chan_wait_0
 	iowr I[$r3 + 0x000] $r2	// CHAN_CUR
 
 	// load channel header, fetch PGRAPH context pointer
@@ -595,19 +595,19 @@
 	sethi $r2 0x80000000
 	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
 	mov $r1 0x10			// chan + 0x0210
-	mov $r2 xfer_data
+	mov $r2 #xfer_data
 	sethi $r2 0x00020000		// 16 bytes
 	xdld $r1 $r2
 	xdwait
 	trace_clr(T_LCHAN)
 
 	// update current context
-	ld b32 $r1 D[$r0 + xfer_data + 4]
+	ld b32 $r1 D[$r0 + #xfer_data + 4]
 	shl b32 $r1 24
-	ld b32 $r2 D[$r0 + xfer_data + 0]
+	ld b32 $r2 D[$r0 + #xfer_data + 0]
 	shr b32 $r2 8
 	or $r1 $r2
-	st b32 D[$r0 + ctx_current] $r1
+	st b32 D[$r0 + #ctx_current] $r1
 
 	// set transfer base to start of context, and fetch context header
 	trace_set(T_LCTXH)
@@ -618,7 +618,7 @@
 	mov $r1 0xa20
 	shl b32 $r1 6
 	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
-	mov $r1 chan_data
+	mov $r1 #chan_data
 	sethi $r1 0x00060000		// 256 bytes
 	xdld $r0 $r1
 	xdwait
@@ -635,10 +635,10 @@
 // In: $r2 channel address
 //
 ctx_chan:
-	call ctx_4160s
-	call ctx_load
+	call #ctx_4160s
+	call #ctx_load
 	mov $r10 12			// DONE_UNK12
-	call wait_donez
+	call #wait_donez
 	mov $r1 0xa10
 	shl b32 $r1 6
 	mov $r2 5
@@ -646,8 +646,8 @@
 	ctx_chan_wait:
 		iord $r2 I[$r1 + 0x000]
 		or $r2 $r2
-		bra ne ctx_chan_wait
-	call ctx_4160c
+		bra ne #ctx_chan_wait
+	call #ctx_4160c
 	ret
 
 // Execute per-context state overrides list
@@ -661,7 +661,7 @@
 //
 ctx_mmio_exec:
 	// set transfer base to be the mmio list
-	ld b32 $r3 D[$r0 + chan_mmio_address]
+	ld b32 $r3 D[$r0 + #chan_mmio_address]
 	mov $r2 0xa04
 	shl b32 $r2 6
 	iowr I[$r2 + 0x000] $r3		// MEM_BASE
@@ -670,31 +670,31 @@
 	ctx_mmio_loop:
 		// fetch next 256 bytes of mmio list if necessary
 		and $r4 $r3 0xff
-		bra ne ctx_mmio_pull
-			mov $r5 xfer_data
+		bra ne #ctx_mmio_pull
+			mov $r5 #xfer_data
 			sethi $r5 0x00060000	// 256 bytes
 			xdld $r3 $r5
 			xdwait
 
 		// execute a single list entry
 		ctx_mmio_pull:
-		ld b32 $r14 D[$r4 + xfer_data + 0x00]
-		ld b32 $r15 D[$r4 + xfer_data + 0x04]
-		call nv_wr32
+		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
+		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
+		call #nv_wr32
 
 		// next!
 		add b32 $r3 8
 		sub b32 $r1 1
-		bra ne ctx_mmio_loop
+		bra ne #ctx_mmio_loop
 
 	// set transfer base back to the current context
 	ctx_mmio_done:
-	ld b32 $r3 D[$r0 + ctx_current]
+	ld b32 $r3 D[$r0 + #ctx_current]
 	iowr I[$r2 + 0x000] $r3		// MEM_BASE
 
 	// disable the mmio list now, we don't need/want to execute it again
-	st b32 D[$r0 + chan_mmio_count] $r0
-	mov $r1 chan_data
+	st b32 D[$r0 + #chan_mmio_count] $r0
+	mov $r1 #chan_data
 	sethi $r1 0x00060000		// 256 bytes
 	xdst $r0 $r1
 	xdwait
@@ -709,46 +709,46 @@
 //		on load it means: "a save preceeded this load"
 //
 ctx_xfer:
-	bra not $p1 ctx_xfer_pre
-	bra $p2 ctx_xfer_pre_load
+	bra not $p1 #ctx_xfer_pre
+	bra $p2 #ctx_xfer_pre_load
 	ctx_xfer_pre:
 		mov $r15 0x10
-		call ctx_86c
-		call ctx_4160s
-		bra not $p1 ctx_xfer_exec
+		call #ctx_86c
+		call #ctx_4160s
+		bra not $p1 #ctx_xfer_exec
 
 	ctx_xfer_pre_load:
 		mov $r15 2
-		call ctx_4170s
-		call ctx_4170w
-		call ctx_redswitch
+		call #ctx_4170s
+		call #ctx_4170w
+		call #ctx_redswitch
 		clear b32 $r15
-		call ctx_4170s
-		call ctx_load
+		call #ctx_4170s
+		call #ctx_load
 
 	// fetch context pointer, and initiate xfer on all GPCs
 	ctx_xfer_exec:
-	ld b32 $r1 D[$r0 + ctx_current]
+	ld b32 $r1 D[$r0 + #ctx_current]
 	mov $r2 0x414
 	shl b32 $r2 6
 	iowr I[$r2 + 0x000] $r0	// BAR_STATUS = reset
 	mov $r14 -0x5b00
 	sethi $r14 0x410000
 	mov b32 $r15 $r1
-	call nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
+	call #nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
 	add b32 $r14 4
 	xbit $r15 $flags $p1
 	xbit $r2 $flags $p2
 	shl b32 $r2 1
 	or $r15 $r2
-	call nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
+	call #nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
 
 	// strands
 	mov $r1 0x4afc
 	sethi $r1 0x20000
 	mov $r2 0xc
 	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call strand_wait
+	call #strand_wait
 	mov $r2 0x47fc
 	sethi $r2 0x20000
 	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
@@ -760,22 +760,22 @@
 	xbit $r10 $flags $p1	// direction
 	or $r10 6		// first, last
 	mov $r11 0		// base = 0
-	ld b32 $r12 D[$r0 + hub_mmio_list_head]
-	ld b32 $r13 D[$r0 + hub_mmio_list_tail]
+	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
+	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
 	mov $r14 0		// not multi
-	call mmctx_xfer
+	call #mmctx_xfer
 
 	// wait for GPCs to all complete
 	mov $r10 8		// DONE_BAR
-	call wait_doneo
+	call #wait_doneo
 
 	// wait for strand xfer to complete
-	call strand_wait
+	call #strand_wait
 
 	// post-op
-	bra $p1 ctx_xfer_post
+	bra $p1 #ctx_xfer_post
 		mov $r10 12		// DONE_UNK12
-		call wait_donez
+		call #wait_donez
 		mov $r1 0xa10
 		shl b32 $r1 6
 		mov $r2 5
@@ -783,27 +783,27 @@
 		ctx_xfer_post_save_wait:
 			iord $r2 I[$r1]
 			or $r2 $r2
-			bra ne ctx_xfer_post_save_wait
+			bra ne #ctx_xfer_post_save_wait
 
-	bra $p2 ctx_xfer_done
+	bra $p2 #ctx_xfer_done
 	ctx_xfer_post:
 		mov $r15 2
-		call ctx_4170s
+		call #ctx_4170s
 		clear b32 $r15
-		call ctx_86c
-		call strand_post
-		call ctx_4170w
+		call #ctx_86c
+		call #strand_post
+		call #ctx_4170w
 		clear b32 $r15
-		call ctx_4170s
+		call #ctx_4170s
 
-		bra not $p1 ctx_xfer_no_post_mmio
-		ld b32 $r1 D[$r0 + chan_mmio_count]
+		bra not $p1 #ctx_xfer_no_post_mmio
+		ld b32 $r1 D[$r0 + #chan_mmio_count]
 		or $r1 $r1
-		bra e ctx_xfer_no_post_mmio
-			call ctx_mmio_exec
+		bra e #ctx_xfer_no_post_mmio
+			call #ctx_mmio_exec
 
 		ctx_xfer_no_post_mmio:
-		call ctx_4160c
+		call #ctx_4160c
 
 	ctx_xfer_done:
 	ret