synchronize with external depot

-add relocation code to initial asm setup
-hexdump the top of the faulting mode's stack (128 bytes at its r13) on a crash
-turn off debug spew when threads exit
-make sure the critical section routines are fully inlined
-remove warning in dpc code
diff --git a/arch/arm/crt0.S b/arch/arm/crt0.S
index 801e2b9..759f11a 100644
--- a/arch/arm/crt0.S
+++ b/arch/arm/crt0.S
@@ -51,6 +51,30 @@
 	.word 0xe1600070
 #endif
 
+	/* see if we need to relocate: compare where _start is running with where it was linked */
+	mov		r0, pc					/* ARM state: pc reads as this insn + 8, i.e. .Laddr */
+	sub		r0, r0, #(.Laddr - _start)	/* r0 = address _start is actually loaded at */
+.Laddr:
+	ldr		r1, =_start				/* r1 = link-time address of _start */
+	cmp		r0, r1
+	beq		.Lstack_setup			/* already in place, nothing to copy */
+
+	/* we need to relocate ourselves to the proper spot: copy words until r1 reaches __data_end */
+	ldr		r2, =__data_end
+
+.Lrelocate_loop:					/* NOTE(review): forward copy; assumes source and destination ranges do not overlap harmfully */
+	ldr		r3, [r0], #4
+	str		r3, [r1], #4
+	cmp		r1, r2
+	blo		.Lrelocate_loop			/* unsigned <, so a non-word-multiple image size still terminates */
+
+	/* we're relocated, jump to the right address */
+	ldr		r0, =.Lstack_setup		/* absolute (linked) address, so bx lands in the relocated copy */
+	bx		r0
+
+.ltorg								/* literal pool for the ldr = loads above; sits after bx so it is never executed */
+
+.Lstack_setup:
 	/* set up the stack for irq, fiq, abort, undefined, system/user, and lastly supervisor mode */
 	mrs     r0, cpsr
 	bic     r0, r0, #0x1f
@@ -86,23 +110,23 @@
 	ldr		r2, =__data_end
 
 	cmp		r0, r1
-	beq		__do_bss
+	beq		.L__do_bss				/* r0 == r1: skip the copy loop */
 
-__copy_loop:
+.L__copy_loop:						/* .L prefix keeps these loop labels assembler-local */
 	cmp		r1, r2
-	ldrlt	r3, [r0], #4
-	strlt	r3, [r1], #4
-	blt		__copy_loop
+	ldrlo	r3, [r0], #4			/* lo (unsigned <): r1/r2 are addresses, signed lt misorders across 0x80000000 */
+	strlo	r3, [r1], #4
+	blo		.L__copy_loop			/* keep copying words while r1 is below __data_end */
 
-__do_bss:
+.L__do_bss:
 	/* clear out the bss */
 	ldr		r0, =__bss_start
 	ldr		r1, =_end
 	mov		r2, #0
-__bss_loop:
+.L__bss_loop:
 	cmp		r0, r1
-	strlt	r2, [r0], #4
-	blt		__bss_loop
+	strlo	r2, [r0], #4			/* store a zero word while r0 is below _end (unsigned compare) */
+	blo		.L__bss_loop
 
 	bl		kmain
 	b		.
diff --git a/arch/arm/faults.c b/arch/arm/faults.c
index c16aff0..487db1f 100644
--- a/arch/arm/faults.c
+++ b/arch/arm/faults.c
@@ -40,14 +40,31 @@
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_SVC) ? '*' : ' ', "svc", regs.svc_r13, regs.svc_r14);
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_UND) ? '*' : ' ', "und", regs.und_r13, regs.und_r14);
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_SYS) ? '*' : ' ', "sys", regs.sys_r13, regs.sys_r14);
+
+	// dump 128 bytes starting at the r13 of the mode recorded in frame->spsr
+	addr_t stack;
+	switch (frame->spsr & MODE_MASK) {
+		case MODE_FIQ: stack = regs.fiq_r13; break;
+		case MODE_IRQ: stack = regs.irq_r13; break;
+		case MODE_SVC: stack = regs.svc_r13; break;
+		case MODE_UND: stack = regs.und_r13; break;
+		case MODE_SYS: stack = regs.sys_r13; break;
+		default:
+			stack = 0; // any other mode: nothing to dump
+	}
+
+	if (stack != 0) {
+		dprintf("bottom of stack at 0x%08x:\n", (unsigned int)stack);
+		hexdump((void *)stack, 128); // NOTE(review): reads through an unvalidated sp; a corrupt r13 may fault again here
+	}
 }
 
 static void exception_die(struct arm_fault_frame *frame, int pc_off, const char *msg)
 {
 	inc_critical_section();
 	frame->pc += pc_off;
-	dump_fault_frame(frame);
-	dprintf(msg);
+	dprintf("%s", msg); // print the reason first; "%s" keeps msg from being parsed as a format string
+	dump_fault_frame(frame);
 	debug_halt();
 	for(;;);
 }
diff --git a/arch/arm/system-twosegment.ld b/arch/arm/system-twosegment.ld
index aee3913..748d565 100644
--- a/arch/arm/system-twosegment.ld
+++ b/arch/arm/system-twosegment.ld
@@ -38,7 +38,7 @@
 
 	.rodata : { 
 		*(.rodata .rodata.* .gnu.linkonce.r.*) 
-		. = ALIGN(4); 
+		. = ALIGN(4);	/* 4-byte align the location counter before __commands_start */
 		__commands_start = .;
 		KEEP (*(.commands))
 		__commands_end = .;
diff --git a/arch/arm/thread.c b/arch/arm/thread.c
index a3b9a95..c16a432 100644
--- a/arch/arm/thread.c
+++ b/arch/arm/thread.c
@@ -56,7 +56,7 @@
 
 	ret = current_thread->entry(current_thread->arg);
 
-	dprintf("initial_thread_func: thread %p exiting with %d\n", current_thread, ret);
+//	exit message disabled to cut debug spew: dprintf("initial_thread_func: thread %p exiting with %d\n", current_thread, ret);
 
 	thread_exit(ret);
 }