synchronize with external depot

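-add relocation code to the initial asm setup (arch/arm/crt0.S): if the image is not running at its link address, copy it there and jump to it
-rename the crt0.S copy/bss loop labels to .L-prefixed local labels
-on a crash, print the fault message first, then the register dump followed by a hexdump of 128 bytes starting at the current stack pointer
-turn off the debug spew printed when a thread exits
-mark the critical section routines __ALWAYS_INLINE to make sure they are fully inlined
-remove a warning in the dpc code by adding a final return 0
-add (commented-out) TRACEF calls to kernel/timer.c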
diff --git a/arch/arm/crt0.S b/arch/arm/crt0.S
index 801e2b9..759f11a 100644
--- a/arch/arm/crt0.S
+++ b/arch/arm/crt0.S
@@ -51,6 +51,30 @@
 	.word 0xe1600070
 #endif
 
+	/* see if we need to relocate: compare the run-time address of _start (r0) with its link-time address (r1) */
+	mov		r0, pc
+	sub		r0, r0, #(.Laddr - _start)
+.Laddr:
+	ldr		r1, =_start
+	cmp		r0, r1
+	beq		.Lstack_setup
+
+	/* we need to relocate ourselves to the proper spot */
+	ldr		r2, =__data_end	
+
+.Lrelocate_loop:
+	ldr		r3, [r0], #4
+	str		r3, [r1], #4
+	cmp		r1, r2
+	bne		.Lrelocate_loop
+
+	/* we're relocated, jump to the right address */
+	ldr		r0, =.Lstack_setup
+	bx		r0
+
+.ltorg
+
+.Lstack_setup:
 	/* set up the stack for irq, fiq, abort, undefined, system/user, and lastly supervisor mode */
 	mrs     r0, cpsr
 	bic     r0, r0, #0x1f
@@ -86,23 +110,23 @@
 	ldr		r2, =__data_end
 
 	cmp		r0, r1
-	beq		__do_bss
+	beq		.L__do_bss
 
-__copy_loop:
+.L__copy_loop:
 	cmp		r1, r2
 	ldrlt	r3, [r0], #4
 	strlt	r3, [r1], #4
-	blt		__copy_loop
+	blt		.L__copy_loop
 
-__do_bss:
+.L__do_bss:
 	/* clear out the bss */
 	ldr		r0, =__bss_start
 	ldr		r1, =_end
 	mov		r2, #0
-__bss_loop:
+.L__bss_loop:
 	cmp		r0, r1
 	strlt	r2, [r0], #4
-	blt		__bss_loop
+	blt		.L__bss_loop
 
 	bl		kmain
 	b		.
diff --git a/arch/arm/faults.c b/arch/arm/faults.c
index c16aff0..487db1f 100644
--- a/arch/arm/faults.c
+++ b/arch/arm/faults.c
@@ -40,14 +40,31 @@
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_SVC) ? '*' : ' ', "svc", regs.svc_r13, regs.svc_r14);
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_UND) ? '*' : ' ', "und", regs.und_r13, regs.und_r14);
 	dprintf("%c%s r13 0x%08x r14 0x%08x\n", ((frame->spsr & MODE_MASK) == MODE_SYS) ? '*' : ' ', "sys", regs.sys_r13, regs.sys_r14);
+
+	// dump 128 bytes starting at r13 (sp) of the mode recorded in frame->spsr
+	addr_t stack;
+	switch (frame->spsr & MODE_MASK) {
+		case MODE_FIQ: stack = regs.fiq_r13; break;
+		case MODE_IRQ: stack = regs.irq_r13; break;
+		case MODE_SVC: stack = regs.svc_r13; break;
+		case MODE_UND: stack = regs.und_r13; break;
+		case MODE_SYS: stack = regs.sys_r13; break;
+		default:
+			stack = 0;
+	}
+
+	if (stack != 0) {
+		dprintf("bottom of stack at 0x%08x:\n", (unsigned int)stack);
+		hexdump((void *)stack, 128);
+	}
 }
 
 static void exception_die(struct arm_fault_frame *frame, int pc_off, const char *msg)
 {
 	inc_critical_section();
 	frame->pc += pc_off;
-	dump_fault_frame(frame);
 	dprintf(msg);
+	dump_fault_frame(frame);
 	debug_halt();
 	for(;;);
 }
diff --git a/arch/arm/system-twosegment.ld b/arch/arm/system-twosegment.ld
index aee3913..748d565 100644
--- a/arch/arm/system-twosegment.ld
+++ b/arch/arm/system-twosegment.ld
@@ -38,7 +38,7 @@
 
 	.rodata : { 
 		*(.rodata .rodata.* .gnu.linkonce.r.*) 
-		. = ALIGN(4); 
+		. = ALIGN(4);
 		__commands_start = .;
 		KEEP (*(.commands))
 		__commands_end = .;
diff --git a/arch/arm/thread.c b/arch/arm/thread.c
index a3b9a95..c16a432 100644
--- a/arch/arm/thread.c
+++ b/arch/arm/thread.c
@@ -56,7 +56,7 @@
 
 	ret = current_thread->entry(current_thread->arg);
 
-	dprintf("initial_thread_func: thread %p exiting with %d\n", current_thread, ret);
+//	dprintf("initial_thread_func: thread %p exiting with %d\n", current_thread, ret);
 
 	thread_exit(ret);
 }
diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index 7753ad4..99a6c41 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -52,7 +52,7 @@
 	enum thread_state state;	
 	int saved_critical_section_count;
 	int remaining_quantum;
-	
+
 	/* if blocked, a pointer to the wait queue */
 	struct wait_queue *blocking_wait_queue;
 	status_t wait_queue_block_ret;
@@ -117,21 +117,21 @@
 /* critical sections */
 extern int critical_section_count;
 
-static inline void enter_critical_section(void)
+static inline __ALWAYS_INLINE void enter_critical_section(void)
 {
 	critical_section_count++;
 	if (critical_section_count == 1)
 		arch_disable_ints();
 }
 
-static inline void exit_critical_section(void)
+static inline __ALWAYS_INLINE void exit_critical_section(void)
 {
 	critical_section_count--;
 	if (critical_section_count == 0)
 		arch_enable_ints();
 }
 
-static inline bool in_critical_section(void)
+static inline __ALWAYS_INLINE bool in_critical_section(void)
 {
 	return critical_section_count > 0;
 }
diff --git a/kernel/dpc.c b/kernel/dpc.c
index d4c2078..184ac31 100644
--- a/kernel/dpc.c
+++ b/kernel/dpc.c
@@ -81,6 +81,8 @@
 			free(dpc);
 		}
 	}
+
+	return 0;
 }
 
 
diff --git a/kernel/timer.c b/kernel/timer.c
index eb86e4a..71bc4fb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -58,6 +58,8 @@
 {
 	time_t now;
 
+//	TRACEF("delay %d, callback %p, arg %p\n", delay, callback, arg);
+
 	DEBUG_ASSERT(timer->magic == TIMER_MAGIC);	
 
 	if (list_in_list(&timer->node)) {
@@ -70,6 +72,8 @@
 	timer->callback = callback;
 	timer->arg = arg;
 
+//	TRACEF("scheduled time %u\n", timer->scheduled_time);
+
 	enter_critical_section();
 
 	insert_timer_in_queue(timer);
@@ -115,6 +119,7 @@
 		thread_stats.timers++;
 #endif
 
+//		TRACEF("firing callback %p, arg %p\n", timer->callback, timer->arg);
 		if (timer->callback(timer, now, timer->arg) == INT_RESCHEDULE)
 			ret = INT_RESCHEDULE;
 	}