[ARM] update atomic ops to use the ldrex/strex instructions on armv6+
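
On ARMv6 and later cores, atomic_add/atomic_and/atomic_or now use
ldrex/strex retry loops instead of briefly masking interrupts; the old
cpsr-twiddling path is kept for pre-v6 parts. The context switch code
also clears the local exclusive monitor (clrex on ARMv7, a dummy strex
on ARMv6) so a preempted ldrex/strex sequence cannot complete with stale
data, and a multithreaded stress test is added to thread_tests.

Roughly, the ARMv6+ path implements the following (illustrative C sketch
using gcc inline assembly, not part of the patch; the real routines are
the hand-written assembly in arch/arm/ops.S):

	/* returns the previous value of *ptr, like the assembly version */
	static inline int atomic_add_sketch(volatile int *ptr, int val)
	{
		int old, sum, fail;
		do {
			__asm__ volatile(
				"ldrex %0, [%3]\n"	/* load and arm the exclusive monitor */
				"add   %1, %0, %4\n"
				"strex %2, %1, [%3]\n"	/* writes 1 to 'fail' if exclusivity was lost */
				: "=&r"(old), "=&r"(sum), "=&r"(fail)
				: "r"(ptr), "r"(val)
				: "memory");
		} while (fail != 0);
		return old;
	}
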
diff --git a/app/tests/thread_tests.c b/app/tests/thread_tests.c
index 69b5b20..7512726 100644
--- a/app/tests/thread_tests.c
+++ b/app/tests/thread_tests.c
@@ -267,13 +267,56 @@
 	thread_sleep(100);
 }
 
+static volatile int atomic;
+static volatile int atomic_count;
+
+static int atomic_tester(void *arg)
+{
+	int add = (int)arg;
+	int i;
+
+	TRACEF("add %d\n", add);
+
+	for (i=0; i < 1000000; i++) {
+		atomic_add(&atomic, add);
+	}
+
+	int old = atomic_add(&atomic_count, -1);
+	TRACEF("exiting, old count %d\n", old);
+
+	return 0;
+}
+
+static void atomic_test(void)
+{
+	atomic = 0;
+	atomic_count = 8;
+
+	thread_resume(thread_create("atomic tester 1", &atomic_tester, (void *)1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 1", &atomic_tester, (void *)1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 1", &atomic_tester, (void *)1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 1", &atomic_tester, (void *)1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 2", &atomic_tester, (void *)-1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 2", &atomic_tester, (void *)-1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 2", &atomic_tester, (void *)-1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+	thread_resume(thread_create("atomic tester 2", &atomic_tester, (void *)-1, LOW_PRIORITY, DEFAULT_STACK_SIZE));
+
+	while (atomic_count > 0) {
+		thread_sleep(1);
+	}
+
+	printf("atomic value == %d (should be zero)\n", atomic);
+}
+
 int thread_tests(void) 
 {
 	mutex_test();
-//	event_test();
+	event_test();
 
 	thread_sleep(200);
 	context_switch_test();
+
+	atomic_test();
 	
 	return 0;
 }
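
The test above is deliberately symmetric: four threads add +1 and four add
-1 to the shared word, 1,000,000 iterations each, so the updates cancel and
the final value of "atomic" must be exactly 0. With a plain, non-atomic
read-modify-write the updates can interleave and get lost, leaving a nonzero
residue, which is what the ldrex/strex (or interrupt-masking) paths below
prevent:

	/* non-atomic version of the loop body, shown only as a sketch of the race */
	int tmp = atomic;	/* two threads can read the same value here...        */
	tmp = tmp + add;	/* ...both compute from it...                          */
	atomic = tmp;		/* ...and the later writer discards the other's update */
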
diff --git a/arch/arm/asm.S b/arch/arm/asm.S
index 2953bd6..4a31684 100644
--- a/arch/arm/asm.S
+++ b/arch/arm/asm.S
@@ -47,12 +47,24 @@
 	/* save old sp */
 	str		r3, [r0] 
 
+	/* clear any exclusive locks that the old thread holds */
+#if ARM_ISA_ARMV7
+	/* can clear it directly */
+	.word	0xf57ff01f // clrex
+#elif ARM_ISA_ARMV6
+	/* have to do a fake strex to clear it */
+	ldr		r0, =strex_spot
+	strex	r3, r2, [r0]
+#endif
+
 	/* load new regs */
 	ldmia	r1, { r4-r11, r12, r13, r14 }^
 	mov		lr, r12				/* restore lr */
 	add		sp, r1, #(11*4)     /* restore sp */
 	bx		lr
 
+.ltorg
+
 FUNCTION(arm_save_mode_regs)
 	mrs		r1, cpsr
 
@@ -84,5 +96,8 @@
 
 	bx		lr
 
+.data
+strex_spot:
+	.word	0
 	
 
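
Clearing the monitor here matters for a thread that was itself preempted
between an ldrex and its matching strex: while it was switched out, other
threads may have updated the location, and a later ldrex may have left the
monitor armed again, so letting the resumed thread's pending strex succeed
would store a stale value. Forcing the monitor clear on every switch makes
that strex fail and the loop retry. ARMv7 provides clrex (the .word above
is the clrex encoding, presumably spelled out so the assembler accepts it
regardless of the selected architecture); ARMv6 has no clrex, so a
throwaway strex to a scratch word (strex_spot) does the job. In C the
ARMv6 trick looks roughly like this (illustrative sketch, names made up):

	static int scratch_word;

	static inline void clear_exclusive_monitor(void)
	{
		int dummy = 0, status;
		/* whether the store succeeds or fails is irrelevant;
		 * the side effect is that the local monitor is cleared */
		__asm__ volatile(
			"strex %0, %1, [%2]\n"
			: "=&r"(status)
			: "r"(dummy), "r"(&scratch_word)
			: "memory");
		(void)status;
	}
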
diff --git a/arch/arm/ops.S b/arch/arm/ops.S
index c1d612c..a7cb5b2 100644
--- a/arch/arm/ops.S
+++ b/arch/arm/ops.S
@@ -45,6 +45,19 @@
 
 /* int atomic_add(int *ptr, int val); */
 FUNCTION(atomic_add)
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+	/* use load/store exclusive */
+.L_loop_add:
+	ldrex 	r12, [r0]
+	add		r2, r12, r1
+	strex 	r3, r2, [r0]
+	cmp		r3, #0
+	bne 	.L_loop_add
+	
+	/* save old value */
+	mov		r0, r12
+	bx		lr
+#else
 	/* disable interrupts, do the add, and reenable */
 	mrs	r2, cpsr
 	mov	r12, r2
@@ -62,9 +75,23 @@
 	/* restore interrupts and exit */
 	msr	cpsr_c, r12
 	bx	lr
+#endif
 	
 /* int atomic_and(int *ptr, int val); */
 FUNCTION(atomic_and)
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+	/* use load/store exclusive */
+.L_loop_and:
+	ldrex 	r12, [r0]
+	and		r2, r12, r1
+	strex 	r3, r2, [r0]
+	cmp		r3, #0
+	bne 	.L_loop_and
+	
+	/* save old value */
+	mov		r0, r12
+	bx		lr
+#else
 	/* disable interrupts, do the and, and reenable */
 	mrs	r2, cpsr
 	mov	r12, r2
@@ -82,9 +109,23 @@
 	/* restore interrupts and exit */
 	msr	cpsr_c, r12
 	bx	lr
+#endif
 	
 /* int atomic_or(int *ptr, int val); */
 FUNCTION(atomic_or)
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+	/* use load/store exclusive */
+.L_loop_or:
+	ldrex 	r12, [r0]
+	orr		r2, r12, r1
+	strex 	r3, r2, [r0]
+	cmp		r3, #0
+	bne 	.L_loop_or
+	
+	/* save old value */
+	mov		r0, r12
+	bx		lr
+#else
 	/* disable interrupts, do the or, and reenable */
 	mrs	r2, cpsr
 	mov	r12, r2
@@ -102,6 +143,7 @@
 	/* restore interrupts and exit */
 	msr	cpsr_c, r12
 	bx	lr
+#endif
 
 /* void arch_idle(); */
 FUNCTION(arch_idle)
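
The ldrex/strex paths return the previous contents of *ptr (the "save old
value" mov above), which is what lets callers build other primitives on top
of these routines. An illustrative caller, using the prototypes from the
comments above (sketch only, not part of the patch):

	int atomic_add(int *ptr, int val);
	int atomic_and(int *ptr, int val);
	int atomic_or(int *ptr, int val);

	#define FLAG_READY (1 << 0)

	static int flags;
	static int refcount = 1;

	void example(void)
	{
		/* set a bit; the old value says whether it was already set */
		if (atomic_or(&flags, FLAG_READY) & FLAG_READY) {
			/* lost the race -- another thread set it first */
		}

		/* drop a reference; seeing 1 means we were the last holder */
		if (atomic_add(&refcount, -1) == 1) {
			/* safe to tear the object down */
		}

		/* clear the flag again */
		atomic_and(&flags, ~FLAG_READY);
	}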