msm_shared: Cleanup Qtimers.

1. mdelay(), udelay(): The counter wrapping
needs to be accounted for while waiting for a
calculated number of ticks.
2. Remove the Virtual timer implementation.
3. Add functions to read the physical count and
counter frequency and move cp15 specific functions
to a new file.
4. Move function declarations and #define's to
header file.
5. Add isb() for each cp15 write.

Change-Id: I5c7efaee4233a8650064e6a9bbcaaf729d836180
diff --git a/arch/arm/ops.S b/arch/arm/ops.S
index c9660e7..2cb2688 100644
--- a/arch/arm/ops.S
+++ b/arch/arm/ops.S
@@ -48,12 +48,12 @@
 #if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
 	/* use load/store exclusive */
 .L_loop_add:
-	ldrex 	r12, [r0]
+	ldrex	r12, [r0]
 	add		r2, r12, r1
-	strex 	r3, r2, [r0]
+	strex	r3, r2, [r0]
 	cmp		r3, #0
-	bne 	.L_loop_add
-	
+	bne	.L_loop_add
+
 	/* save old value */
 	mov		r0, r12
 	bx		lr
@@ -65,7 +65,7 @@
 	msr	cpsr_c, r2
 
 	/* ints disabled, old cpsr state in r12 */
-	
+
 	/* do the add, leave the previous value in r0 */
 	mov	r3, r0
 	ldr	r0, [r3]
@@ -76,18 +76,18 @@
 	msr	cpsr_c, r12
 	bx	lr
 #endif
-	
+
 /* int atomic_and(int *ptr, int val); */
 FUNCTION(atomic_and)
 #if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
 	/* use load/store exclusive */
 .L_loop_and:
-	ldrex 	r12, [r0]
+	ldrex	r12, [r0]
 	and		r2, r12, r1
-	strex 	r3, r2, [r0]
+	strex	r3, r2, [r0]
 	cmp		r3, #0
-	bne 	.L_loop_and
-	
+	bne	.L_loop_and
+
 	/* save old value */
 	mov		r0, r12
 	bx		lr
@@ -99,7 +99,7 @@
 	msr	cpsr_c, r2
 
 	/* ints disabled, old cpsr state in r12 */
-	
+
 	/* do the and, leave the previous value in r0 */
 	mov	r3, r0
 	ldr	r0, [r3]
@@ -110,18 +110,18 @@
 	msr	cpsr_c, r12
 	bx	lr
 #endif
-	
+
 /* int atomic_or(int *ptr, int val); */
 FUNCTION(atomic_or)
 #if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
 	/* use load/store exclusive */
 .L_loop_or:
-	ldrex 	r12, [r0]
+	ldrex	r12, [r0]
 	orr		r2, r12, r1
-	strex 	r3, r2, [r0]
+	strex	r3, r2, [r0]
 	cmp		r3, #0
-	bne 	.L_loop_or
-	
+	bne	.L_loop_or
+
 	/* save old value */
 	mov		r0, r12
 	bx		lr
@@ -133,7 +133,7 @@
 	msr	cpsr_c, r2
 
 	/* ints disabled, old cpsr state in r12 */
-	
+
 	/* do the or, leave the previous value in r0 */
 	mov	r3, r0
 	ldr	r0, [r3]
@@ -183,24 +183,24 @@
 
 /* void arm_write_ttbr(uint32_t val) */
 FUNCTION(arm_write_ttbr)
-	mcr 	p15, 0, r0, c2, c0, 0
+	mcr	p15, 0, r0, c2, c0, 0
 	bx		lr
 
 /* void arm_write_dacr(uint32_t val) */
 FUNCTION(arm_write_dacr)
-	mcr 	p15, 0, r0, c3, c0, 0
+	mcr	p15, 0, r0, c3, c0, 0
 	bx		lr
 
 /* void arm_invalidate_tlb(void) */
 FUNCTION(arm_invalidate_tlb)
 	mov		r0, #0
-	mcr 	p15, 0, r0, c8, c7, 0
+	mcr	p15, 0, r0, c8, c7, 0
 	bx		lr
 
 /* void arch_switch_stacks_and_call(addr_t call, addr_t stack) */
 FUNCTION(arch_switch_stacks_and_call)
 	mov		sp, r1
-	bx		r0	
+	bx		r0
 
 /*void dmb(void) */
 FUNCTION(dmb)
@@ -212,6 +212,16 @@
 #endif
 	bx		lr
 
+/* void isb(void): instruction synchronization barrier; reduces to a bare return when neither CPU macro below is set */
+FUNCTION(isb)
+#if ARM_CPU_CORTEX_A8
+	isb		sy
+#elif ARM_CPU_ARM1136
+	mov		r0, #0
+	mcr		p15, 0, r0, c7, c5, 4	/* CP15 c7, c5, 4 write with r0 = 0 (flush prefetch buffer), used here in place of the isb instruction */
+#endif
+	bx		lr
+
 /*void dsb(void) */
 FUNCTION(dsb)
 #if ARM_CPU_CORTEX_A8