arch: arm: Fix the cache routines

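The cache range routines (clean, clean & invalidate, and invalidate)
were going out of bounds because of an incorrect loop-termination
branch. Fix the loop in each routine: compute the end address up
front, align the start address down to a cache line, and iterate
while the current address is below the end address.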

Change-Id: Ife1c3764e9f7ebf1238e2079118d351eea53f6db
diff --git a/arch/arm/cache-ops.S b/arch/arm/cache-ops.S
index a1151ab..22d9a2b 100644
--- a/arch/arm/cache-ops.S
+++ b/arch/arm/cache-ops.S
@@ -313,11 +313,13 @@
 
 	/* void arch_flush_cache_range(addr_t start, size_t len); */
 FUNCTION(arch_clean_cache_range)
+	add 	r2, r0, r1					// r2 = r0 + r1 = start + len (exclusive end address)
+	bic 	r0,#(CACHE_LINE-1)			// Round start down to a line boundary (assumes CACHE_LINE is a power of two)
0:
 	mcr		p15, 0, r0, c7, c10, 1		// clean cache to PoC by MVA
 	add		r0, r0, #CACHE_LINE
-	subs	r1, r1, #CACHE_LINE
-	bhs		0b
+	cmp 	r0, r2						// Has the next line address reached the end?
+	blo		0b							// Unsigned compare: repeat while r0 < r2
 	
 	mov		r0, #0
 	dsb
@@ -326,11 +328,13 @@
 
 	/* void arch_flush_invalidate_cache_range(addr_t start, size_t len); */
 FUNCTION(arch_clean_invalidate_cache_range)
+	add 	r2, r0, r1					// r2 = r0 + r1 = start + len (exclusive end address)
+	bic 	r0,#(CACHE_LINE-1)			// Round start down to a line boundary (assumes CACHE_LINE is a power of two)
0:
 	mcr		p15, 0, r0, c7, c14, 1		// clean & invalidate cache to PoC by MVA
 	add		r0, r0, #CACHE_LINE
-	subs	r1, r1, #CACHE_LINE
-	bhs		0b
+	cmp 	r0, r2						// Has the next line address reached the end?
+	blo		0b							// Unsigned compare: repeat while r0 < r2
 
 	mov		r0, #0
 	dsb
@@ -339,12 +343,14 @@
 
 	/* void arch_invalidate_cache_range(addr_t start, size_t len); */
 FUNCTION(arch_invalidate_cache_range)
-0:
 	/* invalidate cache line */
+	add 	r2, r0, r1					// r2 = r0 + r1 = start + len (exclusive end address)
+	bic 	r0,#(CACHE_LINE-1)			// Round start down to a line boundary; NOTE(review): an unaligned start gets its whole first line invalidated - confirm callers expect that
+0:
 	mcr		p15, 0, r0, c7, c6, 1
 	add		r0, r0, #CACHE_LINE
-	subs	r1, r1, #CACHE_LINE
-	bhs		0b
+	cmp 	r0, r2						// Has the next line address reached the end?
+	blo		0b							// Unsigned compare: repeat while r0 < r2
 	mov		r0, #0
 	dsb
 	bx		lr