[AVR32] Fix bug in invalidate_dcache_region()

If (start + size) is not cacheline aligned and (start & mask) > (end &
mask), the second-to-last cacheline is not invalidated as it should be.
Fix this by rounding `end' down to the nearest cacheline boundary when
it is adjusted due to misalignment.

Also flush the write buffer unconditionally -- if the dcache wrote
back a line just before we invalidated it, the dirty data may be
sitting in the write buffer waiting to corrupt our buffer later.

Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c
index 8f7b1c3..c1233c6 100644
--- a/arch/avr32/mm/cache.c
+++ b/arch/avr32/mm/cache.c
@@ -23,7 +23,6 @@
 void invalidate_dcache_region(void *start, size_t size)
 {
 	unsigned long v, begin, end, linesz, mask;
-	int flush = 0;
 
 	linesz = boot_cpu_data.dcache.linesz;
 	mask = linesz - 1;
@@ -32,24 +31,21 @@
 	 * instead of invalidating ... never discard valid data!
 	 */
 	begin = (unsigned long)start;
-	end = begin + size - 1;
+	end = begin + size;
 
 	if (begin & mask) {
 		flush_dcache_line(start);
 		begin += linesz;
-		flush = 1;
 	}
-	if ((end & mask) != mask) {
+	if (end & mask) {
 		flush_dcache_line((void *)end);
-		end -= linesz;
-		flush = 1;
+		end &= ~mask;
 	}
 
 	/* remaining cachelines only need invalidation */
-	for (v = begin; v <= end; v += linesz)
+	for (v = begin; v < end; v += linesz)
 		invalidate_dcache_line((void *)v);
-	if (flush)
-		flush_write_buffer();
+	flush_write_buffer();
 }
 
 void clean_dcache_region(void *start, size_t size)