Blackfin arch: fix some bugs in lib/string.h functions found by our string testing modules

 - use ints rather than chars for the return value, since we actually return
   an int and we don't want it to be improperly sign extended during the reload
   http://blackfin.uclinux.org/gf/project/uclinux-dist/tracker/?action=TrackerItemEdit&tracker_item_id=3525

 - if src is shorter than the requested number of bytes to copy, strncpy needs
   to null pad the rest of dest
   http://blackfin.uclinux.org/gf/project/uclinux-dist/tracker/?action=TrackerItemEdit&tracker_item_id=3524

 - mark the asm blocks as __volatile__ and add "memory" to the clobber list so
   gcc does not reorder or optimize away accesses to buffers we may be using

 - rewrite asm code to be readable/maintainable

Signed-off-by: Mike Frysinger <michael.frysinger@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>

diff --git a/include/asm-blackfin/string.h b/include/asm-blackfin/string.h
index 6f1eb7d..e8ada91 100644
--- a/include/asm-blackfin/string.h
+++ b/include/asm-blackfin/string.h
@@ -9,13 +9,16 @@
 	char *xdest = dest;
 	char temp = 0;
 
-	__asm__ __volatile__
-	    ("1:\t%2 = B [%1++] (Z);\n\t"
-	     "B [%0++] = %2;\n\t"
-	     "CC = %2;\n\t"
-        "if cc jump 1b (bp);\n"
-	: "+&a" (dest), "+&a" (src), "=&d" (temp)
-	     ::"memory", "CC");
+	__asm__ __volatile__ (
+		"1:"
+		"%2 = B [%1++] (Z);"
+		"B [%0++] = %2;"
+		"CC = %2;"
+		"if cc jump 1b (bp);"
+		: "+&a" (dest), "+&a" (src), "=&d" (temp)
+		:
+		: "memory", "CC");
+
 	return xdest;
 }
 
@@ -28,37 +31,56 @@
 	if (n == 0)
 		return xdest;
 
-	__asm__ __volatile__
-	    ("1:\t%3 = B [%1++] (Z);\n\t"
-	     "B [%0++] = %3;\n\t"
-	     "CC = %3;\n\t"
-	     "if ! cc jump 2f;\n\t"
-	     "%2 += -1;\n\t"
-	     "CC = %2 == 0;\n\t"
-	     "if ! cc jump 1b (bp);\n"
-        "2:\n"
-	: "+&a" (dest), "+&a" (src), "+&da" (n), "=&d" (temp)
-	     ::"memory", "CC");
+	__asm__ __volatile__ (
+		"1:"
+		"%3 = B [%1++] (Z);"
+		"B [%0++] = %3;"
+		"CC = %3;"
+		"if ! cc jump 2f;"
+		"%2 += -1;"
+		"CC = %2 == 0;"
+		"if ! cc jump 1b (bp);"
+		"jump 4f;"
+		"2:"
+		/* if src is shorter than n, we need to null pad bytes now */
+		"%3 = 0;"
+		"3:"
+		"%2 += -1;"
+		"CC = %2 == 0;"
+		"if cc jump 4f;"
+		"B [%0++] = %3;"
+		"jump 3b;"
+		"4:"
+		: "+&a" (dest), "+&a" (src), "+&da" (n), "=&d" (temp)
+		:
+		: "memory", "CC");
+
 	return xdest;
 }
 
 #define __HAVE_ARCH_STRCMP
 extern inline int strcmp(const char *cs, const char *ct)
 {
-	char __res1, __res2;
+	/* need to use int's here so the char's in the assembly don't get
+	 * sign extended incorrectly when we don't want them to be
+	 */
+	int __res1, __res2;
 
-	__asm__
-       ("1:\t%2 = B[%0++] (Z);\n\t" /* get *cs */
-		"%3 = B[%1++] (Z);\n\t"	/* get *ct */
-		"CC = %2 == %3;\n\t"	/* compare a byte */
-		"if ! cc jump 2f;\n\t"	/* not equal, break out */
-		"CC = %2;\n\t"	/* at end of cs? */
-		"if cc jump 1b (bp);\n\t"	/* no, keep going */
-		"jump.s 3f;\n"	/* strings are equal */
-		"2:\t%2 = %2 - %3;\n"	/* *cs - *ct */
-        "3:\n"
-	: "+&a" (cs), "+&a" (ct), "=&d" (__res1), "=&d" (__res2)
-      : :	"CC");
+	__asm__ __volatile__ (
+		"1:"
+		"%2 = B[%0++] (Z);"      /* get *cs */
+		"%3 = B[%1++] (Z);"      /* get *ct */
+		"CC = %2 == %3;"         /* compare a byte */
+		"if ! cc jump 2f;"       /* not equal, break out */
+		"CC = %2;"               /* at end of cs? */
+		"if cc jump 1b (bp);"    /* no, keep going */
+		"jump.s 3f;"             /* strings are equal */
+		"2:"
+		"%2 = %2 - %3;"          /* *cs - *ct */
+		"3:"
+		: "+&a" (cs), "+&a" (ct), "=&d" (__res1), "=&d" (__res2)
+		:
+		: "memory", "CC");
 
 	return __res1;
 }
@@ -66,26 +88,35 @@
 #define __HAVE_ARCH_STRNCMP
 extern inline int strncmp(const char *cs, const char *ct, size_t count)
 {
-	char __res1, __res2;
+	/* need to use int's here so the char's in the assembly don't get
+	 * sign extended incorrectly when we don't want them to be
+	 */
+	int __res1, __res2;
 
 	if (!count)
 		return 0;
-	__asm__
-       ("1:\t%3 = B[%0++] (Z);\n\t"        /* get *cs */
-		"%4 = B[%1++] (Z);\n\t"	/* get *ct */
-		"CC = %3 == %4;\n\t"	/* compare a byte */
-		"if ! cc jump 3f;\n\t"	/* not equal, break out */
-		"CC = %3;\n\t"	/* at end of cs? */
-		"if ! cc jump 4f;\n\t"	/* yes, all done */
-		"%2 += -1;\n\t"	/* no, adjust count */
-	"CC = %2 == 0;\n\t"
-        "if ! cc jump 1b;\n"                 /* more to do, keep going */
-		"2:\t%3 = 0;\n\t"	/* strings are equal */
-        "jump.s    4f;\n"
-        "3:\t%3 = %3 - %4;\n"          /* *cs - *ct */
-        "4:"
-	: "+&a" (cs), "+&a" (ct), "+&da" (count), "=&d" (__res1), "=&d" (__res2)
-      : :	"CC");
+
+	__asm__ __volatile__ (
+		"1:"
+		"%3 = B[%0++] (Z);"      /* get *cs */
+		"%4 = B[%1++] (Z);"      /* get *ct */
+		"CC = %3 == %4;"         /* compare a byte */
+		"if ! cc jump 3f;"       /* not equal, break out */
+		"CC = %3;"               /* at end of cs? */
+		"if ! cc jump 4f;"       /* yes, all done */
+		"%2 += -1;"              /* no, adjust count */
+		"CC = %2 == 0;"
+		"if ! cc jump 1b;"       /* more to do, keep going */
+		"2:"
+		"%3 = 0;"                /* strings are equal */
+		"jump.s 4f;"
+		"3:"
+		"%3 = %3 - %4;"          /* *cs - *ct */
+		"4:"
+		: "+&a" (cs), "+&a" (ct), "+&da" (count), "=&d" (__res1), "=&d" (__res2)
+		:
+		: "memory", "CC");
+
 	return __res1;
 }