Verify we actually have valid utf8 metadata

Before calling NewStringUTF, check that the metadata value is actually
valid utf-8, and replace the offending characters with "?" if it is not.
b/5534491

Change-Id: I43de4307e739ae0b7d4177937ed33aa1dfb90d98
diff --git a/media/jni/android_media_MediaScanner.cpp b/media/jni/android_media_MediaScanner.cpp
index b88296f..09152f5 100644
--- a/media/jni/android_media_MediaScanner.cpp
+++ b/media/jni/android_media_MediaScanner.cpp
@@ -56,6 +56,53 @@
     return OK;
 }
 
+// stolen from dalvik/vm/checkJni.cpp
+static bool isValidUtf8(const char* bytes) {
+    while (*bytes != '\0') {
+        unsigned char utf8 = *(bytes++);
+        // Switch on the high four bits.
+        switch (utf8 >> 4) {
+        case 0x00:
+        case 0x01:
+        case 0x02:
+        case 0x03:
+        case 0x04:
+        case 0x05:
+        case 0x06:
+        case 0x07:
+            // Bit pattern 0xxx. No need for any extra bytes.
+            break;
+        case 0x08:
+        case 0x09:
+        case 0x0a:
+        case 0x0b:
+        case 0x0f:
+            /*
+             * Bit pattern 10xx or 1111, which are illegal start bytes.
+             * Note: 1111 is valid for normal UTF-8, but not the
+             * modified UTF-8 used here.
+             */
+            return false;
+        case 0x0e:
+            // Bit pattern 1110, so there are two additional bytes.
+            utf8 = *(bytes++);
+            if ((utf8 & 0xc0) != 0x80) {
+                return false;
+            }
+            // Fall through to take care of the final byte.
+        case 0x0c:
+        case 0x0d:
+            // Bit pattern 110x, so there is one additional byte.
+            utf8 = *(bytes++);
+            if ((utf8 & 0xc0) != 0x80) {
+                return false;
+            }
+            break;
+        }
+    }
+    return true;
+}
+
 class MyMediaScannerClient : public MediaScannerClient
 {
 public:
@@ -123,7 +170,22 @@
             mEnv->ExceptionClear();
             return NO_MEMORY;
         }
-        if ((valueStr = mEnv->NewStringUTF(value)) == NULL) {
+        char *cleaned = NULL;
+        if (!isValidUtf8(value)) {
+            cleaned = strdup(value);
+            char *chp = cleaned;
+            char ch;
+            while ((ch = *chp)) {
+                if (ch & 0x80) {
+                    *chp = '?';
+                }
+                chp++;
+            }
+            value = cleaned;
+        }
+        valueStr = mEnv->NewStringUTF(value);
+        free(cleaned);
+        if (valueStr == NULL) {
             mEnv->DeleteLocalRef(nameStr);
             mEnv->ExceptionClear();
             return NO_MEMORY;