Revert "use utf8_length() instead of local function, isValidUtf8()"

This reverts commit 1a8b6c29aaa5a1591097daca0876808cc029cbda

Reason for revert: utf8_length() handles a slightly different range than the CheckJNI code, so a given string can pass the utf8_length() check but then fail the CheckJNI check.

Change-Id: I81e15c95edd8f89782d8ad9025e68502b9340f22
diff --git a/media/jni/android_media_MediaScanner.cpp b/media/jni/android_media_MediaScanner.cpp
index f930a03..5d27966 100644
--- a/media/jni/android_media_MediaScanner.cpp
+++ b/media/jni/android_media_MediaScanner.cpp
@@ -19,7 +19,6 @@
 #define LOG_TAG "MediaScannerJNI"
 #include <utils/Log.h>
 #include <utils/threads.h>
-#include <utils/Unicode.h>
 #include <media/mediascanner.h>
 #include <media/stagefright/StagefrightMediaScanner.h>
 
@@ -57,6 +56,53 @@
     return OK;
 }
 
+// Structural modified-UTF-8 check of a NUL-terminated string; copied from dalvik/vm/checkJni.cpp
+static bool isValidUtf8(const char* bytes) {
+    while (*bytes != '\0') {
+        unsigned char utf8 = *(bytes++);  // unsigned so the shift below yields 0x0..0xf
+        // Classify the lead byte by its high four bits.
+        switch (utf8 >> 4) {
+        case 0x00:
+        case 0x01:
+        case 0x02:
+        case 0x03:
+        case 0x04:
+        case 0x05:
+        case 0x06:
+        case 0x07:
+            // Bit pattern 0xxx: single-byte character, no extra bytes needed.
+            break;
+        case 0x08:
+        case 0x09:
+        case 0x0a:
+        case 0x0b:
+        case 0x0f:
+            /*
+             * Bit pattern 10xx (a continuation byte) or 1111, both illegal as
+             * start bytes. Note: 1111 starts a 4-byte sequence in standard
+             * UTF-8, but is rejected by the modified UTF-8 used here.
+             */
+            return false;
+        case 0x0e:
+            // Bit pattern 1110: two continuation bytes follow; check the first here.
+            utf8 = *(bytes++);
+            if ((utf8 & 0xc0) != 0x80) {  // must be 10xxxxxx; a terminating NUL fails too
+                return false;
+            }
+            // Fall through to take care of the final byte.
+        case 0x0c:
+        case 0x0d:
+            // Bit pattern 110x: one continuation byte follows.
+            utf8 = *(bytes++);
+            if ((utf8 & 0xc0) != 0x80) {  // must be 10xxxxxx; a terminating NUL fails too
+                return false;
+            }
+            break;
+        }
+    }
+    return true;
+}
+
 class MyMediaScannerClient : public MediaScannerClient
 {
 public:
@@ -124,11 +170,8 @@
             mEnv->ExceptionClear();
             return NO_MEMORY;
         }
-
-        // Check if the value is valid UTF-8 string and replace
-        // any un-printable characters with '?' when it's not.
         char *cleaned = NULL;
-        if (utf8_length(value) == -1) {
+        if (!isValidUtf8(value)) {
             cleaned = strdup(value);
             char *chp = cleaned;
             char ch;