Verify we actually have valid utf8 metadata
Before calling NewStringUTF, check that the metadata value is actually
valid utf-8, and replace the offending characters with "?" if it is not.
b/5534491
Change-Id: I43de4307e739ae0b7d4177937ed33aa1dfb90d98
diff --git a/media/jni/android_media_MediaScanner.cpp b/media/jni/android_media_MediaScanner.cpp
index b88296f..09152f5 100644
--- a/media/jni/android_media_MediaScanner.cpp
+++ b/media/jni/android_media_MediaScanner.cpp
@@ -56,6 +56,53 @@
return OK;
}
+// stolen from dalvik/vm/checkJni.cpp
+static bool isValidUtf8(const char* bytes) {
+ while (*bytes != '\0') {
+ unsigned char utf8 = *(bytes++);
+ // Switch on the high four bits.
+ switch (utf8 >> 4) {
+ case 0x00:
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ case 0x06:
+ case 0x07:
+ // Bit pattern 0xxx. No need for any extra bytes.
+ break;
+ case 0x08:
+ case 0x09:
+ case 0x0a:
+ case 0x0b:
+ case 0x0f:
+ /*
+ * Bit pattern 10xx or 1111, which are illegal start bytes.
+ * Note: 1111 is valid for normal UTF-8, but not the
+ * modified UTF-8 used here.
+ */
+ return false;
+ case 0x0e:
+ // Bit pattern 1110, so there are two additional bytes.
+ utf8 = *(bytes++);
+ if ((utf8 & 0xc0) != 0x80) {
+ return false;
+ }
+ // Fall through to take care of the final byte.
+ case 0x0c:
+ case 0x0d:
+ // Bit pattern 110x, so there is one additional byte.
+ utf8 = *(bytes++);
+ if ((utf8 & 0xc0) != 0x80) {
+ return false;
+ }
+ break;
+ }
+ }
+ return true;
+}
+
class MyMediaScannerClient : public MediaScannerClient
{
public:
@@ -123,7 +170,22 @@
mEnv->ExceptionClear();
return NO_MEMORY;
}
- if ((valueStr = mEnv->NewStringUTF(value)) == NULL) {
+ char *cleaned = NULL;
+ if (!isValidUtf8(value)) {
+ cleaned = strdup(value);
+ char *chp = cleaned;
+ char ch;
+ while ((ch = *chp)) {
+ if (ch & 0x80) {
+ *chp = '?';
+ }
+ chp++;
+ }
+ value = cleaned;
+ }
+ valueStr = mEnv->NewStringUTF(value);
+ free(cleaned);
+ if (valueStr == NULL) {
mEnv->DeleteLocalRef(nameStr);
mEnv->ExceptionClear();
return NO_MEMORY;