| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /* |
| * UTF-8 and Unicode string manipulation, plus java/lang/String convenience |
| * functions. |
| * |
| * In most cases we populate the fields in the String object directly, |
| * rather than going through an instance field lookup. |
| */ |
| #include "Dalvik.h" |
| #include <stdlib.h> |
| |
| /* |
| * Initialize string globals. |
| * |
| * This isn't part of the VM init sequence because it's hard to get the |
| * timing right -- we need it to happen after java/lang/String has been |
| * loaded, but before anybody wants to use a string. It's easiest to |
| * just initialize it on first use. |
| * |
| * In some unusual circumstances (e.g. trying to throw an exception because |
| * String implements java/lang/CharSequence, but CharSequence doesn't exist) |
| * we can try to create an exception string internally before anything has |
| * really tried to use String. In that case we basically self-destruct. |
| */ |
| static bool stringStartup() |
| { |
| if (gDvm.javaLangStringReady < 0) { |
| LOGE("ERROR: reentrant string initialization\n"); |
| assert(false); |
| return false; |
| } |
| assert(gDvm.javaLangStringReady == 0); |
| |
| gDvm.javaLangStringReady = -1; |
| |
| if (gDvm.classJavaLangString == NULL) |
| gDvm.classJavaLangString = |
| dvmFindSystemClassNoInit("Ljava/lang/String;"); |
| |
| gDvm.offJavaLangString_value = |
| dvmFindFieldOffset(gDvm.classJavaLangString, "value", "[C"); |
| gDvm.offJavaLangString_count = |
| dvmFindFieldOffset(gDvm.classJavaLangString, "count", "I"); |
| gDvm.offJavaLangString_offset = |
| dvmFindFieldOffset(gDvm.classJavaLangString, "offset", "I"); |
| gDvm.offJavaLangString_hashCode = |
| dvmFindFieldOffset(gDvm.classJavaLangString, "hashCode", "I"); |
| |
| if (gDvm.offJavaLangString_value < 0 || |
| gDvm.offJavaLangString_count < 0 || |
| gDvm.offJavaLangString_offset < 0 || |
| gDvm.offJavaLangString_hashCode < 0) |
| { |
| LOGE("VM-required field missing from java/lang/String\n"); |
| return false; |
| } |
| |
| gDvm.javaLangStringReady = 1; |
| |
| return true; |
| } |
| |
/*
 * Release any heap storage owned by the string code.
 *
 * There is nothing to release at present, so this is a no-op.
 */
void dvmStringShutdown()
{
    /* intentionally empty -- currently unused */
}
| |
/*
 * Hash the bytes of a NUL-terminated UTF-8 string, for use with internal
 * hash tables.
 *
 * The multiplier (31) matches the java/lang/String algorithm, but the
 * accumulator deliberately starts at 1 rather than 0 so the two hashes do
 * *not* line up and nobody is tempted to treat them as interchangeable.
 *
 * It would be more correct to decode with dexGetUtf16FromUtf8() and hash
 * the UTF-16 values, so that a character encoded two different ways
 * hashes identically.  For our purposes that isn't necessary.
 *
 * Each byte is added as a plain "char", so bytes >= 0x80 contribute
 * according to the platform's char signedness.
 */
u4 dvmComputeUtf8Hash(const char* utf8Str)
{
    const char* cursor;
    u4 result = 1;

    for (cursor = utf8Str; *cursor != '\0'; cursor++)
        result = result * 31 + *cursor;

    return result;
}
| |
/*
 * "strlen" for strings encoded with "modified" UTF-8.
 *
 * Returns the number of encoded characters, which may be smaller than
 * the number of bytes.  Only the lead byte of each character is examined:
 * bit 0x80 set means a multi-byte form, and bit 0x20 additionally set
 * means the three-byte form.  Continuation bytes are skipped without
 * being inspected, so the input must be well-formed.
 *
 * (If this needs optimizing, try: mask against 0xa0, shift right 5,
 * get increment {1-3} from table of 8 values.)
 */
int dvmUtf8Len(const char* utf8Str)
{
    const char* cursor = utf8Str;
    int count = 0;

    for (;;) {
        int lead = *cursor++;

        if (lead == '\0')
            break;
        count++;

        if ((lead & 0x80) == 0)
            continue;               /* one-byte encoding */

        /* step over one continuation byte, or two for the 3-byte form */
        cursor += ((lead & 0x20) != 0) ? 2 : 1;
    }

    return count;
}
| |
| /* |
| * Convert a "modified" UTF-8 string to UTF-16. |
| */ |
| void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str) |
| { |
| while (*utf8Str != '\0') |
| *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str); |
| } |
| |
/*
 * Compute how many bytes the "modified" UTF-8 form of a UTF-16 string
 * occupies, not counting a terminating NUL.
 *
 * "len" is the number of u2 elements to examine.  Per element:
 *   0x0001..0x007f  -> 1 byte
 *   0x0000, 0x0080..0x07ff -> 2 bytes (zero gets the two-byte form)
 *   0x0800 and up   -> 3 bytes
 */
static int utf16_utf8ByteLen(const u2* utf16Str, int len)
{
    const u2* src = utf16Str;
    int total = 0;

    for (; len != 0; len--) {
        unsigned int unit = *src++;

        if (unit != 0 && unit <= 0x7f) {
            /* the most common case: plain ASCII */
            total += 1;
        } else if (unit <= 0x07ff) {
            total += 2;
        } else {
            total += 3;
        }
    }

    return total;
}
| |
/*
 * Encode "len" UTF-16 elements as "modified" UTF-8 and append a NUL.
 *
 * The destination must hold utf16_utf8ByteLen(utf16Str, len) + 1 bytes;
 * allocating just "len" bytes is not enough.  A zero element is written
 * in its two-byte form (0xc0 0x80), so the only 0x00 byte in the output
 * is the terminator.
 */
static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len)
{
    const u2* src = utf16Str;
    char* dst = utf8Str;

    assert(len >= 0);

    for (; len != 0; len--) {
        unsigned int unit = *src++;

        if (unit != 0 && unit <= 0x7f) {
            /* the most common case: plain ASCII, copied through */
            *dst++ = unit;
        } else if (unit <= 0x07ff) {
            /* two bytes: 110xxxxx 10xxxxxx */
            *dst++ = (unit >> 6) | 0xc0;
            *dst++ = (unit & 0x3f) | 0x80;
        } else {
            /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
            *dst++ = (unit >> 12) | 0xe0;
            *dst++ = ((unit >> 6) & 0x3f) | 0x80;
            *dst++ = (unit & 0x3f) | 0x80;
        }
    }

    *dst = '\0';
}
| |
/*
 * Hash "len" UTF-16 elements using the java/lang/String.computeHashCode()
 * algorithm: start from zero and fold each element in as h = h * 31 + c.
 */
static inline u4 dvmComputeUtf16Hash(const u2* utf16Str, int len)
{
    const u2* src = utf16Str;
    u4 result = 0;
    int remaining;

    for (remaining = len; remaining != 0; remaining--)
        result = result * 31 + *src++;

    return result;
}
| u4 dvmComputeStringHash(StringObject* strObj) { |
| ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj, |
| gDvm.offJavaLangString_value); |
| int offset, len; |
| |
| len = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_count); |
| offset = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_offset); |
| |
| return dvmComputeUtf16Hash((u2*) chars->contents + offset, len); |
| } |
| |
| /* |
| * Create a new java/lang/String object, using the string data in "utf8Str". |
| * |
| * Note that "allocFlags" affects both of the allocations here. If you |
| * use ALLOC_DONT_TRACK in a context where a GC could happen between the |
| * two allocations, you could lose the array reference. |
| * |
| * Returns NULL and throws an exception on failure. |
| */ |
| StringObject* dvmCreateStringFromCstr(const char* utf8Str, int allocFlags) |
| { |
| assert(utf8Str != NULL); |
| |
| return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str), |
| allocFlags); |
| } |
| |
| /* |
| * Create a java/lang/String from a C string, given its UTF-16 length |
| * (number of UTF-16 code points). |
| * |
| * The caller must call dvmReleaseTrackedAlloc() on the return value or |
| * use a non-default value for "allocFlags". It is never appropriate |
| * to use ALLOC_DONT_TRACK with this function. |
| * |
| * Returns NULL and throws an exception on failure. |
| */ |
| StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str, |
| u4 utf16Length, int allocFlags) |
| { |
| StringObject* newObj; |
| ArrayObject* chars; |
| u4 hashCode = 0; |
| |
| //LOGV("Creating String from '%s'\n", utf8Str); |
| assert(allocFlags != ALLOC_DONT_TRACK); /* don't currently need */ |
| assert(utf8Str != NULL); |
| |
| if (gDvm.javaLangStringReady <= 0) { |
| if (!stringStartup()) |
| return NULL; |
| } |
| |
| /* init before alloc */ |
| if (!dvmIsClassInitialized(gDvm.classJavaLangString) && |
| !dvmInitClass(gDvm.classJavaLangString)) |
| { |
| return NULL; |
| } |
| |
| newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString, |
| allocFlags); |
| if (newObj == NULL) |
| return NULL; |
| |
| chars = dvmAllocPrimitiveArray('C', utf16Length, allocFlags); |
| if (chars == NULL) { |
| dvmReleaseTrackedAllocIFN((Object*) newObj, NULL, allocFlags); |
| return NULL; |
| } |
| dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str); |
| hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length); |
| |
| dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value, |
| (Object*)chars); |
| dvmReleaseTrackedAllocIFN((Object*) chars, NULL, allocFlags); |
| dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, utf16Length); |
| dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode); |
| /* leave offset set to zero */ |
| |
| /* debugging stuff */ |
| //dvmDumpObject((Object*)newObj); |
| //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, utf16Length * 2, |
| // kHexDumpMem); |
| |
| /* caller may need to dvmReleaseTrackedAlloc(newObj) */ |
| return newObj; |
| } |
| |
| /* |
| * Create a new java/lang/String object, using the Unicode data. |
| */ |
| StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len) |
| { |
| StringObject* newObj; |
| ArrayObject* chars; |
| u4 hashCode = 0; |
| |
| /* we allow a null pointer if the length is zero */ |
| assert(len == 0 || unichars != NULL); |
| |
| if (gDvm.javaLangStringReady <= 0) { |
| if (!stringStartup()) |
| return NULL; |
| } |
| |
| /* init before alloc */ |
| if (!dvmIsClassInitialized(gDvm.classJavaLangString) && |
| !dvmInitClass(gDvm.classJavaLangString)) |
| { |
| return NULL; |
| } |
| |
| newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString, |
| ALLOC_DEFAULT); |
| if (newObj == NULL) |
| return NULL; |
| |
| chars = dvmAllocPrimitiveArray('C', len, ALLOC_DEFAULT); |
| if (chars == NULL) { |
| dvmReleaseTrackedAlloc((Object*) newObj, NULL); |
| return NULL; |
| } |
| if (len > 0) |
| memcpy(chars->contents, unichars, len * sizeof(u2)); |
| hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len); |
| |
| dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value, |
| (Object*)chars); |
| dvmReleaseTrackedAlloc((Object*) chars, NULL); |
| dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, len); |
| dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode); |
| /* leave offset set to zero */ |
| |
| /* debugging stuff */ |
| //dvmDumpObject((Object*)newObj); |
| //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, len*2, kHexDumpMem); |
| |
| /* caller must dvmReleaseTrackedAlloc(newObj) */ |
| return newObj; |
| } |
| |
| /* |
| * Create a new C string from a java/lang/String object. |
| * |
| * Returns NULL if the object is NULL. |
| */ |
| char* dvmCreateCstrFromString(StringObject* jstr) |
| { |
| char* newStr; |
| ArrayObject* chars; |
| int len, byteLen, offset; |
| const u2* data; |
| |
| assert(gDvm.javaLangStringReady > 0); |
| |
| if (jstr == NULL) |
| return NULL; |
| |
| len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); |
| offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); |
| chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, |
| gDvm.offJavaLangString_value); |
| data = (const u2*) chars->contents + offset; |
| assert(offset + len <= (int) chars->length); |
| |
| byteLen = utf16_utf8ByteLen(data, len); |
| newStr = (char*) malloc(byteLen+1); |
| if (newStr == NULL) |
| return NULL; |
| convertUtf16ToUtf8(newStr, data, len); |
| |
| return newStr; |
| } |
| |
| /* |
| * Create a UTF-8 C string from a region of a java/lang/String. (Used by |
| * the JNI GetStringUTFRegion call.) |
| */ |
| void dvmCreateCstrFromStringRegion(StringObject* jstr, int start, int len, |
| char* buf) |
| { |
| const u2* data; |
| |
| data = dvmStringChars(jstr) + start; |
| convertUtf16ToUtf8(buf, data, len); |
| } |
| |
| /* |
| * Compute the length, in modified UTF-8, of a java/lang/String object. |
| * |
| * Does not include the terminating null byte. |
| */ |
| int dvmStringUtf8ByteLen(StringObject* jstr) |
| { |
| ArrayObject* chars; |
| int len, offset; |
| const u2* data; |
| |
| assert(gDvm.javaLangStringReady > 0); |
| |
| if (jstr == NULL) |
| return 0; // should we throw something? assert? |
| |
| len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); |
| offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); |
| chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, |
| gDvm.offJavaLangString_value); |
| data = (const u2*) chars->contents + offset; |
| assert(offset + len <= (int) chars->length); |
| |
| return utf16_utf8ByteLen(data, len); |
| } |
| |
| /* |
| * Get the string's length. |
| */ |
| int dvmStringLen(StringObject* jstr) |
| { |
| return dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); |
| } |
| |
| /* |
| * Get the char[] object from the String. |
| */ |
| ArrayObject* dvmStringCharArray(StringObject* jstr) |
| { |
| return (ArrayObject*) dvmGetFieldObject((Object*) jstr, |
| gDvm.offJavaLangString_value); |
| } |
| |
| /* |
| * Get the string's data. |
| */ |
| const u2* dvmStringChars(StringObject* jstr) |
| { |
| ArrayObject* chars; |
| int offset; |
| |
| offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); |
| chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, |
| gDvm.offJavaLangString_value); |
| return (const u2*) chars->contents + offset; |
| } |
| |
| |
| /* |
| * Compare two String objects. |
| * |
| * This is a dvmHashTableLookup() callback. The function has already |
| * compared their hash values; we need to do a full compare to ensure |
| * that the strings really match. |
| */ |
| int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2) |
| { |
| const StringObject* strObj1 = (const StringObject*) vstrObj1; |
| const StringObject* strObj2 = (const StringObject*) vstrObj2; |
| ArrayObject* chars1; |
| ArrayObject* chars2; |
| int len1, len2, offset1, offset2; |
| |
| assert(gDvm.javaLangStringReady > 0); |
| |
| /* get offset and length into char array; all values are in 16-bit units */ |
| len1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_count); |
| offset1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_offset); |
| len2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_count); |
| offset2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_offset); |
| if (len1 != len2) |
| return len1 - len2; |
| |
| chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1, |
| gDvm.offJavaLangString_value); |
| chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2, |
| gDvm.offJavaLangString_value); |
| |
| /* damage here actually indicates a broken java/lang/String */ |
| assert(offset1 + len1 <= (int) chars1->length); |
| assert(offset2 + len2 <= (int) chars2->length); |
| |
| return memcmp((const u2*) chars1->contents + offset1, |
| (const u2*) chars2->contents + offset2, |
| len1 * sizeof(u2)); |
| } |
| |