Flesh out AllocString
Change-Id: Ie8c1170e71374942eafdcb40775ca2df3cf7bbc7
diff --git a/src/object.h b/src/object.h
index 77eb164..0abf444 100644
--- a/src/object.h
+++ b/src/object.h
@@ -607,12 +607,12 @@
}
T* Get(uint32_t i) const {
- DCHECK_LT(i, GetLength());
+ CHECK_LT(i, GetLength());  // i must be below GetLength(); NOTE(review): presumably enforced in release builds too — confirm
Object* const * data = reinterpret_cast<Object* const *>(GetData());
return down_cast<T*>(data[i]);
}
void Set(uint32_t i, T* object) {
- DCHECK_LT(i, GetLength());
+ CHECK_LT(i, GetLength());  // i must be below GetLength(); NOTE(review): presumably enforced in release builds too — confirm
T** data = reinterpret_cast<T**>(GetData());
data[i] = object;
}
@@ -992,24 +992,53 @@
length,
sizeof(uint16_t)));
}
+
+ uint16_t* GetChars() {  // writable pointer: GetData() reinterpreted as 16-bit units
+ return reinterpret_cast<uint16_t*>(GetData());
+ }
+
+ const uint16_t* GetChars() const {  // read-only overload of the accessor above, for const arrays
+ return reinterpret_cast<const uint16_t*>(GetData());
+ }
+
+ uint16_t GetChar(uint32_t i) const {  // returns the 16-bit unit at index i
+ CHECK_LT(i, GetLength());  // index must be below the array length
+ return GetChars()[i];
+ }
+
+ void SetChar(uint32_t i, uint16_t ch) {  // stores ch as the 16-bit unit at index i
+ CHECK_LT(i, GetLength());  // index must be below the array length
+ GetChars()[i] = ch;
+ }
+
private:
CharArray();
};
class String : public Object {
public:
- static String* Alloc(Class* java_lang_String) {
- return down_cast<String*>(Object::Alloc(java_lang_String));
+ static String* AllocFromUtf16(Class* java_lang_String,  // allocates a String holding a copy of utf16_data_in
+ Class* char_array,
+ int32_t utf16_length,  // number of 16-bit units to copy and hash
+ const uint16_t* utf16_data_in) {  // source is only read, so it is taken as pointer-to-const
+ String* string = Alloc(java_lang_String, char_array, utf16_length);
+ uint16_t* utf16_data_out = string->array_->GetChars();
+ // TODO use memcpy (or a 16-bit wide copy variant) instead of this element-wise loop
+ for (int i = 0; i < utf16_length; i++ ) {
+ utf16_data_out[i] = utf16_data_in[i];
+ }
+ string->hash_code_ = ComputeUtf16Hash(utf16_data_out, utf16_length);  // store the hash of the copied units
+ return string;
}
static String* AllocFromModifiedUtf8(Class* java_lang_String,
Class* char_array,
- const char* data) {
- String* string = Alloc(java_lang_String);
- uint32_t count = strlen(data); // TODO
- CharArray* array = CharArray::Alloc(char_array, count);
- string->array_ = array;
- string->count_ = count;
+ int32_t utf16_length,  // sizes the char array and the hash range; it does not bound the decode below
+ const char* utf8_data_in) {  // input is consumed up to its '\0' terminator
+ String* string = Alloc(java_lang_String, char_array, utf16_length);
+ uint16_t* utf16_data_out = string->array_->GetChars();
+ ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);  // NOTE(review): assumes the input decodes to at most utf16_length units — confirm callers
+ string->hash_code_ = ComputeUtf16Hash(utf16_data_out, utf16_length);  // store the hash of the decoded units
return string;
}
@@ -1022,6 +1051,65 @@
uint32_t count_;
+ static String* Alloc(Class* java_lang_String,  // allocates the String object and a CharArray of utf16_length elements
+ Class* char_array,
+ int32_t utf16_length) {
+ String* string = down_cast<String*>(Object::Alloc(java_lang_String));  // NOTE(review): result is used unchecked — confirm Object::Alloc cannot fail
+ CharArray* array = CharArray::Alloc(char_array, utf16_length);
+ string->array_ = array;
+ string->count_ = utf16_length;  // count_ is uint32_t, so a negative utf16_length would wrap here
+ return string;
+ }
+
+ // Convert a '\0'-terminated Modified UTF-8 string to UTF-16. Output capacity is not checked here.
+ // http://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8
+ static void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
+ while (*utf8_data_in != '\0') {  // stop at the terminator; one UTF-16 unit is written per iteration
+ *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in);  // the helper advances utf8_data_in past the bytes it read
+ }
+ }
+
+ // Retrieve the next UTF-16 character from a UTF-8 string.
+ //
+ // Advances "*utf8_data_in" to the start of the next character.
+ //
+ // WARNING: If a string is corrupted by dropping a '\0' in the middle
+ // of a 3-byte sequence, you can end up overrunning the buffer with
+ // reads (and possibly with the writes if the length was computed and
+ // cached before the damage). For performance reasons, this function
+ // assumes that the string being parsed is known to be valid (e.g., by
+ // already being verified). Most strings we process here are coming
+ // out of dex files or other internal translations, so the only real
+ // risk comes from the JNI NewStringUTF call.
+ static uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
+ uint8_t one = *(*utf8_data_in)++;  // lead byte; the caller's pointer moves forward with every byte read
+ if ((one & 0x80) == 0) {
+ /* one-byte encoding: high bit clear, the byte is the value */
+ return one;
+ }
+ /* two- or three-byte encoding; the second byte is read without validating its high bits */
+ uint8_t two = *(*utf8_data_in)++;
+ if ((one & 0x20) == 0) {
+ /* two-byte encoding: 5 payload bits from the lead byte, 6 from the second */
+ return ((one & 0x1f) << 6) |
+ (two & 0x3f);
+ }
+ /* three-byte encoding: 4 + 6 + 6 payload bits (any lead byte with 0x20 set lands here) */
+ uint8_t three = *(*utf8_data_in)++;
+ return ((one & 0x0f) << 12) |
+ ((two & 0x3f) << 6) |
+ (three & 0x3f);
+ }
+
+ // The java/lang/String.computeHashCode() algorithm: hash = hash * 31 + unit, starting from 0
+ static uint32_t ComputeUtf16Hash(const uint16_t* string_data, size_t string_length) {
+ uint32_t hash = 0;
+ while (string_length--) {  // runs once per unit; string_length == 0 yields hash 0
+ hash = hash * 31 + *string_data++;  // result is stored back into 32 bits each step
+ }
+ return hash;
+ }
+
private:
String();
};