Add some missing JNI string functions, GetObjectArrayElement, and all the primitive array region functions.
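This also moves the UTF-8/UTF-16 helpers (conversion, character counting, hashing) out of class String; object.h now includes utf.h and calls them as non-member functions.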
Change-Id: I75936b84fd619c9cf91f6e6a6037488220b05781
diff --git a/src/object.h b/src/object.h
index f86142a..7004e59 100644
--- a/src/object.h
+++ b/src/object.h
@@ -3,15 +3,16 @@
#ifndef ART_SRC_OBJECT_H_
#define ART_SRC_OBJECT_H_
-#include "constants.h"
#include "casts.h"
+#include "constants.h"
#include "globals.h"
#include "heap.h"
#include "logging.h"
#include "macros.h"
+#include "monitor.h"
#include "offsets.h"
#include "stringpiece.h"
-#include "monitor.h"
+#include "utf.h"
namespace art {
@@ -1486,14 +1487,18 @@
return hash_code_;
}
- uint32_t GetOffset() const {
+ int32_t GetOffset() const {
return offset_;
}
- uint32_t GetLength() const {
+ int32_t GetLength() const {
return count_;
}
+ int32_t GetUtfLength() const {
+ return CountUtf8Bytes(array_->GetData(), count_);
+ }
+
// TODO: do we need this? Equals is the only caller, and could
// bounds check itself.
uint16_t CharAt(int32_t index) const {
@@ -1508,19 +1513,23 @@
static String* AllocFromUtf16(int32_t utf16_length,
const uint16_t* utf16_data_in,
- int32_t hash_code) {
+ int32_t hash_code = 0) {
String* string = Alloc(GetJavaLangString(),
utf16_length);
// TODO: use 16-bit wide memset variant
for (int i = 0; i < utf16_length; i++ ) {
string->array_->Set(i, utf16_data_in[i]);
}
- string->ComputeHashCode();
+ if (hash_code != 0) {
+ string->hash_code_ = hash_code;
+ } else {
+ string->ComputeHashCode();
+ }
return string;
}
static String* AllocFromModifiedUtf8(const char* utf) {
- size_t char_count = ModifiedUtf8Len(utf);
+ size_t char_count = CountModifiedUtf8Chars(utf);
return AllocFromModifiedUtf8(char_count, utf);
}
@@ -1536,103 +1545,24 @@
static void SetClass(Class* java_lang_String);
static void ResetClass();
- static String* Alloc(Class* java_lang_String,
- int32_t utf16_length) {
+ static String* Alloc(Class* java_lang_String, int32_t utf16_length) {
return Alloc(java_lang_String, CharArray::Alloc(utf16_length));
}
- static String* Alloc(Class* java_lang_String,
- CharArray* array) {
+ static String* Alloc(Class* java_lang_String, CharArray* array) {
String* string = down_cast<String*>(java_lang_String->NewInstance());
string->array_ = array;
string->count_ = array->GetLength();
return string;
}
- // Convert Modified UTF-8 to UTF-16
- // http://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8
- static void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
- while (*utf8_data_in != '\0') {
- *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in);
- }
- }
-
- // Retrieve the next UTF-16 character from a UTF-8 string.
- //
- // Advances "*pUtf8Ptr" to the start of the next character.
- //
- // WARNING: If a string is corrupted by dropping a '\0' in the middle
- // of a 3-byte sequence, you can end up overrunning the buffer with
- // reads (and possibly with the writes if the length was computed and
- // cached before the damage). For performance reasons, this function
- // assumes that the string being parsed is known to be valid (e.g., by
- // already being verified). Most strings we process here are coming
- // out of dex files or other internal translations, so the only real
- // risk comes from the JNI NewStringUTF call.
- static uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
- uint8_t one = *(*utf8_data_in)++;
- if ((one & 0x80) == 0) {
- // one-byte encoding
- return one;
- }
- // two- or three-byte encoding
- uint8_t two = *(*utf8_data_in)++;
- if ((one & 0x20) == 0) {
- // two-byte encoding
- return ((one & 0x1f) << 6) |
- (two & 0x3f);
- }
- // three-byte encoding
- uint8_t three = *(*utf8_data_in)++;
- return ((one & 0x0f) << 12) |
- ((two & 0x3f) << 6) |
- (three & 0x3f);
- }
-
- // Like "strlen", but for strings encoded with "modified" UTF-8.
- //
- // The value returned is the number of characters, which may or may not
- // be the same as the number of bytes.
- //
- // (If this needs optimizing, try: mask against 0xa0, shift right 5,
- // get increment {1-3} from table of 8 values.)
- static size_t ModifiedUtf8Len(const char* utf8) {
- size_t len = 0;
- int ic;
- while ((ic = *utf8++) != '\0') {
- len++;
- if ((ic & 0x80) == 0) {
- // one-byte encoding
- continue;
- }
- // two- or three-byte encoding
- utf8++;
- if ((ic & 0x20) == 0) {
- // two-byte encoding
- continue;
- }
- // three-byte encoding
- utf8++;
- }
- return len;
- }
-
- // The java/lang/String.computeHashCode() algorithm
- static int32_t ComputeUtf16Hash(const uint16_t* string_data, size_t string_length) {
- int32_t hash = 0;
- while (string_length--) {
- hash = hash * 31 + *string_data++;
- }
- return hash;
- }
-
void ComputeHashCode() {
hash_code_ = ComputeUtf16Hash(array_->GetData(), count_);
}
// TODO: do we need this overload? give it a more intention-revealing name.
bool Equals(const char* modified_utf8) const {
- for (uint32_t i = 0; i < GetLength(); ++i) {
+ for (int32_t i = 0; i < GetLength(); ++i) {
uint16_t ch = GetUtf16FromUtf8(&modified_utf8);
if (ch == '\0' || ch != CharAt(i)) {
return false;
@@ -1652,7 +1582,7 @@
if (this->GetLength() != that->GetLength()) {
return false;
}
- for (uint32_t i = 0; i < that->GetLength(); ++i) {
+ for (int32_t i = 0; i < that->GetLength(); ++i) {
if (this->CharAt(i) != that->CharAt(i)) {
return false;
}
@@ -1661,11 +1591,11 @@
}
// TODO: do we need this overload? give it a more intention-revealing name.
- bool Equals(const uint16_t* that_chars, uint32_t that_offset, uint32_t that_length) const {
+ bool Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) const {
if (this->GetLength() != that_length) {
return false;
}
- for (uint32_t i = 0; i < that_length; ++i) {
+ for (int32_t i = 0; i < that_length; ++i) {
if (this->CharAt(i) != that_chars[that_offset + i]) {
return false;
}
@@ -1676,7 +1606,7 @@
// Create a modified UTF-8 encoded std::string from a java/lang/String object.
std::string ToModifiedUtf8() const {
std::string result;
- for (uint32_t i = 0; i < GetLength(); i++) {
+ for (int32_t i = 0; i < GetLength(); i++) {
uint16_t ch = CharAt(i);
// The most common case is (ch > 0 && ch <= 0x7f).
if (ch == 0 || ch > 0x7f) {
@@ -1695,7 +1625,6 @@
return result;
}
-
private:
// Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
CharArray* array_;