Implement VMClassLoader's native methods.
Change-Id: I6e617f415e54ce29c3a0a470b58e79d3aaa0a4bf
diff --git a/src/utils.cc b/src/utils.cc
index 08ab705..33a07ad 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -218,6 +218,192 @@
return long_name;
}
+namespace {
+
+// Helper for IsValidMemberNameUtf8(): a bit vector indicating which low-ASCII
+// characters (0x00..0x7f) are valid in a member name. Word i covers the
+// characters 32*i .. 32*i+31; within a word, bit j (counting from the least
+// significant bit) stands for character 32*i + j. The table is only ever
+// read, so it is const.
+const uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
+  0x00000000,  // 00..1f low control characters; nothing valid
+  0x03ff2010,  // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
+  0x87fffffe,  // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
+  0x07fffffe   // 60..7f lowercase etc.; valid: 'a'..'z'
+};
+
+// Slow path of IsValidMemberNameUtf8(), used for characters that are not
+// low-ASCII; do not call directly.
+//
+// One UTF-16 code unit is obtained from GetUtf16FromUtf8() (which is handed
+// pUtf8Ptr and presumably advances it past the bytes it decodes) and is then
+// classified by its high byte, following the member-name rules of the dex
+// format document:
+//   0x00        valid only above U+00a0; this rejects improperly encoded
+//               low values, an encoded '\0', the high control characters
+//               and the ISO-8859-1 high space.
+//   0xd8..0xdb  leading surrogate; a second code unit is read and must be
+//               a trailing surrogate (U+dc00..U+dfff).
+//   0xdc..0xdf  trailing surrogate without a leading one; invalid.
+//   0x20, 0xff  invalid inside U+2000..U+200f, U+2028..U+202f and
+//               U+fff0..U+ffff (spaces, layout and special characters).
+// Every other value is accepted.
+bool IsValidMemberNameUtf8Slow(const char** pUtf8Ptr) {
+  const uint16_t unit = GetUtf16FromUtf8(pUtf8Ptr);
+  const int high_byte = unit >> 8;
+
+  if (high_byte == 0x00) {
+    return unit > 0x00a0;
+  }
+
+  if (high_byte >= 0xd8 && high_byte <= 0xdb) {
+    // Leading surrogate: the very next code unit has to complete the pair.
+    const uint16_t next = GetUtf16FromUtf8(pUtf8Ptr);
+    return next >= 0xdc00 && next <= 0xdfff;
+  }
+
+  if (high_byte >= 0xdc && high_byte <= 0xdf) {
+    // A trailing surrogate cannot start a character.
+    return false;
+  }
+
+  if (high_byte == 0x20 || high_byte == 0xff) {
+    // The excluded ranges are made of aligned groups of eight code points,
+    // so masking off the low three bits identifies the group.
+    const int group = unit & 0xfff8;
+    const bool excluded = group == 0x2000 || group == 0x2008 || group == 0x2028 ||
+                          group == 0xfff0 || group == 0xfff8;
+    return !excluded;
+  }
+
+  return true;
+}
+
+/* Return whether the pointed-at modified-UTF-8 encoded character is
+ * valid as part of a member name, updating the pointer to point past
+ * the consumed character. This will consume two encoded UTF-16 code
+ * points if the character is encoded as a surrogate pair. Also, if
+ * this function returns false, then the given pointer may only have
+ * been partially advanced.
+ *
+ * pUtf8Ptr: in/out pointer to the current position in the string.
+ * Returns true if the character is acceptable in a member name.
+ */
+bool IsValidMemberNameUtf8(const char** pUtf8Ptr) {
+  const uint8_t c = static_cast<uint8_t>(**pUtf8Ptr);
+  if (c <= 0x7f) {
+    // It's low-ascii, so check the table: the top two bits of the 7-bit
+    // value pick the word, the low five bits pick the bit inside it.
+    const uint32_t wordIdx = c >> 5;
+    const uint32_t bitIdx = c & 0x1f;
+    (*pUtf8Ptr)++;
+    // The mask has to be built from an unsigned 1: bitIdx reaches 31 (for
+    // '_', 0x5f), and shifting a signed int 1 into the sign bit is not
+    // portable.
+    return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1u << bitIdx)) != 0;
+  }
+
+  // It's a multibyte encoded character. Call a non-inline function
+  // for the heavy lifting.
+  return IsValidMemberNameUtf8Slow(pUtf8Ptr);
+}
+
+} // namespace
+
+// Checks whether the NUL-terminated string |s| is a well-formed class name
+// or type descriptor.
+//
+//   s             the string to validate.
+//   isClassName   true to validate an unadorned class name, false to
+//                 validate a type descriptor. Ignored when |s| starts with
+//                 '[': array types are always spelled as descriptors.
+//   dot_or_slash  true if package components are separated by '.', false
+//                 if they are separated by '/'.
+//
+// Returns true if |s| is valid. At most 255 array dimensions are allowed,
+// "V" is only accepted without array dimensions, and empty name components
+// (a leading, trailing or doubled separator, "" or "L;") are rejected.
+bool IsValidClassName(const char* s, bool isClassName, bool dot_or_slash) {
+  const char expected_separator = dot_or_slash ? '.' : '/';
+
+  // Skip over the leading '[' markers, counting the array dimensions.
+  int dimensions = 0;
+  for (; *s == '['; ++s) {
+    ++dimensions;
+  }
+  if (dimensions > 255) {
+    // Arrays may have no more than 255 dimensions.
+    return false;
+  }
+
+  // Anything with array dimensions has the same format as a type
+  // descriptor, regardless of what the caller asked for.
+  const bool expect_descriptor = (dimensions != 0) || !isClassName;
+
+  if (expect_descriptor) {
+    // A descriptor is either a single primitive-type character, or a class
+    // name bracketed by 'L' and ';'.
+    const char tag = *s++;
+    const bool primitive = tag == 'B' || tag == 'C' || tag == 'D' || tag == 'F' ||
+                           tag == 'I' || tag == 'J' || tag == 'S' || tag == 'Z';
+    if (primitive) {
+      return *s == '\0';
+    }
+    if (tag == 'V') {
+      // Plain void is fine, but there is no such thing as an array of void.
+      return dimensions == 0 && *s == '\0';
+    }
+    if (tag != 'L') {
+      // Unknown descriptor character (this includes the end of the string).
+      return false;
+    }
+  }
+
+  // From here on |s| points at the class name itself: either the whole
+  // input, or the part following the descriptor's 'L'.
+  bool at_component_start = true;  // No character of the current component seen yet.
+  while (true) {
+    const uint8_t ch = static_cast<uint8_t>(*s);
+
+    if (ch == '\0') {
+      // End of string: a descriptor is missing its ';', while a class name
+      // is complete provided its last component is not empty.
+      return !expect_descriptor && !at_component_start;
+    }
+
+    if (ch == ';') {
+      // Never legal in a class name. For a descriptor it must terminate a
+      // non-empty component and be the last character of the string.
+      return expect_descriptor && !at_component_start && s[1] == '\0';
+    }
+
+    if (ch == '/' || ch == '.') {
+      // Reject the wrong separator as well as an empty component.
+      if (ch != expected_separator || at_component_start) {
+        return false;
+      }
+      at_component_start = true;
+      ++s;
+      continue;
+    }
+
+    // Ordinary name character; the helper advances |s| itself.
+    if (!IsValidMemberNameUtf8(&s)) {
+      return false;
+    }
+    at_component_start = false;
+  }
+}
+
void Split(const std::string& s, char delim, std::vector<std::string>& result) {
const char* p = s.data();
const char* end = p + s.size();