Implement VMClassLoader's native methods.

Change-Id: I6e617f415e54ce29c3a0a470b58e79d3aaa0a4bf
diff --git a/src/utils.cc b/src/utils.cc
index 08ab705..33a07ad 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -218,6 +218,192 @@
   return long_name;
 }
 
+namespace {
+
+// Read-only bit set for IsValidMemberNameUtf8(): bit (c & 0x1f) of word (c >> 5) is set iff low-ASCII c is valid.
+const uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
+  0x00000000,  // 00..1f low control characters; nothing valid
+  0x03ff2010,  // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
+  0x87fffffe,  // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
+  0x07fffffe   // 60..7f lowercase etc.; valid: 'a'..'z'
+};
+
+// Slow path of IsValidMemberNameUtf8() for multibyte characters; do not call directly.
+bool IsValidMemberNameUtf8Slow(const char** pUtf8Ptr) {
+  /*
+   * Decodes one UTF-16 code unit through GetUtf16FromUtf8() (which is
+   * handed pUtf8Ptr) and classifies it by its high byte. Per the dex
+   * format document, everything is accepted except:
+   *   (a) an improperly encoded low value,
+   *   (b) an improper surrogate pair,
+   *   (c) an encoded '\0',
+   *   (d) a high control character, and
+   *   (e) a high space, layout, or special character (U+00a0,
+   *       U+2000..U+200f, U+2028..U+202f, U+fff0..U+ffff).
+   */
+
+  const uint16_t first = GetUtf16FromUtf8(pUtf8Ptr);
+  const uint32_t highByte = first >> 8;
+
+  if (highByte == 0x00) {
+    // Only values above the ISO-8859-1 high space (0xa0) are valid here.
+    return first > 0x00a0;
+  }
+
+  if (highByte >= 0xd8 && highByte <= 0xdb) {
+    // Leading surrogate: the next decoded unit must be a trailing surrogate.
+    const uint16_t second = GetUtf16FromUtf8(pUtf8Ptr);
+    return second >= 0xdc00 && second <= 0xdfff;
+  }
+
+  if (highByte >= 0xdc && highByte <= 0xdf) {
+    // A trailing surrogate with no leading surrogate before it is invalid.
+    return false;
+  }
+
+  if (highByte == 0x20 || highByte == 0xff) {
+    // These two pages hold the spaces, controls, and specials; reject
+    // the excluded runs, matched in aligned groups of eight code points.
+    const uint32_t group = first & 0xfff8;
+    const bool excluded = (group == 0x2000) ||
+                          (group == 0x2008) ||
+                          (group == 0x2028) ||
+                          (group == 0xfff0) ||
+                          (group == 0xfff8);
+    return !excluded;
+  }
+
+  // Any other multibyte character is acceptable in a member name.
+  return true;
+}
+
+/* Return whether the pointed-at modified-UTF-8 encoded character is
+ * valid as part of a member name, updating *pUtf8Ptr to point past
+ * the consumed character. Two encoded UTF-16 code units are consumed
+ * when the character is a surrogate pair. If this returns false, the
+ * pointer may have been only partially advanced. A '\0' is rejected
+ * by the table, but the pointer is still stepped past it.
+ */
+bool IsValidMemberNameUtf8(const char** pUtf8Ptr) {
+  const uint8_t c = static_cast<uint8_t>(**pUtf8Ptr);
+  if (c <= 0x7f) {
+    // Low ASCII: one bit per character in DEX_MEMBER_VALID_LOW_ASCII.
+    const uint32_t wordIdx = c >> 5;
+    const uint32_t bitIdx = c & 0x1f;
+    (*pUtf8Ptr)++;
+    // Unsigned 1: bitIdx reaches 31 (for '_'), which must not be shifted into a signed int's sign bit.
+    return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1u << bitIdx)) != 0;
+  }
+
+  // Multibyte character: defer to the out-of-line slow path.
+  return IsValidMemberNameUtf8Slow(pUtf8Ptr);
+}
+
+}  // namespace
+
+/*
+ * Return whether s is a well-formed class name or type descriptor.
+ *
+ * s            NUL-terminated string to validate.
+ * isClassName  true to validate an unadorned class name, false to
+ *              validate a type descriptor. Treated as false whenever
+ *              s starts with one or more '[' array markers.
+ * dot_or_slash true if name components must be separated by '.',
+ *              false if they must be separated by '/'.
+ *
+ * After any '[' markers, a descriptor is one primitive letter (B, C, D,
+ * F, I, J, S, Z), a non-array V, or 'L' <name> ';'. Every name component
+ * must be non-empty and made of characters IsValidMemberNameUtf8() accepts.
+ */
+bool IsValidClassName(const char* s, bool isClassName, bool dot_or_slash) {
+  const char expectedSeparator = dot_or_slash ? '.' : '/';
+
+  // Strip the leading '[' markers, counting the array dimensions.
+  int dimensions = 0;
+  for (; *s == '['; ++s) {
+    ++dimensions;
+  }
+
+  // Arrays may have no more than 255 dimensions.
+  if (dimensions > 255) {
+    return false;
+  }
+
+  /*
+   * An array's element type is always written as a type descriptor,
+   * so once a '[' has been seen it no longer matters whether the
+   * caller asked for a class name.
+   */
+  const bool wantDescriptor = !isClassName || (dimensions != 0);
+
+  if (wantDescriptor) {
+    /*
+     * Either this is a single-character primitive descriptor, or an
+     * 'L' introduces an embedded class name that must end in ';'.
+     */
+    const char tag = *s++;
+
+    if (tag == 'V') {
+      // Non-array void is valid, but you can't have an array of void.
+      return (dimensions == 0) && (*s == '\0');
+    }
+
+    const bool isPrimitive = (tag == 'B') || (tag == 'C') || (tag == 'D') ||
+                             (tag == 'F') || (tag == 'I') || (tag == 'J') ||
+                             (tag == 'S') || (tag == 'Z');
+    if (isPrimitive) {
+      // Nothing may follow a primitive type letter.
+      return *s == '\0';
+    }
+
+    if (tag != 'L') {
+      // Oddball descriptor character.
+      return false;
+    }
+  }
+
+  // True at the very start of the name and right after each separator,
+  // i.e. whenever the current component is still empty.
+  bool componentEmpty = true;
+  while (true) {
+    const uint8_t ch = static_cast<uint8_t>(*s);
+
+    if (ch == '\0') {
+      /*
+       * End of string: too early for a type descriptor, fine for a
+       * class name unless the last component is empty (which also
+       * covers the empty string "").
+       */
+      return !wantDescriptor && !componentEmpty;
+    }
+
+    if (ch == ';') {
+      /*
+       * Never legal in a class name. In a descriptor it must be the
+       * final character and must close a non-empty component (so
+       * "L;" is rejected).
+       */
+      return wantDescriptor && !componentEmpty && (s[1] == '\0');
+    }
+
+    if (ch == '/' || ch == '.') {
+      // Reject the wrong separator, a leading separator, and two in a row.
+      if (ch != expectedSeparator || componentEmpty) {
+        return false;
+      }
+      componentEmpty = true;
+      s++;
+      continue;
+    }
+
+    // Ordinary name character; the helper advances s past it.
+    if (!IsValidMemberNameUtf8(&s)) {
+      return false;
+    }
+    componentEmpty = false;
+  }
+}
+
 void Split(const std::string& s, char delim, std::vector<std::string>& result) {
   const char* p = s.data();
   const char* end = p + s.size();