Implement VMClassLoader's native methods.

Change-Id: I6e617f415e54ce29c3a0a470b58e79d3aaa0a4bf
diff --git a/src/check_jni.cc b/src/check_jni.cc
index 73c35b5..8a8862d 100644
--- a/src/check_jni.cc
+++ b/src/check_jni.cc
@@ -63,187 +63,6 @@
   return reinterpret_cast<T>(ts.Self()->DecodeJObject(obj));
 }
 
-/* for IsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
-uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
-  0x00000000, // 00..1f low control characters; nothing valid
-  0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
-  0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
-  0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
-};
-
-/* Helper for IsValidMemberNameUtf8(); do not call directly. */
-bool IsValidMemberNameUtf8Slow(const char** pUtf8Ptr) {
-  /*
-   * It's a multibyte encoded character. Decode it and analyze. We
-   * accept anything that isn't (a) an improperly encoded low value,
-   * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
-   * control character, or (e) a high space, layout, or special
-   * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
-   * U+fff0..U+ffff). This is all specified in the dex format
-   * document.
-   */
-
-  uint16_t utf16 = GetUtf16FromUtf8(pUtf8Ptr);
-
-  // Perform follow-up tests based on the high 8 bits.
-  switch (utf16 >> 8) {
-  case 0x00:
-    // It's only valid if it's above the ISO-8859-1 high space (0xa0).
-    return (utf16 > 0x00a0);
-  case 0xd8:
-  case 0xd9:
-  case 0xda:
-  case 0xdb:
-    // It's a leading surrogate. Check to see that a trailing
-    // surrogate follows.
-    utf16 = GetUtf16FromUtf8(pUtf8Ptr);
-    return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
-  case 0xdc:
-  case 0xdd:
-  case 0xde:
-  case 0xdf:
-    // It's a trailing surrogate, which is not valid at this point.
-    return false;
-  case 0x20:
-  case 0xff:
-    // It's in the range that has spaces, controls, and specials.
-    switch (utf16 & 0xfff8) {
-    case 0x2000:
-    case 0x2008:
-    case 0x2028:
-    case 0xfff0:
-    case 0xfff8:
-      return false;
-    }
-    break;
-  }
-  return true;
-}
-
-/* Return whether the pointed-at modified-UTF-8 encoded character is
- * valid as part of a member name, updating the pointer to point past
- * the consumed character. This will consume two encoded UTF-16 code
- * points if the character is encoded as a surrogate pair. Also, if
- * this function returns false, then the given pointer may only have
- * been partially advanced.
- */
-bool IsValidMemberNameUtf8(const char** pUtf8Ptr) {
-  uint8_t c = (uint8_t) **pUtf8Ptr;
-  if (c <= 0x7f) {
-    // It's low-ascii, so check the table.
-    uint32_t wordIdx = c >> 5;
-    uint32_t bitIdx = c & 0x1f;
-    (*pUtf8Ptr)++;
-    return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1 << bitIdx)) != 0;
-  }
-
-  // It's a multibyte encoded character. Call a non-inline function
-  // for the heavy lifting.
-  return IsValidMemberNameUtf8Slow(pUtf8Ptr);
-}
-
-bool IsValidClassName(const char* s, bool isClassName, bool dotSeparator) {
-  int arrayCount = 0;
-
-  while (*s == '[') {
-    arrayCount++;
-    s++;
-  }
-
-  if (arrayCount > 255) {
-    // Arrays may have no more than 255 dimensions.
-    return false;
-  }
-
-  if (arrayCount != 0) {
-    /*
-     * If we're looking at an array of some sort, then it doesn't
-     * matter if what is being asked for is a class name; the
-     * format looks the same as a type descriptor in that case, so
-     * treat it as such.
-     */
-    isClassName = false;
-  }
-
-  if (!isClassName) {
-    /*
-     * We are looking for a descriptor. Either validate it as a
-     * single-character primitive type, or continue on to check the
-     * embedded class name (bracketed by "L" and ";").
-     */
-    switch (*(s++)) {
-    case 'B':
-    case 'C':
-    case 'D':
-    case 'F':
-    case 'I':
-    case 'J':
-    case 'S':
-    case 'Z':
-      // These are all single-character descriptors for primitive types.
-      return (*s == '\0');
-    case 'V':
-      // Non-array void is valid, but you can't have an array of void.
-      return (arrayCount == 0) && (*s == '\0');
-    case 'L':
-      // Class name: Break out and continue below.
-      break;
-    default:
-      // Oddball descriptor character.
-      return false;
-    }
-  }
-
-  /*
-   * We just consumed the 'L' that introduces a class name as part
-   * of a type descriptor, or we are looking for an unadorned class
-   * name.
-   */
-
-  bool sepOrFirst = true; // first character or just encountered a separator.
-  for (;;) {
-    uint8_t c = (uint8_t) *s;
-    switch (c) {
-    case '\0':
-      /*
-       * Premature end for a type descriptor, but valid for
-       * a class name as long as we haven't encountered an
-       * empty component (including the degenerate case of
-       * the empty string "").
-       */
-      return isClassName && !sepOrFirst;
-    case ';':
-      /*
-       * Invalid character for a class name, but the
-       * legitimate end of a type descriptor. In the latter
-       * case, make sure that this is the end of the string
-       * and that it doesn't end with an empty component
-       * (including the degenerate case of "L;").
-       */
-      return !isClassName && !sepOrFirst && (s[1] == '\0');
-    case '/':
-    case '.':
-      if (dotSeparator != (c == '.')) {
-        // The wrong separator character.
-        return false;
-      }
-      if (sepOrFirst) {
-        // Separator at start or two separators in a row.
-        return false;
-      }
-      sepOrFirst = true;
-      s++;
-      break;
-    default:
-      if (!IsValidMemberNameUtf8(&s)) {
-        return false;
-      }
-      sepOrFirst = false;
-      break;
-    }
-  }
-}
-
 /*
  * Hack to allow forcecopy to work with jniGetNonMovableArrayElements.
  * The code deliberately uses an invalid sequence of operations, so we
diff --git a/src/class_linker.h b/src/class_linker.h
index bb38f00..939fa55 100644
--- a/src/class_linker.h
+++ b/src/class_linker.h
@@ -31,10 +31,13 @@
 
   ~ClassLinker();
 
-  // Finds a class by its descriptor name.
+  // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
-  Class* FindClass(const StringPiece& descriptor,
-                   const ClassLoader* class_loader);
+  Class* FindClass(const StringPiece& descriptor, const ClassLoader* class_loader);
+
+  // Finds a class by its descriptor, returning NULL if it wasn't loaded
+  // by the given 'class_loader'.
+  Class* LookupClass(const StringPiece& descriptor, const ClassLoader* class_loader);
 
   Class* FindPrimitiveClass(char type);
 
@@ -236,8 +239,6 @@
                   Class* klass,
                   Method* dst);
 
-  Class* LookupClass(const StringPiece& descriptor, const ClassLoader* class_loader);
-
   // Inserts a class into the class table.  Returns true if the class
   // was inserted.
   bool InsertClass(const StringPiece& descriptor, Class* klass);
diff --git a/src/dex_file.h b/src/dex_file.h
index 9ebcce2..359825a 100644
--- a/src/dex_file.h
+++ b/src/dex_file.h
@@ -911,7 +911,7 @@
   // The size of the underlying memory allocation in bytes.
   size_t length_;
 
-  // Typically the dex file name when availble, alternatively some identifying string.
+  // Typically the dex file name when available, alternatively some identifying string.
   //
   // The ClassLinker will use this to match DexFiles the boot class
   // path to DexCache::GetLocation when loading from an image.
diff --git a/src/java_lang_VMClassLoader.cc b/src/java_lang_VMClassLoader.cc
new file mode 100644
index 0000000..bed89d6
--- /dev/null
+++ b/src/java_lang_VMClassLoader.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_linker.h"
+#include "jni_internal.h"
+#include "ScopedUtfChars.h"
+#include "zip_archive.h"
+
+#include "JniConstants.h" // Last to avoid problems with LOG redefinition.
+
+namespace art {
+
+namespace {
+
+// Turn "java.lang.String" into "Ljava/lang/String;" (names starting with '[' only get '.' -> '/').
+std::string ToDescriptor(const char* class_name) {
+  std::string descriptor(class_name);
+  std::replace(descriptor.begin(), descriptor.end(), '.', '/');
+  if (descriptor.length() > 0 && descriptor[0] != '[') {
+    descriptor = "L" + descriptor + ";";
+  }
+  return descriptor;
+}
+
+jclass VMClassLoader_findLoadedClass(JNIEnv* env, jclass, jobject javaLoader, jstring javaName) {
+  ClassLoader* loader = Decode<ClassLoader*>(env, javaLoader);
+  ScopedUtfChars name(env, javaName);
+  if (name.c_str() == NULL) {
+    return NULL;
+  }
+
+  std::string descriptor(ToDescriptor(name.c_str()));
+  Class* c = Runtime::Current()->GetClassLinker()->LookupClass(descriptor.c_str(), loader);
+  return AddLocalReference<jclass>(env, c);
+}
+
+jint VMClassLoader_getBootClassPathSize(JNIEnv* env, jclass) {
+  return Runtime::Current()->GetClassLinker()->GetBootClassPath().size();
+}
+
+/*
+ * Returns a string URL for a resource with the specified 'javaName' in
+ * entry 'index' of the boot class path.
+ *
+ * We return a newly-allocated String in the following form:
+ *
+ *   jar:file://path!/name
+ *
+ * Where "path" is the bootstrap class path entry and "name" is the string
+ * passed into this method.  "path" needs to be an absolute path (starting
+ * with '/'); if it's not we'd need to make it absolute as part of forming
+ * the URL string.
+ */
+jstring VMClassLoader_getBootClassPathResource(JNIEnv* env, jclass, jstring javaName, jint index) {
+  ScopedUtfChars name(env, javaName);
+  if (name.c_str() == NULL) {
+    return NULL;
+  }
+
+  const std::vector<const DexFile*>& path = Runtime::Current()->GetClassLinker()->GetBootClassPath();
+  if (index < 0 || size_t(index) >= path.size()) {
+    return NULL;
+  }
+  const DexFile* dex_file = path[index];
+  const std::string& location(dex_file->GetLocation());
+  UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(location));
+  if (zip_archive.get() == NULL) {
+    return NULL;
+  }
+  UniquePtr<ZipEntry> zip_entry(zip_archive->Find(name.c_str()));
+  if (zip_entry.get() == NULL) {
+    return NULL;
+  }
+
+  std::string url;
+  StringAppendF(&url, "jar:file://%s!/%s", location.c_str(), name.c_str());
+  return env->NewStringUTF(url.c_str());
+}
+
+/*
+ * static Class loadClass(String name, boolean resolve)
+ *     throws ClassNotFoundException
+ *
+ * Load class using bootstrap class loader.
+ *
+ * Return the Class object associated with the class or interface with
+ * the specified name.
+ *
+ * "name" is in "binary name" format, e.g. "dalvik.system.Debug$1".
+ */
+jclass VMClassLoader_loadClass(JNIEnv* env, jclass, jstring javaName, jboolean resolve) {
+  ScopedUtfChars name(env, javaName);
+  if (name.c_str() == NULL) {
+    return NULL;
+  }
+
+  /*
+   * We need to validate and convert the name (from x.y.z to x/y/z).  This
+   * is especially handy for array types, since we want to avoid
+   * auto-generating bogus array classes.
+   */
+  if (!IsValidClassName(name.c_str(), true, true)) {
+    Thread::Current()->ThrowNewException("Ljava/lang/ClassNotFoundException;",
+        "Invalid name: %s", name.c_str());
+    return NULL;
+  }
+
+  std::string descriptor(ToDescriptor(name.c_str()));
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Class* c = class_linker->FindClass(descriptor.c_str(), NULL);
+  if (c != NULL && resolve) {  // Never hand a class we failed to find to EnsureInitialized.
+    class_linker->EnsureInitialized(c, true);
+  }
+  return AddLocalReference<jclass>(env, c);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(VMClassLoader, findLoadedClass, "(Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/Class;"),
+  NATIVE_METHOD(VMClassLoader, getBootClassPathResource, "(Ljava/lang/String;I)Ljava/lang/String;"),
+  NATIVE_METHOD(VMClassLoader, getBootClassPathSize, "()I"),
+  NATIVE_METHOD(VMClassLoader, loadClass, "(Ljava/lang/String;Z)Ljava/lang/Class;"),
+};
+
+}  // namespace
+
+void register_java_lang_VMClassLoader(JNIEnv* env) {
+  jniRegisterNativeMethods(env, "java/lang/VMClassLoader", gMethods, NELEM(gMethods));
+}
+
+}  // namespace art
diff --git a/src/runtime.cc b/src/runtime.cc
index 5dc057e..0990851 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -463,7 +463,7 @@
   REGISTER(register_java_lang_System);
   REGISTER(register_java_lang_Thread);
   REGISTER(register_java_lang_Throwable);
-  //REGISTER(register_java_lang_VMClassLoader);
+  REGISTER(register_java_lang_VMClassLoader);
   //REGISTER(register_java_lang_reflect_AccessibleObject);
   REGISTER(register_java_lang_reflect_Array);
   //REGISTER(register_java_lang_reflect_Constructor);
diff --git a/src/utils.cc b/src/utils.cc
index 08ab705..33a07ad 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -218,6 +218,192 @@
   return long_name;
 }
 
+namespace {
+
+// Helper for IsValidMemberNameUtf8(), a bit vector indicating valid low ascii.
+uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
+  0x00000000, // 00..1f low control characters; nothing valid
+  0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
+  0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
+  0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
+};
+
+// Helper for IsValidMemberNameUtf8(); do not call directly.
+bool IsValidMemberNameUtf8Slow(const char** pUtf8Ptr) {
+  /*
+   * It's a multibyte encoded character. Decode it and analyze. We
+   * accept anything that isn't (a) an improperly encoded low value,
+   * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
+   * control character, or (e) a high space, layout, or special
+   * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
+   * U+fff0..U+ffff). This is all specified in the dex format
+   * document.
+   */
+
+  uint16_t utf16 = GetUtf16FromUtf8(pUtf8Ptr);
+
+  // Perform follow-up tests based on the high 8 bits.
+  switch (utf16 >> 8) {
+  case 0x00:
+    // It's only valid if it's above the ISO-8859-1 high space (0xa0).
+    return (utf16 > 0x00a0);
+  case 0xd8:
+  case 0xd9:
+  case 0xda:
+  case 0xdb:
+    // It's a leading surrogate. Check to see that a trailing
+    // surrogate follows.
+    utf16 = GetUtf16FromUtf8(pUtf8Ptr);
+    return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
+  case 0xdc:
+  case 0xdd:
+  case 0xde:
+  case 0xdf:
+    // It's a trailing surrogate, which is not valid at this point.
+    return false;
+  case 0x20:
+  case 0xff:
+    // It's in the range that has spaces, controls, and specials.
+    switch (utf16 & 0xfff8) {
+    case 0x2000:
+    case 0x2008:
+    case 0x2028:
+    case 0xfff0:
+    case 0xfff8:
+      return false;
+    }
+    break;
+  }
+  return true;
+}
+
+/* Return whether the pointed-at modified-UTF-8 encoded character is
+ * valid as part of a member name, updating the pointer to point past
+ * the consumed character. This will consume two encoded UTF-16 code
+ * points if the character is encoded as a surrogate pair. Also, if
+ * this function returns false, then the given pointer may only have
+ * been partially advanced.
+ */
+bool IsValidMemberNameUtf8(const char** pUtf8Ptr) {
+  uint8_t c = static_cast<uint8_t>(**pUtf8Ptr);
+  if (c <= 0x7f) {
+    // It's low-ascii, so check the table (unsigned mask: bitIdx can be 31).
+    uint32_t wordIdx = c >> 5;
+    uint32_t bitIdx = c & 0x1f;
+    (*pUtf8Ptr)++;
+    return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1U << bitIdx)) != 0;
+  }
+
+  // It's a multibyte encoded character. Call a non-inline function
+  // for the heavy lifting.
+  return IsValidMemberNameUtf8Slow(pUtf8Ptr);
+}
+
+}  // namespace
+
+bool IsValidClassName(const char* s, bool isClassName, bool dot_or_slash) {
+  char separator = (dot_or_slash ? '.' : '/');
+
+  int arrayCount = 0;
+  while (*s == '[') {
+    arrayCount++;
+    s++;
+  }
+
+  if (arrayCount > 255) {
+    // Arrays may have no more than 255 dimensions.
+    return false;
+  }
+
+  if (arrayCount != 0) {
+    /*
+     * If we're looking at an array of some sort, then it doesn't
+     * matter if what is being asked for is a class name; the
+     * format looks the same as a type descriptor in that case, so
+     * treat it as such.
+     */
+    isClassName = false;
+  }
+
+  if (!isClassName) {
+    /*
+     * We are looking for a descriptor. Either validate it as a
+     * single-character primitive type, or continue on to check the
+     * embedded class name (bracketed by "L" and ";").
+     */
+    switch (*(s++)) {
+    case 'B':
+    case 'C':
+    case 'D':
+    case 'F':
+    case 'I':
+    case 'J':
+    case 'S':
+    case 'Z':
+      // These are all single-character descriptors for primitive types.
+      return (*s == '\0');
+    case 'V':
+      // Non-array void is valid, but you can't have an array of void.
+      return (arrayCount == 0) && (*s == '\0');
+    case 'L':
+      // Class name: Break out and continue below.
+      break;
+    default:
+      // Oddball descriptor character.
+      return false;
+    }
+  }
+
+  /*
+   * We just consumed the 'L' that introduces a class name as part
+   * of a type descriptor, or we are looking for an unadorned class
+   * name.
+   */
+
+  bool sepOrFirst = true; // first character or just encountered a separator.
+  for (;;) {
+    uint8_t c = (uint8_t) *s;
+    switch (c) {
+    case '\0':
+      /*
+       * Premature end for a type descriptor, but valid for
+       * a class name as long as we haven't encountered an
+       * empty component (including the degenerate case of
+       * the empty string "").
+       */
+      return isClassName && !sepOrFirst;
+    case ';':
+      /*
+       * Invalid character for a class name, but the
+       * legitimate end of a type descriptor. In the latter
+       * case, make sure that this is the end of the string
+       * and that it doesn't end with an empty component
+       * (including the degenerate case of "L;").
+       */
+      return !isClassName && !sepOrFirst && (s[1] == '\0');
+    case '/':
+    case '.':
+      if (c != separator) {
+        // The wrong separator character.
+        return false;
+      }
+      if (sepOrFirst) {
+        // Separator at start or two separators in a row.
+        return false;
+      }
+      sepOrFirst = true;
+      s++;
+      break;
+    default:
+      if (!IsValidMemberNameUtf8(&s)) {
+        return false;
+      }
+      sepOrFirst = false;
+      break;
+    }
+  }
+}
+
 void Split(const std::string& s, char delim, std::vector<std::string>& result) {
   const char* p = s.data();
   const char* end = p + s.size();
diff --git a/src/utils.h b/src/utils.h
index fc58617..e582c7d 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -172,6 +172,15 @@
 // of the JNI spec.
 std::string MangleForJni(const std::string& s);
 
+// Tests whether 's' is a valid class name.
+// name_or_descriptor
+//     true  => "java/lang/String" (i.e. "name")
+//     false => "Ljava/lang/String;" (i.e. "descriptor")
+// dot_or_slash
+//     true  => "java.lang.String" (i.e. "dot")
+//     false => "java/lang/String" (i.e. "slash")
+bool IsValidClassName(const char* s, bool name_or_descriptor, bool dot_or_slash);
+
 // Returns the JNI native function name for the non-overloaded method 'm'.
 std::string JniShortName(const Method* m);
 // Returns the JNI native function name for the overloaded method 'm'.