AAPT2: Accept Java unicode identifiers

Test: make aapt2_tests
Change-Id: I75a0e52d43b1785001bfe120eea7484f7bb4682b
diff --git a/tools/aapt2/Android.bp b/tools/aapt2/Android.bp
index 46c0ff0..14d05fd 100644
--- a/tools/aapt2/Android.bp
+++ b/tools/aapt2/Android.bp
@@ -111,11 +111,12 @@
         "proto/TableProtoDeserializer.cpp",
         "proto/TableProtoSerializer.cpp",
         "split/TableSplitter.cpp",
+        "text/Unicode.cpp",
+        "text/Utf8Iterator.cpp",
         "unflatten/BinaryResourceParser.cpp",
         "unflatten/ResChunkPullParser.cpp",
         "util/BigBuffer.cpp",
         "util/Files.cpp",
-        "util/Utf8Iterator.cpp",
         "util/Util.cpp",
         "ConfigDescription.cpp",
         "Debug.cpp",
diff --git a/tools/aapt2/ResourceTable.cpp b/tools/aapt2/ResourceTable.cpp
index 168004f..ab59560 100644
--- a/tools/aapt2/ResourceTable.cpp
+++ b/tools/aapt2/ResourceTable.cpp
@@ -15,20 +15,24 @@
  */
 
 #include "ResourceTable.h"
-#include "ConfigDescription.h"
-#include "NameMangler.h"
-#include "ResourceValues.h"
-#include "ValueVisitor.h"
-#include "util/Util.h"
 
-#include <android-base/logging.h>
-#include <androidfw/ResourceTypes.h>
 #include <algorithm>
 #include <memory>
 #include <string>
 #include <tuple>
 
-using android::StringPiece;
+#include "android-base/logging.h"
+#include "androidfw/ResourceTypes.h"
+
+#include "ConfigDescription.h"
+#include "NameMangler.h"
+#include "ResourceValues.h"
+#include "ValueVisitor.h"
+#include "text/Unicode.h"
+#include "util/Util.h"
+
+using ::aapt::text::IsValidResourceEntryName;
+using ::android::StringPiece;
 
 namespace aapt {
 
@@ -283,12 +287,9 @@
   return CollisionResult::kConflict;
 }
 
-static constexpr const char* kValidNameChars = "._-";
-
 static StringPiece ValidateName(const StringPiece& name) {
-  auto iter = util::FindNonAlphaNumericAndNotInSet(name, kValidNameChars);
-  if (iter != name.end()) {
-    return StringPiece(iter, 1);
+  if (!IsValidResourceEntryName(name)) {
+    return name;
   }
   return {};
 }
diff --git a/tools/aapt2/text/Unicode.cpp b/tools/aapt2/text/Unicode.cpp
new file mode 100644
index 0000000..38ec9c4
--- /dev/null
+++ b/tools/aapt2/text/Unicode.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "text/Unicode.h"
+
+#include <algorithm>
+#include <array>
+
+#include "text/Utf8Iterator.h"
+
+using ::android::StringPiece;
+
+namespace aapt {
+namespace text {
+
+namespace {
+
+struct CharacterProperties {  // One contiguous codepoint range and its property flags.
+  enum : uint32_t {
+    kXidStart = 1 << 0,     // Set when the range has the XID_Start property.
+    kXidContinue = 1 << 1,  // Set when the range has the XID_Continue property.
+  };
+
+  char32_t first_char;  // First codepoint of the range (inclusive).
+  char32_t last_char;   // Last codepoint of the range (inclusive).
+  uint32_t properties;  // Bitwise OR of the k* flags above.
+};
+
+// Include the generated data table.
+#include "text/Unicode_data.cpp"
+
+bool CompareCharacterProperties(const CharacterProperties& range, char32_t codepoint) {
+  return codepoint > range.last_char;  // True while `range` ends before `codepoint`.
+}
+
+// Returns the property bits of the table range containing `codepoint`, or 0 if none does.
+uint32_t FindCharacterProperties(char32_t codepoint) {
+  // lower_bound yields the first range whose last_char is not below `codepoint`.
+  const auto match = std::lower_bound(sCharacterProperties.begin(), sCharacterProperties.end(),
+                                      codepoint, CompareCharacterProperties);
+  // That range only matches if it also starts at or before `codepoint`.
+  const bool found = match != sCharacterProperties.end() && match->first_char <= codepoint;
+  return found ? match->properties : 0u;
+}
+
+}  // namespace
+
+bool IsXidStart(char32_t codepoint) {
+  return (FindCharacterProperties(codepoint) & CharacterProperties::kXidStart) != 0u;
+}
+
+bool IsXidContinue(char32_t codepoint) {
+  return (FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue) != 0u;
+}
+
+bool IsJavaIdentifier(const StringPiece& str) {
+  Utf8Iterator iter(str);
+  // An empty string is not an identifier.
+  if (!iter.HasNext()) {
+    return false;
+  }
+
+  // Java permits '_' and '$' in the leading position, but neither is XID_Start.
+  const char32_t first_codepoint = iter.Next();
+  if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') {
+    return false;
+  }
+
+  // Every later codepoint must be XID_Continue or '$'.
+  while (iter.HasNext()) {
+    const char32_t codepoint = iter.Next();
+    if (!IsXidContinue(codepoint) && codepoint != U'$') return false;
+  }
+  return true;
+}
+
+bool IsValidResourceEntryName(const StringPiece& str) {
+  Utf8Iterator iter(str);
+
+  // An empty entry name is rejected outright.
+  if (!iter.HasNext()) {
+    return false;
+  }
+
+  // The leading codepoint must be XID_Start, with '_' accepted as an extra.
+  const char32_t head = iter.Next();
+  if (!(IsXidStart(head) || head == U'_')) {
+    return false;
+  }
+
+  // Each following codepoint must be XID_Continue, '.' or '-'.
+  while (iter.HasNext()) {
+    const char32_t tail = iter.Next();
+    const bool tail_ok = IsXidContinue(tail) || tail == U'.' || tail == U'-';
+    if (!tail_ok) return false;
+  }
+  return true;
+}
+
+}  // namespace text
+}  // namespace aapt
diff --git a/tools/aapt2/text/Unicode.h b/tools/aapt2/text/Unicode.h
new file mode 100644
index 0000000..2707187
--- /dev/null
+++ b/tools/aapt2/text/Unicode.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AAPT_TEXT_UNICODE_H
+#define AAPT_TEXT_UNICODE_H
+
+#include "androidfw/StringPiece.h"
+
+namespace aapt {
+namespace text {
+
+// Returns true if the Unicode codepoint has the XID_Start property, meaning it can be used as the
+// first character of a programming language identifier.
+// http://unicode.org/reports/tr31/#Default_Identifier_Syntax
+//
+// XID_Start is a Unicode Derived Core Property. It is a variation of the ID_Start
+// Derived Core Property, accounting for a few characters that, when normalized, yield valid
+// characters in the ID_Start set.
+bool IsXidStart(char32_t codepoint);
+
+// Returns true if the Unicode codepoint has the XID_Continue property, meaning it can be used in
+// any position of a programming language identifier, except the first.
+// http://unicode.org/reports/tr31/#Default_Identifier_Syntax
+//
+// XID_Continue is a Unicode Derived Core Property. It is a variation of the ID_Continue
+// Derived Core Property, accounting for a few characters that, when normalized, yield valid
+// characters in the ID_Continue set.
+bool IsXidContinue(char32_t codepoint);
+
+// Returns true if the UTF8 string can be used as a Java identifier.
+// NOTE: This does not check against the set of reserved Java keywords.
+bool IsJavaIdentifier(const android::StringPiece& str);
+
+// Returns true if the UTF8 string can be used as the entry name of a resource name.
+// This is the `entry` part of package:type/entry.
+bool IsValidResourceEntryName(const android::StringPiece& str);
+
+}  // namespace text
+}  // namespace aapt
+
+#endif  // AAPT_TEXT_UNICODE_H
diff --git a/tools/aapt2/text/Unicode_data.cpp b/tools/aapt2/text/Unicode_data.cpp
new file mode 100644
index 0000000..96dc57b
--- /dev/null
+++ b/tools/aapt2/text/Unicode_data.cpp
@@ -0,0 +1,629 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const static std::array<CharacterProperties, 611> sCharacterProperties = {{
+    {0x0030, 0x0039, CharacterProperties::kXidContinue},
+    {0x0041, 0x005a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x005f, 0x005f, CharacterProperties::kXidContinue},
+    {0x0061, 0x007a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00aa, 0x00aa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00b5, 0x00b5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00b7, 0x00b7, CharacterProperties::kXidContinue},
+    {0x00ba, 0x00ba, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00c0, 0x00d6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00d8, 0x00f6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x00f8, 0x02c1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x02c6, 0x02d1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x02e0, 0x02e4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x02ec, 0x02ec, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x02ee, 0x02ee, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0300, 0x036f, CharacterProperties::kXidContinue},
+    {0x0370, 0x0374, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0376, 0x0377, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x037b, 0x037d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x037f, 0x037f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0386, 0x0386, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0387, 0x0387, CharacterProperties::kXidContinue},
+    {0x0388, 0x038a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x038c, 0x038c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x038e, 0x03a1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x03a3, 0x03f5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x03f7, 0x0481, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0483, 0x0487, CharacterProperties::kXidContinue},
+    {0x048a, 0x052f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0531, 0x0556, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0559, 0x0559, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0561, 0x0587, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0591, 0x05bd, CharacterProperties::kXidContinue},
+    {0x05bf, 0x05bf, CharacterProperties::kXidContinue},
+    {0x05c1, 0x05c2, CharacterProperties::kXidContinue},
+    {0x05c4, 0x05c5, CharacterProperties::kXidContinue},
+    {0x05c7, 0x05c7, CharacterProperties::kXidContinue},
+    {0x05d0, 0x05ea, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x05f0, 0x05f2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0610, 0x061a, CharacterProperties::kXidContinue},
+    {0x0620, 0x064a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x064b, 0x0669, CharacterProperties::kXidContinue},
+    {0x066e, 0x066f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0670, 0x0670, CharacterProperties::kXidContinue},
+    {0x0671, 0x06d3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x06d5, 0x06d5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x06d6, 0x06dc, CharacterProperties::kXidContinue},
+    {0x06df, 0x06e4, CharacterProperties::kXidContinue},
+    {0x06e5, 0x06e6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x06e7, 0x06e8, CharacterProperties::kXidContinue},
+    {0x06ea, 0x06ed, CharacterProperties::kXidContinue},
+    {0x06ee, 0x06ef, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x06f0, 0x06f9, CharacterProperties::kXidContinue},
+    {0x06fa, 0x06fc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x06ff, 0x06ff, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0710, 0x0710, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0711, 0x0711, CharacterProperties::kXidContinue},
+    {0x0712, 0x072f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0730, 0x074a, CharacterProperties::kXidContinue},
+    {0x074d, 0x07a5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x07a6, 0x07b0, CharacterProperties::kXidContinue},
+    {0x07b1, 0x07b1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x07c0, 0x07c9, CharacterProperties::kXidContinue},
+    {0x07ca, 0x07ea, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x07eb, 0x07f3, CharacterProperties::kXidContinue},
+    {0x07f4, 0x07f5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x07fa, 0x07fa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0800, 0x0815, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0816, 0x0819, CharacterProperties::kXidContinue},
+    {0x081a, 0x081a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x081b, 0x0823, CharacterProperties::kXidContinue},
+    {0x0824, 0x0824, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0825, 0x0827, CharacterProperties::kXidContinue},
+    {0x0828, 0x0828, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0829, 0x082d, CharacterProperties::kXidContinue},
+    {0x0840, 0x0858, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0859, 0x085b, CharacterProperties::kXidContinue},
+    {0x08a0, 0x08b4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x08b6, 0x08bd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x08d4, 0x08e1, CharacterProperties::kXidContinue},
+    {0x08e3, 0x0903, CharacterProperties::kXidContinue},
+    {0x0904, 0x0939, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x093a, 0x093c, CharacterProperties::kXidContinue},
+    {0x093d, 0x093d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x093e, 0x094f, CharacterProperties::kXidContinue},
+    {0x0950, 0x0950, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0951, 0x0957, CharacterProperties::kXidContinue},
+    {0x0958, 0x0961, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0962, 0x0963, CharacterProperties::kXidContinue},
+    {0x0966, 0x096f, CharacterProperties::kXidContinue},
+    {0x0971, 0x0980, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0981, 0x0983, CharacterProperties::kXidContinue},
+    {0x0985, 0x098c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x098f, 0x0990, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0993, 0x09a8, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09aa, 0x09b0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09b2, 0x09b2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09b6, 0x09b9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09bc, 0x09bc, CharacterProperties::kXidContinue},
+    {0x09bd, 0x09bd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09be, 0x09c4, CharacterProperties::kXidContinue},
+    {0x09c7, 0x09c8, CharacterProperties::kXidContinue},
+    {0x09cb, 0x09cd, CharacterProperties::kXidContinue},
+    {0x09ce, 0x09ce, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09d7, 0x09d7, CharacterProperties::kXidContinue},
+    {0x09dc, 0x09dd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09df, 0x09e1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x09e2, 0x09e3, CharacterProperties::kXidContinue},
+    {0x09e6, 0x09ef, CharacterProperties::kXidContinue},
+    {0x09f0, 0x09f1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a01, 0x0a03, CharacterProperties::kXidContinue},
+    {0x0a05, 0x0a0a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a0f, 0x0a10, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a13, 0x0a28, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a2a, 0x0a30, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a32, 0x0a33, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a35, 0x0a36, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a38, 0x0a39, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a3c, 0x0a3c, CharacterProperties::kXidContinue},
+    {0x0a3e, 0x0a42, CharacterProperties::kXidContinue},
+    {0x0a47, 0x0a48, CharacterProperties::kXidContinue},
+    {0x0a4b, 0x0a4d, CharacterProperties::kXidContinue},
+    {0x0a51, 0x0a51, CharacterProperties::kXidContinue},
+    {0x0a59, 0x0a5c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a5e, 0x0a5e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a66, 0x0a71, CharacterProperties::kXidContinue},
+    {0x0a72, 0x0a74, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a75, 0x0a75, CharacterProperties::kXidContinue},
+    {0x0a81, 0x0a83, CharacterProperties::kXidContinue},
+    {0x0a85, 0x0a8d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a8f, 0x0a91, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0a93, 0x0aa8, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0aaa, 0x0ab0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ab2, 0x0ab3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ab5, 0x0ab9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0abc, 0x0abc, CharacterProperties::kXidContinue},
+    {0x0abd, 0x0abd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0abe, 0x0ac5, CharacterProperties::kXidContinue},
+    {0x0ac7, 0x0ac9, CharacterProperties::kXidContinue},
+    {0x0acb, 0x0acd, CharacterProperties::kXidContinue},
+    {0x0ad0, 0x0ad0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ae0, 0x0ae1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ae2, 0x0ae3, CharacterProperties::kXidContinue},
+    {0x0ae6, 0x0aef, CharacterProperties::kXidContinue},
+    {0x0af9, 0x0af9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b01, 0x0b03, CharacterProperties::kXidContinue},
+    {0x0b05, 0x0b0c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b0f, 0x0b10, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b13, 0x0b28, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b2a, 0x0b30, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b32, 0x0b33, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b35, 0x0b39, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b3c, 0x0b3c, CharacterProperties::kXidContinue},
+    {0x0b3d, 0x0b3d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b3e, 0x0b44, CharacterProperties::kXidContinue},
+    {0x0b47, 0x0b48, CharacterProperties::kXidContinue},
+    {0x0b4b, 0x0b4d, CharacterProperties::kXidContinue},
+    {0x0b56, 0x0b57, CharacterProperties::kXidContinue},
+    {0x0b5c, 0x0b5d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b5f, 0x0b61, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b62, 0x0b63, CharacterProperties::kXidContinue},
+    {0x0b66, 0x0b6f, CharacterProperties::kXidContinue},
+    {0x0b71, 0x0b71, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b82, 0x0b82, CharacterProperties::kXidContinue},
+    {0x0b83, 0x0b83, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b85, 0x0b8a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b8e, 0x0b90, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b92, 0x0b95, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b99, 0x0b9a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b9c, 0x0b9c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0b9e, 0x0b9f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ba3, 0x0ba4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ba8, 0x0baa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0bae, 0x0bb9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0bbe, 0x0bc2, CharacterProperties::kXidContinue},
+    {0x0bc6, 0x0bc8, CharacterProperties::kXidContinue},
+    {0x0bca, 0x0bcd, CharacterProperties::kXidContinue},
+    {0x0bd0, 0x0bd0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0bd7, 0x0bd7, CharacterProperties::kXidContinue},
+    {0x0be6, 0x0bef, CharacterProperties::kXidContinue},
+    {0x0c00, 0x0c03, CharacterProperties::kXidContinue},
+    {0x0c05, 0x0c0c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c0e, 0x0c10, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c12, 0x0c28, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c2a, 0x0c39, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c3d, 0x0c3d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c3e, 0x0c44, CharacterProperties::kXidContinue},
+    {0x0c46, 0x0c48, CharacterProperties::kXidContinue},
+    {0x0c4a, 0x0c4d, CharacterProperties::kXidContinue},
+    {0x0c55, 0x0c56, CharacterProperties::kXidContinue},
+    {0x0c58, 0x0c5a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c60, 0x0c61, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c62, 0x0c63, CharacterProperties::kXidContinue},
+    {0x0c66, 0x0c6f, CharacterProperties::kXidContinue},
+    {0x0c80, 0x0c80, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c81, 0x0c83, CharacterProperties::kXidContinue},
+    {0x0c85, 0x0c8c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c8e, 0x0c90, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0c92, 0x0ca8, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0caa, 0x0cb3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0cb5, 0x0cb9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0cbc, 0x0cbc, CharacterProperties::kXidContinue},
+    {0x0cbd, 0x0cbd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0cbe, 0x0cc4, CharacterProperties::kXidContinue},
+    {0x0cc6, 0x0cc8, CharacterProperties::kXidContinue},
+    {0x0cca, 0x0ccd, CharacterProperties::kXidContinue},
+    {0x0cd5, 0x0cd6, CharacterProperties::kXidContinue},
+    {0x0cde, 0x0cde, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ce0, 0x0ce1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ce2, 0x0ce3, CharacterProperties::kXidContinue},
+    {0x0ce6, 0x0cef, CharacterProperties::kXidContinue},
+    {0x0cf1, 0x0cf2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d01, 0x0d03, CharacterProperties::kXidContinue},
+    {0x0d05, 0x0d0c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d0e, 0x0d10, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d12, 0x0d3a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d3d, 0x0d3d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d3e, 0x0d44, CharacterProperties::kXidContinue},
+    {0x0d46, 0x0d48, CharacterProperties::kXidContinue},
+    {0x0d4a, 0x0d4d, CharacterProperties::kXidContinue},
+    {0x0d4e, 0x0d4e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d54, 0x0d56, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d57, 0x0d57, CharacterProperties::kXidContinue},
+    {0x0d5f, 0x0d61, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d62, 0x0d63, CharacterProperties::kXidContinue},
+    {0x0d66, 0x0d6f, CharacterProperties::kXidContinue},
+    {0x0d7a, 0x0d7f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d82, 0x0d83, CharacterProperties::kXidContinue},
+    {0x0d85, 0x0d96, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0d9a, 0x0db1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0db3, 0x0dbb, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0dbd, 0x0dbd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0dc0, 0x0dc6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0dca, 0x0dca, CharacterProperties::kXidContinue},
+    {0x0dcf, 0x0dd4, CharacterProperties::kXidContinue},
+    {0x0dd6, 0x0dd6, CharacterProperties::kXidContinue},
+    {0x0dd8, 0x0ddf, CharacterProperties::kXidContinue},
+    {0x0de6, 0x0def, CharacterProperties::kXidContinue},
+    {0x0df2, 0x0df3, CharacterProperties::kXidContinue},
+    {0x0e01, 0x0e30, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e31, 0x0e31, CharacterProperties::kXidContinue},
+    {0x0e32, 0x0e32, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e33, 0x0e3a, CharacterProperties::kXidContinue},
+    {0x0e40, 0x0e46, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e47, 0x0e4e, CharacterProperties::kXidContinue},
+    {0x0e50, 0x0e59, CharacterProperties::kXidContinue},
+    {0x0e81, 0x0e82, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e84, 0x0e84, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e87, 0x0e88, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e8a, 0x0e8a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e8d, 0x0e8d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e94, 0x0e97, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0e99, 0x0e9f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ea1, 0x0ea3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ea5, 0x0ea5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ea7, 0x0ea7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0eaa, 0x0eab, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ead, 0x0eb0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0eb1, 0x0eb1, CharacterProperties::kXidContinue},
+    {0x0eb2, 0x0eb2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0eb3, 0x0eb9, CharacterProperties::kXidContinue},
+    {0x0ebb, 0x0ebc, CharacterProperties::kXidContinue},
+    {0x0ebd, 0x0ebd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ec0, 0x0ec4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ec6, 0x0ec6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0ec8, 0x0ecd, CharacterProperties::kXidContinue},
+    {0x0ed0, 0x0ed9, CharacterProperties::kXidContinue},
+    {0x0edc, 0x0edf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0f00, 0x0f00, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0f18, 0x0f19, CharacterProperties::kXidContinue},
+    {0x0f20, 0x0f29, CharacterProperties::kXidContinue},
+    {0x0f35, 0x0f35, CharacterProperties::kXidContinue},
+    {0x0f37, 0x0f37, CharacterProperties::kXidContinue},
+    {0x0f39, 0x0f39, CharacterProperties::kXidContinue},
+    {0x0f3e, 0x0f3f, CharacterProperties::kXidContinue},
+    {0x0f40, 0x0f47, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0f49, 0x0f6c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0f71, 0x0f84, CharacterProperties::kXidContinue},
+    {0x0f86, 0x0f87, CharacterProperties::kXidContinue},
+    {0x0f88, 0x0f8c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x0f8d, 0x0f97, CharacterProperties::kXidContinue},
+    {0x0f99, 0x0fbc, CharacterProperties::kXidContinue},
+    {0x0fc6, 0x0fc6, CharacterProperties::kXidContinue},
+    {0x1000, 0x102a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x102b, 0x103e, CharacterProperties::kXidContinue},
+    {0x103f, 0x103f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1040, 0x1049, CharacterProperties::kXidContinue},
+    {0x1050, 0x1055, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1056, 0x1059, CharacterProperties::kXidContinue},
+    {0x105a, 0x105d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x105e, 0x1060, CharacterProperties::kXidContinue},
+    {0x1061, 0x1061, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1062, 0x1064, CharacterProperties::kXidContinue},
+    {0x1065, 0x1066, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1067, 0x106d, CharacterProperties::kXidContinue},
+    {0x106e, 0x1070, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1071, 0x1074, CharacterProperties::kXidContinue},
+    {0x1075, 0x1081, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1082, 0x108d, CharacterProperties::kXidContinue},
+    {0x108e, 0x108e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x108f, 0x109d, CharacterProperties::kXidContinue},
+    {0x10a0, 0x10c5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x10c7, 0x10c7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x10cd, 0x10cd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x10d0, 0x10fa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x10fc, 0x1248, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x124a, 0x124d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1250, 0x1256, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1258, 0x1258, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x125a, 0x125d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1260, 0x1288, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x128a, 0x128d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1290, 0x12b0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12b2, 0x12b5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12b8, 0x12be, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12c0, 0x12c0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12c2, 0x12c5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12c8, 0x12d6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x12d8, 0x1310, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1312, 0x1315, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1318, 0x135a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x135d, 0x135f, CharacterProperties::kXidContinue},
+    {0x1369, 0x1371, CharacterProperties::kXidContinue},
+    {0x1380, 0x138f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x13a0, 0x13f5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x13f8, 0x13fd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1401, 0x166c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x166f, 0x167f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1681, 0x169a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x16a0, 0x16ea, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x16ee, 0x16f8, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1700, 0x170c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x170e, 0x1711, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1712, 0x1714, CharacterProperties::kXidContinue},
+    {0x1720, 0x1731, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1732, 0x1734, CharacterProperties::kXidContinue},
+    {0x1740, 0x1751, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1752, 0x1753, CharacterProperties::kXidContinue},
+    {0x1760, 0x176c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x176e, 0x1770, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1772, 0x1773, CharacterProperties::kXidContinue},
+    {0x1780, 0x17b3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x17b4, 0x17d3, CharacterProperties::kXidContinue},
+    {0x17d7, 0x17d7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x17dc, 0x17dc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x17dd, 0x17dd, CharacterProperties::kXidContinue},
+    {0x17e0, 0x17e9, CharacterProperties::kXidContinue},
+    {0x180b, 0x180d, CharacterProperties::kXidContinue},
+    {0x1810, 0x1819, CharacterProperties::kXidContinue},
+    {0x1820, 0x1877, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1880, 0x18a8, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x18a9, 0x18a9, CharacterProperties::kXidContinue},
+    {0x18aa, 0x18aa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x18b0, 0x18f5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1900, 0x191e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1920, 0x192b, CharacterProperties::kXidContinue},
+    {0x1930, 0x193b, CharacterProperties::kXidContinue},
+    {0x1946, 0x194f, CharacterProperties::kXidContinue},
+    {0x1950, 0x196d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1970, 0x1974, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1980, 0x19ab, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x19b0, 0x19c9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x19d0, 0x19da, CharacterProperties::kXidContinue},
+    {0x1a00, 0x1a16, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1a17, 0x1a1b, CharacterProperties::kXidContinue},
+    {0x1a20, 0x1a54, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1a55, 0x1a5e, CharacterProperties::kXidContinue},
+    {0x1a60, 0x1a7c, CharacterProperties::kXidContinue},
+    {0x1a7f, 0x1a89, CharacterProperties::kXidContinue},
+    {0x1a90, 0x1a99, CharacterProperties::kXidContinue},
+    {0x1aa7, 0x1aa7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1ab0, 0x1abd, CharacterProperties::kXidContinue},
+    {0x1b00, 0x1b04, CharacterProperties::kXidContinue},
+    {0x1b05, 0x1b33, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1b34, 0x1b44, CharacterProperties::kXidContinue},
+    {0x1b45, 0x1b4b, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1b50, 0x1b59, CharacterProperties::kXidContinue},
+    {0x1b6b, 0x1b73, CharacterProperties::kXidContinue},
+    {0x1b80, 0x1b82, CharacterProperties::kXidContinue},
+    {0x1b83, 0x1ba0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1ba1, 0x1bad, CharacterProperties::kXidContinue},
+    {0x1bae, 0x1baf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1bb0, 0x1bb9, CharacterProperties::kXidContinue},
+    {0x1bba, 0x1be5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1be6, 0x1bf3, CharacterProperties::kXidContinue},
+    {0x1c00, 0x1c23, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1c24, 0x1c37, CharacterProperties::kXidContinue},
+    {0x1c40, 0x1c49, CharacterProperties::kXidContinue},
+    {0x1c4d, 0x1c4f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1c50, 0x1c59, CharacterProperties::kXidContinue},
+    {0x1c5a, 0x1c7d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1c80, 0x1c88, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1cd0, 0x1cd2, CharacterProperties::kXidContinue},
+    {0x1cd4, 0x1ce8, CharacterProperties::kXidContinue},
+    {0x1ce9, 0x1cec, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1ced, 0x1ced, CharacterProperties::kXidContinue},
+    {0x1cee, 0x1cf1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1cf2, 0x1cf4, CharacterProperties::kXidContinue},
+    {0x1cf5, 0x1cf6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1cf8, 0x1cf9, CharacterProperties::kXidContinue},
+    {0x1d00, 0x1dbf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1dc0, 0x1df5, CharacterProperties::kXidContinue},
+    {0x1dfb, 0x1dff, CharacterProperties::kXidContinue},
+    {0x1e00, 0x1f15, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f18, 0x1f1d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f20, 0x1f45, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f48, 0x1f4d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f50, 0x1f57, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f59, 0x1f59, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f5b, 0x1f5b, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f5d, 0x1f5d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f5f, 0x1f7d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1f80, 0x1fb4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fb6, 0x1fbc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fbe, 0x1fbe, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fc2, 0x1fc4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fc6, 0x1fcc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fd0, 0x1fd3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fd6, 0x1fdb, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1fe0, 0x1fec, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1ff2, 0x1ff4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x1ff6, 0x1ffc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x203f, 0x2040, CharacterProperties::kXidContinue},
+    {0x2054, 0x2054, CharacterProperties::kXidContinue},
+    {0x2071, 0x2071, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x207f, 0x207f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2090, 0x209c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x20d0, 0x20dc, CharacterProperties::kXidContinue},
+    {0x20e1, 0x20e1, CharacterProperties::kXidContinue},
+    {0x20e5, 0x20f0, CharacterProperties::kXidContinue},
+    {0x2102, 0x2102, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2107, 0x2107, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x210a, 0x2113, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2115, 0x2115, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2118, 0x211d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2124, 0x2124, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2126, 0x2126, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2128, 0x2128, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x212a, 0x2139, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x213c, 0x213f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2145, 0x2149, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x214e, 0x214e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2160, 0x2188, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2c00, 0x2c2e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2c30, 0x2c5e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2c60, 0x2ce4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2ceb, 0x2cee, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2cef, 0x2cf1, CharacterProperties::kXidContinue},
+    {0x2cf2, 0x2cf3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d00, 0x2d25, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d27, 0x2d27, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d2d, 0x2d2d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d30, 0x2d67, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d6f, 0x2d6f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2d7f, 0x2d7f, CharacterProperties::kXidContinue},
+    {0x2d80, 0x2d96, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2da0, 0x2da6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2da8, 0x2dae, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2db0, 0x2db6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2db8, 0x2dbe, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2dc0, 0x2dc6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2dc8, 0x2dce, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2dd0, 0x2dd6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2dd8, 0x2dde, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x2de0, 0x2dff, CharacterProperties::kXidContinue},
+    {0x3005, 0x3007, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3021, 0x3029, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x302a, 0x302f, CharacterProperties::kXidContinue},
+    {0x3031, 0x3035, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3038, 0x303c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3041, 0x3096, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3099, 0x309a, CharacterProperties::kXidContinue},
+    {0x309d, 0x309f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x30a1, 0x30fa, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x30fc, 0x30ff, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3105, 0x312d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3131, 0x318e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x31a0, 0x31ba, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x31f0, 0x31ff, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x3400, 0x4db5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0x4e00, 0x9fd5, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa000, 0xa48c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa4d0, 0xa4fd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa500, 0xa60c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa610, 0xa61f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa620, 0xa629, CharacterProperties::kXidContinue},
+    {0xa62a, 0xa62b, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa640, 0xa66e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa66f, 0xa66f, CharacterProperties::kXidContinue},
+    {0xa674, 0xa67d, CharacterProperties::kXidContinue},
+    {0xa67f, 0xa69d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa69e, 0xa69f, CharacterProperties::kXidContinue},
+    {0xa6a0, 0xa6ef, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa6f0, 0xa6f1, CharacterProperties::kXidContinue},
+    {0xa717, 0xa71f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa722, 0xa788, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa78b, 0xa7ae, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa7b0, 0xa7b7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa7f7, 0xa801, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa802, 0xa802, CharacterProperties::kXidContinue},
+    {0xa803, 0xa805, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa806, 0xa806, CharacterProperties::kXidContinue},
+    {0xa807, 0xa80a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa80b, 0xa80b, CharacterProperties::kXidContinue},
+    {0xa80c, 0xa822, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa823, 0xa827, CharacterProperties::kXidContinue},
+    {0xa840, 0xa873, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa880, 0xa881, CharacterProperties::kXidContinue},
+    {0xa882, 0xa8b3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa8b4, 0xa8c5, CharacterProperties::kXidContinue},
+    {0xa8d0, 0xa8d9, CharacterProperties::kXidContinue},
+    {0xa8e0, 0xa8f1, CharacterProperties::kXidContinue},
+    {0xa8f2, 0xa8f7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa8fb, 0xa8fb, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa8fd, 0xa8fd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa900, 0xa909, CharacterProperties::kXidContinue},
+    {0xa90a, 0xa925, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa926, 0xa92d, CharacterProperties::kXidContinue},
+    {0xa930, 0xa946, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa947, 0xa953, CharacterProperties::kXidContinue},
+    {0xa960, 0xa97c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa980, 0xa983, CharacterProperties::kXidContinue},
+    {0xa984, 0xa9b2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa9b3, 0xa9c0, CharacterProperties::kXidContinue},
+    {0xa9cf, 0xa9cf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa9d0, 0xa9d9, CharacterProperties::kXidContinue},
+    {0xa9e0, 0xa9e4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa9e5, 0xa9e5, CharacterProperties::kXidContinue},
+    {0xa9e6, 0xa9ef, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xa9f0, 0xa9f9, CharacterProperties::kXidContinue},
+    {0xa9fa, 0xa9fe, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa00, 0xaa28, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa29, 0xaa36, CharacterProperties::kXidContinue},
+    {0xaa40, 0xaa42, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa43, 0xaa43, CharacterProperties::kXidContinue},
+    {0xaa44, 0xaa4b, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa4c, 0xaa4d, CharacterProperties::kXidContinue},
+    {0xaa50, 0xaa59, CharacterProperties::kXidContinue},
+    {0xaa60, 0xaa76, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa7a, 0xaa7a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaa7b, 0xaa7d, CharacterProperties::kXidContinue},
+    {0xaa7e, 0xaaaf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaab0, 0xaab0, CharacterProperties::kXidContinue},
+    {0xaab1, 0xaab1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaab2, 0xaab4, CharacterProperties::kXidContinue},
+    {0xaab5, 0xaab6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaab7, 0xaab8, CharacterProperties::kXidContinue},
+    {0xaab9, 0xaabd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaabe, 0xaabf, CharacterProperties::kXidContinue},
+    {0xaac0, 0xaac0, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaac1, 0xaac1, CharacterProperties::kXidContinue},
+    {0xaac2, 0xaac2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaadb, 0xaadd, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaae0, 0xaaea, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaaeb, 0xaaef, CharacterProperties::kXidContinue},
+    {0xaaf2, 0xaaf4, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xaaf5, 0xaaf6, CharacterProperties::kXidContinue},
+    {0xab01, 0xab06, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab09, 0xab0e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab11, 0xab16, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab20, 0xab26, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab28, 0xab2e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab30, 0xab5a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab5c, 0xab65, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xab70, 0xabe2, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xabe3, 0xabea, CharacterProperties::kXidContinue},
+    {0xabec, 0xabed, CharacterProperties::kXidContinue},
+    {0xabf0, 0xabf9, CharacterProperties::kXidContinue},
+    {0xac00, 0xd7a3, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xd7b0, 0xd7c6, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xd7cb, 0xd7fb, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xf900, 0xfa6d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfa70, 0xfad9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb00, 0xfb06, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb13, 0xfb17, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb1d, 0xfb1d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb1e, 0xfb1e, CharacterProperties::kXidContinue},
+    {0xfb1f, 0xfb28, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb2a, 0xfb36, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb38, 0xfb3c, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb3e, 0xfb3e, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb40, 0xfb41, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb43, 0xfb44, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfb46, 0xfbb1, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfbd3, 0xfc5d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfc64, 0xfd3d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfd50, 0xfd8f, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfd92, 0xfdc7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfdf0, 0xfdf9, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe00, 0xfe0f, CharacterProperties::kXidContinue},
+    {0xfe20, 0xfe2f, CharacterProperties::kXidContinue},
+    {0xfe33, 0xfe34, CharacterProperties::kXidContinue},
+    {0xfe4d, 0xfe4f, CharacterProperties::kXidContinue},
+    {0xfe71, 0xfe71, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe73, 0xfe73, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe77, 0xfe77, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe79, 0xfe79, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe7b, 0xfe7b, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe7d, 0xfe7d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xfe7f, 0xfefc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xff10, 0xff19, CharacterProperties::kXidContinue},
+    {0xff21, 0xff3a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xff3f, 0xff3f, CharacterProperties::kXidContinue},
+    {0xff41, 0xff5a, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xff66, 0xff9d, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xff9e, 0xff9f, CharacterProperties::kXidContinue},
+    {0xffa0, 0xffbe, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xffc2, 0xffc7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xffca, 0xffcf, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xffd2, 0xffd7, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+    {0xffda, 0xffdc, CharacterProperties::kXidStart | CharacterProperties::kXidContinue},
+}};
diff --git a/tools/aapt2/text/Unicode_test.cpp b/tools/aapt2/text/Unicode_test.cpp
new file mode 100644
index 0000000..d47fb28
--- /dev/null
+++ b/tools/aapt2/text/Unicode_test.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "text/Unicode.h"
+
+#include "test/Test.h"
+
+using ::testing::Each;
+using ::testing::Eq;
+using ::testing::ResultOf;
+
+namespace aapt {
+namespace text {
+
+TEST(UnicodeTest, IsXidStart) {
+  const std::u32string accepted = U"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZˮø";
+  EXPECT_THAT(accepted, Each(ResultOf(IsXidStart, Eq(true))));
+  // ASCII punctuation, ASCII digits and '_' must not be classified as XID_Start.
+  const std::u32string rejected = U"$;\'/<>+=-.{}[]()\\|?@#%^&*!~`\",1234567890_";
+  EXPECT_THAT(rejected, Each(ResultOf(IsXidStart, Eq(false))));
+}
+
+TEST(UnicodeTest, IsXidContinue) {
+  const std::u32string accepted = U"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_ˮø";
+  EXPECT_THAT(accepted, Each(ResultOf(IsXidContinue, Eq(true))));
+  // Unlike digits and '_', ASCII punctuation must not be classified as XID_Continue.
+  const std::u32string rejected = U"$;\'/<>+=-.{}[]()\\|?@#%^&*!~`\",";
+  EXPECT_THAT(rejected, Each(ResultOf(IsXidContinue, Eq(false))));
+}
+
+TEST(UnicodeTest, IsJavaIdentifier) {
+  EXPECT_THAT(IsJavaIdentifier("FøøBar_12"), Eq(true));
+  EXPECT_THAT(IsJavaIdentifier("Føø$Bar"), Eq(true));
+  // Per these expectations digits, '_' and '$' are fine inside a name but not as its first character.
+  EXPECT_THAT(IsJavaIdentifier("12FøøBar"), Eq(false));
+  EXPECT_THAT(IsJavaIdentifier("_FøøBar"), Eq(false));
+  EXPECT_THAT(IsJavaIdentifier("$Føø$Bar"), Eq(false));
+}
+
+TEST(UnicodeTest, IsValidResourceEntryName) {
+  EXPECT_TRUE(IsValidResourceEntryName("FøøBar"));
+  EXPECT_TRUE(IsValidResourceEntryName("FøøBar_12"));
+  EXPECT_TRUE(IsValidResourceEntryName("Føø.Bar"));
+  EXPECT_TRUE(IsValidResourceEntryName("Føø-Bar"));
+  EXPECT_TRUE(IsValidResourceEntryName("_FøøBar"));
+  // A leading digit and the characters '$', '/', ':' and ';' must all be rejected.
+  EXPECT_FALSE(IsValidResourceEntryName("12FøøBar"));
+  EXPECT_FALSE(IsValidResourceEntryName("Føø$Bar"));
+  EXPECT_FALSE(IsValidResourceEntryName("Føø/Bar"));
+  EXPECT_FALSE(IsValidResourceEntryName("Føø:Bar"));
+  EXPECT_FALSE(IsValidResourceEntryName("Føø;Bar"));
+}
+
+}  // namespace text
+}  // namespace aapt
diff --git a/tools/aapt2/util/Utf8Iterator.cpp b/tools/aapt2/text/Utf8Iterator.cpp
similarity index 95%
rename from tools/aapt2/util/Utf8Iterator.cpp
rename to tools/aapt2/text/Utf8Iterator.cpp
index c09eda7..0d43353 100644
--- a/tools/aapt2/util/Utf8Iterator.cpp
+++ b/tools/aapt2/text/Utf8Iterator.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/Utf8Iterator.h"
+#include "text/Utf8Iterator.h"
 
 #include "android-base/logging.h"
 #include "utils/Unicode.h"
@@ -22,6 +22,7 @@
 using ::android::StringPiece;
 
 namespace aapt {
+namespace text {
 
 Utf8Iterator::Utf8Iterator(const StringPiece& str)
     : str_(str), next_pos_(0), current_codepoint_(0) {
@@ -57,4 +58,5 @@
   return result;
 }
 
+}  // namespace text
 }  // namespace aapt
diff --git a/tools/aapt2/util/Utf8Iterator.h b/tools/aapt2/text/Utf8Iterator.h
similarity index 87%
rename from tools/aapt2/util/Utf8Iterator.h
rename to tools/aapt2/text/Utf8Iterator.h
index f2507d8..6923957 100644
--- a/tools/aapt2/util/Utf8Iterator.h
+++ b/tools/aapt2/text/Utf8Iterator.h
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef AAPT_UTIL_UTF8ITERATOR_H
-#define AAPT_UTIL_UTF8ITERATOR_H
+#ifndef AAPT_TEXT_UTF8ITERATOR_H
+#define AAPT_TEXT_UTF8ITERATOR_H
 
 #include "android-base/macros.h"
 #include "androidfw/StringPiece.h"
 
 namespace aapt {
+namespace text {
 
 class Utf8Iterator {
  public:
@@ -42,6 +43,7 @@
   char32_t current_codepoint_;
 };
 
+}  // namespace text
 }  // namespace aapt
 
-#endif  // AAPT_UTIL_UTF8ITERATOR_H
+#endif  // AAPT_TEXT_UTF8ITERATOR_H
diff --git a/tools/aapt2/util/Utf8Iterator_test.cpp b/tools/aapt2/text/Utf8Iterator_test.cpp
similarity index 95%
rename from tools/aapt2/util/Utf8Iterator_test.cpp
rename to tools/aapt2/text/Utf8Iterator_test.cpp
index cfebbb0..f3111c0 100644
--- a/tools/aapt2/util/Utf8Iterator_test.cpp
+++ b/tools/aapt2/text/Utf8Iterator_test.cpp
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
-#include "util/Utf8Iterator.h"
+#include "text/Utf8Iterator.h"
 
 #include "test/Test.h"
 
 using ::testing::Eq;
 
 namespace aapt {
+namespace text {
 
 TEST(Utf8IteratorTest, IteratesOverAscii) {
   Utf8Iterator iter("hello");
@@ -62,4 +63,5 @@
   EXPECT_FALSE(iter.HasNext());
 }
 
+}  // namespace text
 }  // namespace aapt
diff --git a/tools/aapt2/tools/extract_unicode_properties.py b/tools/aapt2/tools/extract_unicode_properties.py
new file mode 100644
index 0000000..d7e0479
--- /dev/null
+++ b/tools/aapt2/tools/extract_unicode_properties.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+"""Extracts the XID_Start and XID_Continue Derived core properties from the ICU data files
+and emits a std::array<> for binary searching.
+"""
+
+import re
+import sys
+
+CharacterPropertyEnumMap = {  # property bit flag -> C++ enumerator name written into the table
+        1: "CharacterProperties::kXidStart",
+        2: "CharacterProperties::kXidContinue"
+}
+
+class CharacterProperty:
+    """Inclusive code point range [first_char, last_char] carrying a bitmask of property flags."""
+    def __init__(self, first_char, last_char, prop_type):
+        self.first_char = first_char
+        self.last_char = last_char
+        self.prop_type = prop_type
+
+    def key(self):
+        return self.first_char  # sort key: the first code point of the range
+
+    def merge(self, other):
+        """Grows this range over `other` if it is adjacent with equal flags, else raises KeyError."""
+        adjacent = self.last_char + 1 == other.first_char
+        if not (adjacent and self.prop_type == other.prop_type):
+            raise KeyError()
+        self.last_char = other.last_char
+
+    def __repr__(self):
+        """Renders the range as a C++ braced initializer, OR-ing together the set flag names."""
+        names = [name for bit, name in CharacterPropertyEnumMap.items() if bit & self.prop_type]
+        return "{}0x{:04x}, 0x{:04x}, {}{}".format(
+                "{", self.first_char, self.last_char, ' | '.join(names), "}")
+
+def extract_unicode_properties(f, props):
+    """Parses DerivedCoreProperties lines from `f` into sorted, coalesced CharacterProperty ranges.
+
+    `props` maps each property name to keep onto the bit flag OR-ed into matching code points.
+    """
+    # NOTE(review): only 4-hex-digit code points can match, so entries above U+FFFF are skipped - confirm.
+    prog = re.compile(
+            r"^(?P<first>[0-9A-Fa-f]{4})(\.\.(?P<last>[0-9A-Fa-f]{4}))?\W+;\W+(?P<prop>\w+)\n$")
+    chars = {}
+    for line in f:
+        match = prog.match(line)
+        if not match or match.group('prop') not in props:
+            continue
+        flag = props[match.group('prop')]
+        first = int(match.group('first'), 16)
+        last = int(match.group('last'), 16) if match.group('last') else first
+        for char in range(first, last + 1):
+            if char not in chars:
+                chars[char] = CharacterProperty(char, char, 0)
+            chars[char].prop_type |= flag
+
+    merged = []
+    for char_prop in sorted(chars.values(), key=CharacterProperty.key):
+        try:
+            merged[-1].merge(char_prop)
+        except (IndexError, KeyError):  # no previous range yet, or not contiguous with it
+            merged.append(char_prop)
+    return merged
+
+license = """/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+"""
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        # sys.exit(str) writes the message to stderr (keeping stdout clean) and exits with status 1.
+        sys.exit("must specify path to icu DerivedCoreProperties file (e.g: "
+                "external/icu/icu4c/source/data/unidata/DerivedCoreProperties.txt)")
+
+    with open(sys.argv[1]) as f:
+        props = {"XID_Start": 1, "XID_Continue": 2}  # bit flags, keyed like CharacterPropertyEnumMap
+        char_props = extract_unicode_properties(f, props)
+        print("{}\nconst static std::array<CharacterProperties, {}> sCharacterProperties = {}"
+                .format(license, len(char_props), "{{"))
+        for prop in char_props:
+            print("    {},".format(prop))
+        print("}};")
+
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index dfa92d7..9fde1b4 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -24,10 +24,11 @@
 #include "androidfw/StringPiece.h"
 #include "utils/Unicode.h"
 
+#include "text/Utf8Iterator.h"
 #include "util/BigBuffer.h"
 #include "util/Maybe.h"
-#include "util/Utf8Iterator.h"
 
+using ::aapt::text::Utf8Iterator;
 using ::android::StringPiece;
 using ::android::StringPiece16;