Merge "Regenerate the TopLevelDomain from iana.org website."
diff --git a/common/java/com/android/common/Patterns.java b/common/java/com/android/common/Patterns.java
index 24a18c0..71c3a5e 100644
--- a/common/java/com/android/common/Patterns.java
+++ b/common/java/com/android/common/Patterns.java
@@ -25,87 +25,87 @@
public class Patterns {
/**
* Regular expression pattern to match all IANA top-level domains.
- * List accurate as of 2007/06/15. List taken from:
+ * List accurate as of 2010/02/05. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
- * This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
+ * This pattern is auto-generated by development/tools/make-iana-tld-pattern.py
*/
- public static final Pattern TOP_LEVEL_DOMAIN
- = Pattern.compile(
- "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
- + "|(biz|b[abdefghijmnorstvwyz])"
- + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
- + "|d[ejkmoz]"
- + "|(edu|e[cegrstu])"
- + "|f[ijkmor]"
- + "|(gov|g[abdefghilmnpqrstuwy])"
- + "|h[kmnrtu]"
- + "|(info|int|i[delmnoqrst])"
- + "|(jobs|j[emop])"
- + "|k[eghimnrwyz]"
- + "|l[abcikrstuvy]"
- + "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
- + "|(name|net|n[acefgilopruz])"
- + "|(org|om)"
- + "|(pro|p[aefghklmnrstwy])"
- + "|qa"
- + "|r[eouw]"
- + "|s[abcdeghijklmnortuvyz]"
- + "|(tel|travel|t[cdfghjklmnoprtvwz])"
- + "|u[agkmsyz]"
- + "|v[aceginu]"
- + "|w[fs]"
- + "|y[etu]"
- + "|z[amw])");
+ public static final Pattern TOP_LEVEL_DOMAIN = Pattern.compile(
+ "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(biz|b[abdefghijmnorstvwyz])"
+ + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+ + "|d[ejkmoz]"
+ + "|(edu|e[cegrstu])"
+ + "|f[ijkmor]"
+ + "|(gov|g[abdefghilmnpqrstuwy])"
+ + "|h[kmnrtu]"
+ + "|(info|int|i[delmnoqrst])"
+ + "|(jobs|j[emop])"
+ + "|k[eghimnprwyz]"
+ + "|l[abcikrstuvy]"
+ + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
+ + "|(name|net|n[acefgilopruz])"
+ + "|(org|om)"
+ + "|(pro|p[aefghklmnrstwy])"
+ + "|qa"
+ + "|r[eosuw]"
+ + "|s[abcdeghijklmnortuvyz]"
+ + "|(tel|travel|t[cdfghjklmnoprtvwz])"
+ + "|u[agksyz]"
+ + "|v[aceginu]"
+ + "|w[fs]"
+ + "|(xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-80akhbyknj4f|xn\\-\\-9t4b11yi5a|xn\\-\\-deba0ad|xn\\-\\-g6w251d|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-zckzah)"
+ + "|y[etu]"
+ + "|z[amw])");
/**
* Regular expression pattern to match RFC 1738 URLs
- * List accurate as of 2007/06/15. List taken from:
+ * List accurate as of 2010/02/05. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
- * This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
+ * This pattern is auto-generated by development/tools/make-iana-tld-pattern.py
*/
- public static final Pattern WEB_URL
- = Pattern.compile(
- "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
- + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
- + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
- + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
- + "(?:" // plus top level domain
- + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
- + "|(?:biz|b[abdefghijmnorstvwyz])"
- + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
- + "|d[ejkmoz]"
- + "|(?:edu|e[cegrstu])"
- + "|f[ijkmor]"
- + "|(?:gov|g[abdefghilmnpqrstuwy])"
- + "|h[kmnrtu]"
- + "|(?:info|int|i[delmnoqrst])"
- + "|(?:jobs|j[emop])"
- + "|k[eghimnrwyz]"
- + "|l[abcikrstuvy]"
- + "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
- + "|(?:name|net|n[acefgilopruz])"
- + "|(?:org|om)"
- + "|(?:pro|p[aefghklmnrstwy])"
- + "|qa"
- + "|r[eouw]"
- + "|s[abcdeghijklmnortuvyz]"
- + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
- + "|u[agkmsyz]"
- + "|v[aceginu]"
- + "|w[fs]"
- + "|y[etu]"
- + "|z[amw]))"
- + "|(?:(?:25[0-5]|2[0-4]" // or ip address
- + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
- + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
- + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
- + "|[1-9][0-9]|[0-9])))"
- + "(?:\\:\\d{1,5})?)" // plus option port number
- + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
- + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
- + "(?:\\b|$)"); // and finally, a word boundary or end of
- // input. This is to stop foo.sure from
- // matching as foo.su
+ public static final Pattern WEB_URL = Pattern.compile(
+ "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ + "(?:" // plus top level domain
+ + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(?:biz|b[abdefghijmnorstvwyz])"
+ + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ + "|d[ejkmoz]"
+ + "|(?:edu|e[cegrstu])"
+ + "|f[ijkmor]"
+ + "|(?:gov|g[abdefghilmnpqrstuwy])"
+ + "|h[kmnrtu]"
+ + "|(?:info|int|i[delmnoqrst])"
+ + "|(?:jobs|j[emop])"
+ + "|k[eghimnprwyz]"
+ + "|l[abcikrstuvy]"
+ + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
+ + "|(?:name|net|n[acefgilopruz])"
+ + "|(?:org|om)"
+ + "|(?:pro|p[aefghklmnrstwy])"
+ + "|qa"
+ + "|r[eosuw]"
+ + "|s[abcdeghijklmnortuvyz]"
+ + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ + "|u[agksyz]"
+ + "|v[aceginu]"
+ + "|w[fs]"
+ + "|(?:xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-80akhbyknj4f|xn\\-\\-9t4b11yi5a|xn\\-\\-deba0ad|xn\\-\\-g6w251d|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-zckzah)"
+ + "|y[etu]"
+ + "|z[amw]))"
+ + "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ + "|[1-9][0-9]|[0-9])))"
+ + "(?:\\:\\d{1,5})?)" // plus option port number
+ + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ + "(?:\\b|$)"); // and finally, a word boundary or end of
+ // input. This is to stop foo.sure from
+ // matching as foo.su
public static final Pattern IP_ADDRESS
= Pattern.compile(
diff --git a/common/tests/src/com/android/common/PatternsTest.java b/common/tests/src/com/android/common/PatternsTest.java
index 7fabe5e..635601e 100644
--- a/common/tests/src/com/android/common/PatternsTest.java
+++ b/common/tests/src/com/android/common/PatternsTest.java
@@ -31,6 +31,20 @@
t = Patterns.TOP_LEVEL_DOMAIN.matcher("com").matches();
assertTrue("Missed valid TLD", t);
+ // One of the new top level domain.
+ t = Patterns.TOP_LEVEL_DOMAIN.matcher("me").matches();
+ assertTrue("Missed valid TLD", t);
+
+ // One of the new top level test domain.
+ t = Patterns.TOP_LEVEL_DOMAIN.matcher("xn--0zwm56d").matches();
+ assertTrue("Missed valid TLD", t);
+
+ t = Patterns.TOP_LEVEL_DOMAIN.matcher("mem").matches();
+ assertFalse("Matched invalid TLD!", t);
+
+ t = Patterns.TOP_LEVEL_DOMAIN.matcher("xn").matches();
+ assertFalse("Matched invalid TLD!", t);
+
t = Patterns.TOP_LEVEL_DOMAIN.matcher("xer").matches();
assertFalse("Matched invalid TLD!", t);
}
@@ -42,6 +56,18 @@
t = Patterns.WEB_URL.matcher("http://www.google.com").matches();
assertTrue("Valid URL", t);
+ // Google in one of the new top level domain.
+ t = Patterns.WEB_URL.matcher("http://www.google.me").matches();
+ assertTrue("Valid URL", t);
+ t = Patterns.WEB_URL.matcher("google.me").matches();
+ assertTrue("Valid URL", t);
+
+ // Test url in Chinese: http://xn--fsqu00a.xn--0zwm56d
+ t = Patterns.WEB_URL.matcher("http://xn--fsqu00a.xn--0zwm56d").matches();
+ assertTrue("Valid URL", t);
+ t = Patterns.WEB_URL.matcher("xn--fsqu00a.xn--0zwm56d").matches();
+ assertTrue("Valid URL", t);
+
t = Patterns.WEB_URL.matcher("ftp://www.example.com").matches();
assertFalse("Matched invalid protocol", t);