Merge "Let Rfc822Validator validate IEA in Punycode or Unicode." into klp-ub-dev
diff --git a/common/java/com/android/common/Rfc822Validator.java b/common/java/com/android/common/Rfc822Validator.java
index 2db00ff..bb77508 100644
--- a/common/java/com/android/common/Rfc822Validator.java
+++ b/common/java/com/android/common/Rfc822Validator.java
@@ -19,6 +19,7 @@
 import android.text.TextUtils;
 import android.text.util.Rfc822Token;
 import android.text.util.Rfc822Tokenizer;
+import android.util.Patterns;
 import android.widget.AutoCompleteTextView;
 
 import java.util.regex.Pattern;
@@ -38,15 +39,45 @@
  */
 @Deprecated
 public class Rfc822Validator implements AutoCompleteTextView.Validator {
-    /*
-     * Regex.EMAIL_ADDRESS_PATTERN hardcodes the TLD that we accept, but we
-     * want to make sure we will keep accepting email addresses with TLD's
-     * that don't exist at the time of this writing, so this regexp relaxes
-     * that constraint by accepting any kind of top level domain, not just
-     * ".com", ".fr", etc...
+    /**
+     * Expression that matches the local part of an email address.
+     * This expression does not enforce the RFC's restrictions on dots, because the de facto
+     * standard is to allow them anywhere.
+     *
+     * It is, however, a simplification: it does not match the double-quoted local-part syntax.
+     */
+    private static final String EMAIL_ADDRESS_LOCALPART_REGEXP =
+        "((?!\\s)[\\.\\w!#$%&'*+\\-/=?^`{|}~\u0080-\uFFFE])+";
+
+    /**
+     * Alias of characters that can be used in IRI, as per RFC 3987.
+     */
+    private static final String GOOD_IRI_CHAR = Patterns.GOOD_IRI_CHAR;
+
+    /**
+     * Regular expression for a domain label, as per RFC 3490.
+     * It matches 1 to 63 characters; RFC 5890 limits a label to 63 octets.
+     */
+    private static final String LABEL_REGEXP =
+        "([" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,61})?[" + GOOD_IRI_CHAR + "]";
+
+    /**
+     * Expression that matches a domain name, including international domain names in Punycode or
+     * Unicode.
+     */
+    private static final String DOMAIN_REGEXP =
+        "("+ LABEL_REGEXP + "\\.)+"                 // Subdomains and domain
+        // Top-level domain must be at least 2 chars
+        + "[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]";
+
+    /**
+     * Pattern for an email address.
+     *
+     * It is similar to {@link android.util.Patterns#EMAIL_ADDRESS}, but also accepts Unicode
+     * characters.
      */
     private static final Pattern EMAIL_ADDRESS_PATTERN =
-            Pattern.compile("[^\\s@]+@([^\\s@\\.]+\\.)+[a-zA-z][a-zA-Z][a-zA-Z]*");
+            Pattern.compile(EMAIL_ADDRESS_LOCALPART_REGEXP + "@" + DOMAIN_REGEXP);
 
     private String mDomain;
     private boolean mRemoveInvalid = false;
@@ -64,7 +95,6 @@
      */
     public boolean isValid(CharSequence text) {
         Rfc822Token[] tokens = Rfc822Tokenizer.tokenize(text);
-
         return tokens.length == 1 &&
                EMAIL_ADDRESS_PATTERN.
                    matcher(tokens[0].getAddress()).matches();
diff --git a/common/tests/src/com/android/common/Rfc822ValidatorTest.java b/common/tests/src/com/android/common/Rfc822ValidatorTest.java
index cbcc812..61b8f25 100644
--- a/common/tests/src/com/android/common/Rfc822ValidatorTest.java
+++ b/common/tests/src/com/android/common/Rfc822ValidatorTest.java
@@ -18,30 +18,74 @@
 
 import android.test.suitebuilder.annotation.SmallTest;
 
+import junit.framework.TestCase;
+
 import java.util.HashMap;
 import java.util.Map;
 
-import junit.framework.TestCase;
-
 public class Rfc822ValidatorTest extends TestCase {
+    static final String[] VALID_EMAILS = new String[] {
+            "a@example.org", "b@exemple.fr", "c@d.e-f",
+            "Very.Common@example.org",
+            "john@EXAMPLE.ORG",
+            "john@a123b.c-d.dept.example.com",
+            "xn--r8jz45g@example.com",
+            "disposable.style.email.with+symbol@example.com",
+            "other.email-with-dash@example.com",
+            "!#$%&'*+-/=?^_`{}|~@example.com",  // Use of allowed special characters.
+            "a@domain-label-cannot-be-longer-than-63-chars-and-this-is-maximum.example.com",
+            // Valid de facto, even if RFC doesn't allow it.
+            "a..b@example.com", ".a@example.com", "b.@example.com",
+            // Punycode is an ASCII representation of International domain names.
+            "john.doe@xn--r8jz45g.xn--zckzah",
+            "john.doe@XN--R8JZ45G.XN--ZXKZAH",
+            "xn--r8jz45g@xn--r8jz45g.XN--ZXKZAH",
+            // Quoted address.
+            // TODO(regisd) Fix Rfc822Tokenizer which loses the quotes.
+            // "\"much.more unusual\"",
+            // "\"very.unusual.@.unusual.com\""
+
+            // Valid only as internationalized email addresses (IEA).
+             "a@\u00E9.example.com",
+            //"みんな@例え.テスト",
+            "\u307F\u3093\u306A@\u4F8B\u3048.\u30C6\u30B9\u30C8",
+            // "test@test.テスト", // Unicode in TLD only.
+            "everybody@example.\u30C6\u30B9\u30C8",
+            // "test@例え.test", // Unicode in domain only.
+            "everybody@\u4F8B\u3048.test",
+            // "みんな@example.com" // Unicode in localpart only.
+            "\u307F\u3093\u306A@example.test"
+    };
+
+    static final String[] INVALID_EMAILS = new String[] {
+            "a", "example.com", "john.example.com", // Missing at sign.
+            "a b", "a space@example.com", // Space not allowed.
+            // Invalid domain.
+            "john@example..com", "a@b", "a@-b.com", "a@b-.com", "a@b.c",
+            "a@a123456789-123456789-123456789-123456789-123456789-123456789-bcd.example.com",
+            // Invalid characters in domain as per RFC 1034 and RFC 1035,
+            // even if these characters are in RFC5322's domain production.
+            "a@d_e.fg", "a@d!e.fg", "a@d#e.fg", "a@d$e.fg", "a@d%e.fg", "a@d&e.fg", "a@d'e.fg",
+            "a@d*e.fg", "a@d+e.fg", "a@d/e.fg", "a@d=e.fg", "a@d?e.fg", "a@d^e.fg", "a@d{}e.fg",
+            "a@d|e.fg", "a@d~e.fg",
+            // The domain label is too long (64 characters; the limit is 63).
+            "no@domain-label-cannot-be-longer-than-63-chars-but-this-is-64-chars.com",
+            "john@doe@example.com", // @ must be unique.
+            // Incorrect double quote.
+            // TODO(regisd): Fix Rfc822Tokenizer, which strips the quotes.
+            // "just\"not\"right@example.com", "\"just.not\\\"@example.com",
+            "this\\ still\\\"not\\\\allowed@example.com"
+    };
 
     @SmallTest
     public void testEmailValidator() {
         Rfc822Validator validator = new Rfc822Validator("gmail.com");
-        String[] validEmails = new String[] {
-            "a@b.com", "a@b.fr", "a+b@c.com", "a@b.info", "john@example.com", "john@example.fr",
-            "john@corp.example.com",
-        };
 
-        for (String email : validEmails) {
+        for (String email : VALID_EMAILS) {
             assertTrue(email + " should be a valid email address", validator.isValid(email));
         }
 
-        String[] invalidEmails = new String[] {
-            "a", "a@b", "a b", "a@b.12", "john@example..com", "johnexample.com", "john.example.com"
-        };
-
-        for (String email : invalidEmails) {
+        for (String email : INVALID_EMAILS) {
             assertFalse(email + " should not be a valid email address", validator.isValid(email));
         }