Let Rfc822Validator validate IEA in Punycode or Unicode.
Let Rfc822Validator validate internationalized email addresses.
The implementation is based on a regular expression.
Fixes:
- b/13364030 ComposeActivity rejects TLD that are Punycode
- ComposeActivity rejects TLD that are Unicode
Improve unit tests:
- Use RFC example domains, instead of potentially existing domains.
- Add unicode punycode internationalized email address.
- Add validity check for email with special characters.
- Add validity check for "a..b@example.com" (the RFC says dots cannot be successive, but this is allowed by major providers)
- Add invalidity checks for "a@b-.com" (a domain label cannot end with a dash) and "john@doe@example.com" (@ must be unique)
- Remove a@b.12 from invalid emails. There is no tld made of numbers, but no spec strictly prohibits it.
Bug: 13364030
Change-Id: I78bc5d696f587753d776020ef1f9feded2065ad0
diff --git a/common/java/com/android/common/Rfc822Validator.java b/common/java/com/android/common/Rfc822Validator.java
index 2db00ff..bb77508 100644
--- a/common/java/com/android/common/Rfc822Validator.java
+++ b/common/java/com/android/common/Rfc822Validator.java
@@ -19,6 +19,7 @@
import android.text.TextUtils;
import android.text.util.Rfc822Token;
import android.text.util.Rfc822Tokenizer;
+import android.util.Patterns;
import android.widget.AutoCompleteTextView;
import java.util.regex.Pattern;
@@ -38,15 +39,45 @@
*/
@Deprecated
public class Rfc822Validator implements AutoCompleteTextView.Validator {
- /*
- * Regex.EMAIL_ADDRESS_PATTERN hardcodes the TLD that we accept, but we
- * want to make sure we will keep accepting email addresses with TLD's
- * that don't exist at the time of this writing, so this regexp relaxes
- * that constraint by accepting any kind of top level domain, not just
- * ".com", ".fr", etc...
+ /**
+ * Expression that matches the local part of an email address.
+ * This expression does not follow the constraints of the RFC towards the dots, because the
+ * de facto standard is to allow them anywhere.
+ *
+ * It is however a simplification and it will not validate the double-quote syntax.
+ */
+ private static final String EMAIL_ADDRESS_LOCALPART_REGEXP =
+ "((?!\\s)[\\.\\w!#$%&'*+\\-/=?^`{|}~\u0080-\uFFFE])+";
+
+ /**
+ * Alias of characters that can be used in IRI, as per RFC 3987.
+ */
+ private static final String GOOD_IRI_CHAR = Patterns.GOOD_IRI_CHAR;
+
+ /**
+ * Regular expression for a domain label, as per RFC 3490.
+ * Its total length must not exceed 63 octets, according to RFC 5890.
+ */
+ private static final String LABEL_REGEXP =
+ "([" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,61})?[" + GOOD_IRI_CHAR + "]";
+
+ /**
+ * Expression that matches a domain name, including international domain names in Punycode or
+ * Unicode.
+ */
+ private static final String DOMAIN_REGEXP =
+ "("+ LABEL_REGEXP + "\\.)+" // Subdomains and domain
+ // Top-level domain must be at least 2 chars
+ + "[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]";
+
+ /**
+ * Pattern for an email address.
+ *
+ * It is similar to {@link android.util.Patterns#EMAIL_ADDRESS}, but also accepts Unicode
+ * characters.
*/
private static final Pattern EMAIL_ADDRESS_PATTERN =
- Pattern.compile("[^\\s@]+@([^\\s@\\.]+\\.)+[a-zA-z][a-zA-Z][a-zA-Z]*");
+ Pattern.compile(EMAIL_ADDRESS_LOCALPART_REGEXP + "@" + DOMAIN_REGEXP);
private String mDomain;
private boolean mRemoveInvalid = false;
@@ -64,7 +95,6 @@
*/
public boolean isValid(CharSequence text) {
Rfc822Token[] tokens = Rfc822Tokenizer.tokenize(text);
-
return tokens.length == 1 &&
EMAIL_ADDRESS_PATTERN.
matcher(tokens[0].getAddress()).matches();
diff --git a/common/tests/src/com/android/common/Rfc822ValidatorTest.java b/common/tests/src/com/android/common/Rfc822ValidatorTest.java
index cbcc812..61b8f25 100644
--- a/common/tests/src/com/android/common/Rfc822ValidatorTest.java
+++ b/common/tests/src/com/android/common/Rfc822ValidatorTest.java
@@ -18,30 +18,74 @@
import android.test.suitebuilder.annotation.SmallTest;
+import junit.framework.TestCase;
+
import java.util.HashMap;
import java.util.Map;
-import junit.framework.TestCase;
-
public class Rfc822ValidatorTest extends TestCase {
+ static final String[] VALID_EMAILS = new String[] {
+ "a@example.org", "b@exemple.fr", "c@d.e-f",
+ "Very.Common@example.org",
+ "john@EXAMPLE.ORG",
+ "john@a123b.c-d.dept.example.com",
+ "xn--r8jz45g@example.com",
+ "disposable.style.email.with+symbol@example.com",
+ "other.email-with-dash@example.com",
+ "!#$%&'*+-/=?^_`{}|~@example.com", // Use of allowed special characters.
+ "a@domain-label-cannot-be-longer-than-63-chars-and-this-is-maximum.example.com",
+ // Valid de facto, even if RFC doesn't allow it.
+ "a..b@example.com", ".a@example.com", "b.@example.com",
+ // Punycode is an ASCII representation of International domain names.
+ "john.doe@xn--r8jz45g.xn--zckzah",
+ "john.doe@XN--R8JZ45G.XN--ZXKZAH",
+ "xn--r8jz45g@xn--r8jz45g.XN--ZXKZAH",
+ // Quoted address.
+ // TODO(regisd) Fix Rfc822Tokenizer which loses the quotes.
+ // "\"much.more unusual\"",
+ // "\"very.unusual.@.unusual.com\""
+
+ // Valid only in new Internationalized email address.
+ "a@\u00E9.example.com",
+ //"みんな@例え.テスト",
+ "\u307F\u3093\u306A@\u4F8B\u3048.\u30C6\u30B9\u30C8",
+ // "test@test.テスト", // Unicode in TLD only.
+ "everybody@example.\u30C6\u30B9\u30C8",
+ // "test@例え.test", // Unicode in domain only.
+ "everybody@\u4F8B\u3048.test",
+ // "みんな@example.com" // Unicode in localpart only.
+ "\u307F\u3093\u306A@example.test"
+ };
+
+ static final String[] INVALID_EMAILS = new String[] {
+ "a", "example.com", "john.example.com", // Missing at sign.
+ "a b", "a space@example.com", // Space not allowed.
+ // Invalid domain.
+ "john@example..com", "a@b", "a@-b.com", "a@b-.com", "a@b.c",
+ "a@a123456789-123456789-123456789-123456789-123456789-123456789-bcd.example.com",
+ // Invalid characters in domain as per RFC 1034 and RFC 1035,
+ // even if these characters are in RFC5322's domain production.
+ "a@d_e.fg", "a@d!e.fg", "a@d#e.fg", "a@d$e.fg", "a@d%e.fg", "a@d&e.fg", "a@d'e.fg",
+ "a@d*e.fg", "a@d+e.fg", "a@d/e.fg", "a@d=e.fg", "a@d?e.fg", "a@d^e.fg", "a@d{}e.fg",
+ "a@d|e.fg", "a@d~e.fg",
+ // The domain is too long
+ "no@domain-label-cannot-be-longer-than-63-chars-but-this-is-64-chars.com",
+ "john@doe@example.com", // @ must be unique.
+ // Incorrect double quote.
+ // TODO(regisd): Fix Rfc822tokenizer which strips the quotes
+ // "just\"not\"right@example.com", "\"just.not\\\"@example.com",
+ "this\\ still\\\"not\\\\allowed@example.com"
+ };
@SmallTest
public void testEmailValidator() {
Rfc822Validator validator = new Rfc822Validator("gmail.com");
- String[] validEmails = new String[] {
- "a@b.com", "a@b.fr", "a+b@c.com", "a@b.info", "john@example.com", "john@example.fr",
- "john@corp.example.com",
- };
- for (String email : validEmails) {
+ for (String email : VALID_EMAILS) {
assertTrue(email + " should be a valid email address", validator.isValid(email));
}
- String[] invalidEmails = new String[] {
- "a", "a@b", "a b", "a@b.12", "john@example..com", "johnexample.com", "john.example.com"
- };
-
- for (String email : invalidEmails) {
+ for (String email : INVALID_EMAILS) {
assertFalse(email + " should not be a valid email address", validator.isValid(email));
}