[Bug #921657] Allow '@' in unquoted HTML attribute values. This is not strictly legal according to the HTML Recommendation, but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann.
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 7334581..553e842 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -26,7 +26,7 @@
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index a830ed7..5b4bd56 100755
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -204,6 +204,10 @@
self._run_check("<e a=rgb(1,2,3)>", [
("starttag", "e", [("a", "rgb(1,2,3)")]),
])
+ # Regression test for SF bug #921657.
+ self._run_check("<a href=mailto:xyz@example.com>", [
+ ("starttag", "a", [("href", "mailto:xyz@example.com")]),
+ ])
def test_attr_entity_replacement(self):
self._run_check("""<a b='&><"''>""", [