SF patch 1504676: Make sgmllib char and entity references pluggable
(implementation/tests contributed by Sam Ruby)
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index ec417d0..31b54de 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -64,6 +64,23 @@
self.setliteral()
+class HTMLEntityCollector(EventCollector):  # collector whose convert_* hooks log each call and return a unicode char
+ import re, htmlentitydefs  # class-scope import: modules become class attributes (see self.htmlentitydefs below)
+ entity_or_charref = re.compile('(?:&([a-zA-Z][-.a-zA-Z0-9]*)'  # group 1: named entity reference (&name)
+ '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')  # group 2: numeric ref (&#N / &#xH); group 3: optional ';'. NOTE(review): class admits non-hex letters
+
+ def convert_charref(self, name):  # name is the reference text after '&#', e.g. 'x201d' or '8221'
+ self.append(("charref", "convert", name))  # record the call as an event before converting
+ if name.startswith('x'):  # leading 'x' marks a hexadecimal reference
+ return unichr(int(name[1:],16))  # drop the 'x' and parse the rest as base 16
+ else:
+ return unichr(int(name))  # otherwise parse as a decimal code point
+
+ def convert_entityref(self, name):  # name is the entity name after '&', e.g. 'ldquo'
+ self.append(("entityref", "convert", name))  # record the call as an event before converting
+ return unichr(self.htmlentitydefs.name2codepoint[name])  # direct lookup, no fallback for names missing from the table
+
+
class SGMLParserTestCase(unittest.TestCase):
collector = EventCollector
@@ -233,6 +250,16 @@
("k", "*"),
])])
+ def test_convert_overrides(self):  # overridden convert_* hooks must run for references inside attribute values
+ self.collector = HTMLEntityCollector  # use the collector that logs and converts references
+ self.check_events('<a title="&ldquo;test&#x201d;">foo</a>', [  # one named entity ref and one hex char ref
+ ('entityref', 'convert', 'ldquo'),
+ ('charref', 'convert', 'x201d'),
+ ('starttag', 'a', [('title', u'\u201ctest\u201d')]),  # attribute value carries the converted characters
+ ('data', 'foo'),
+ ('endtag', 'a'),
+ ])
+
def test_attr_funky_names(self):
self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),