- SF bug #853506: IPv6 address parsing in sgmllib
('[' and ']' were not accepted in unquoted attribute values)
- cleaned up the tests of character and entity reference decoding so
that they cover the documented relationships among handle_charref,
handle_entityref, convert_charref, convert_codepoint, and
convert_entityref, without touching on Unicode issues that sgmllib
itself cannot be involved in
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 194396b..3ab57c2 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -33,7 +33,7 @@
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
class SGMLParseError(RuntimeError):
@@ -400,11 +400,11 @@
def handle_charref(self, name):
"""Handle character reference, no need to override."""
- replacement = convert_charref(name)
+ replacement = self.convert_charref(name)
if replacement is None:
self.unknown_charref(name)
else:
- self.handle_data(convert_charref(name))
+ self.handle_data(replacement)
# Definition of entities -- derived classes may override
entitydefs = \