Merged revisions 81500-81501 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines
Issue #6662: Fix parsing of malformed character references (&#bad;)
........
r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines
Add the author of the last fix (Issue #6662)
........
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 2cbc2ec..7cee47a 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -175,6 +175,9 @@
i = self.updatepos(i, k)
continue
else:
+ if ";" in rawdata[i:]: #bail by consuming &#
+ self.handle_data(rawdata[0:2])
+ i = self.updatepos(i, 2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 810af6c..c45cf00 100755
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -313,6 +313,13 @@
("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
])
+ def test_malformatted_charref(self):
+ self._run_check("<p>&#bad;</p>", [
+ ("starttag", "p", []),
+ ("data", "&#bad;"),
+ ("endtag", "p"),
+ ])
+
def test_main():
test_support.run_unittest(HTMLParserTestCase)
diff --git a/Misc/ACKS b/Misc/ACKS
index efaa20f..94a22a8 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -191,7 +191,7 @@
Andy Dustman
Gary Duzan
Eugene Dvurechenski
-Josip Dzolonga
+Josip Dzolonga
Maxim Dzumanenko
Walter Dörwald
Hans Eckardt
@@ -812,3 +812,4 @@
Tarek Ziadé
Peter Åstrand
Jesse Noller
+Fredrik Håård
diff --git a/Misc/NEWS b/Misc/NEWS
index badd19a..eba95a0 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -55,6 +55,9 @@
Library
-------
+- Issue #6662: Fix parsing of malformed charref (&#bad;). Patch written by
+ Fredrik Håård.
+
- Issue #1628205: Socket file objects returned by socket.socket.makefile() now
properly handles EINTR within the read, readline, write & flush methods.
The socket.sendall() method now properly handles interrupted system calls.