bpo-39011: Preserve line endings within ElementTree attributes (GH-18468)
* bpo-39011: Preserve line endings within attributes
Line endings within attribute values were normalized to "\n" in Python 3.7/3.8.
This patch removes that normalization: line endings that are written out as
numeric character references must be preserved in their original form.
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 785edb7..d01649d 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -430,13 +430,14 @@
self.assertEqual(ET.tostring(elem),
b'<test testa="testval" testb="test1" testc="test2">aa</test>')
+ # Test preserving white space chars in attributes
elem = ET.Element('test')
elem.set('a', '\r')
elem.set('b', '\r\n')
elem.set('c', '\t\n\r ')
- elem.set('d', '\n\n')
+ elem.set('d', '\n\n\r\r\t\t ')
self.assertEqual(ET.tostring(elem),
- b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
+ b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09; " />')
def test_makeelement(self):
# Test makeelement handling.
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c8d898f..da2bcad 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1057,15 +1057,15 @@
text = text.replace(">", "&gt;")
if "\"" in text:
text = text.replace("\"", "&quot;")
- # The following business with carriage returns is to satisfy
- # Section 2.11 of the XML specification, stating that
- # CR or CR LN should be replaced with just LN
+ # Although section 2.11 of the XML specification states that CR or
+ # CR LF should be replaced with just LF, that applies only to line
+ # ends that organize the file into lines. Within attributes, we emit
+ # them as numeric character references, so the rule does not apply.
# http://www.w3.org/TR/REC-xml/#sec-line-ends
- if "\r\n" in text:
- text = text.replace("\r\n", "\n")
+ # The current solution, contained in the following six lines, was
+ # discussed in issues 17582 and 39011.
if "\r" in text:
- text = text.replace("\r", "\n")
- #The following four lines are issue 17582
+ text = text.replace("\r", "&#13;")
if "\n" in text:
text = text.replace("\n", "&#10;")
if "\t" in text: