py23 XML encoding fixes
Name table entries that are Unicode are written out as native Unicode
now text in the XML now.
diff --git a/Lib/fontTools/misc/xmlWriter.py b/Lib/fontTools/misc/xmlWriter.py
index 116467d..70a6a85 100644
--- a/Lib/fontTools/misc/xmlWriter.py
+++ b/Lib/fontTools/misc/xmlWriter.py
@@ -11,9 +11,13 @@
class XMLWriter:
- def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf-8"):
+ def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None):
if not hasattr(fileOrPath, "write"):
- self.file = open(fileOrPath, "w")
+ try:
+ # Python3 has encoding support.
+ self.file = open(fileOrPath, "w")
+ except TypeError:
+ self.file = open(fileOrPath, "w", encoding="utf-8")
else:
# assume writable file object
self.file = fileOrPath
@@ -23,32 +27,44 @@
self.needindent = 1
self.idlefunc = idlefunc
self.idlecounter = 0
- if encoding:
- self.writeraw('<?xml version="1.0" encoding="%s"?>' % encoding)
- else:
- self.writeraw('<?xml version="1.0"?>')
+ self._writeraw('<?xml version="1.0" encoding="utf-8"?>')
self.newline()
def close(self):
self.file.close()
- def write(self, data):
- self.writeraw(escape(data))
-
- def write_noindent(self, data):
- self.file.write(escape(data))
-
+ def write(self, string, indent=True):
+ """Writes text."""
+ self._writeraw(escape(string), indent=indent)
+
+ def writecdata(self, string):
+ """Writes text in a CDATA section."""
+ self._writeraw("<![CDATA[" + string + "]]>")
+
+ def writeutf16be(self, data):
+ """Writes a UTF-16 bytes() sequence into the XML
+ as native Unicode. When this is read in xmlReader,
+ the original bytes can be recovered by encoding to
+ 'utf-16-be'."""
+ self._writeraw(escape(data.decode('utf-16-be')))
+
def write8bit(self, data):
- self.writeraw(escape8bit(data))
+ """Writes a bytes() sequence into the XML, escaping
+ non-ASCII bytes. When this is read in xmlReader,
+ the original bytes can be recovered by encoding to
+ 'latin-1'."""
+ self._writeraw(escape8bit(data.decode('latin-1')))
- def write16bit(self, data):
- self.writeraw(escape16bit(data))
+ def write_noindent(self, string):
+ """Writes text without indentation."""
+ self._writeraw(escape(string), indent=False)
- def writeraw(self, data):
- if self.needindent:
+ def _writeraw(self, data, indent=True):
+ """Writes bytes, possibly indented."""
+ if indent and self.needindent:
self.file.write(self.indentlevel * self.indentwhite)
self.needindent = 0
- self.file.write(data)
+ self.file.write(tostr(data, encoding="utf-8"))
def newline(self):
self.file.write("\n")
@@ -61,21 +77,21 @@
def comment(self, data):
data = escape(data)
lines = data.split("\n")
- self.writeraw("<!-- " + lines[0])
+ self._writeraw("<!-- " + lines[0])
for line in lines[1:]:
self.newline()
- self.writeraw(" " + line)
- self.writeraw(" -->")
+ self._writeraw(" " + line)
+ self._writeraw(" -->")
def simpletag(self, _TAG_, *args, **kwargs):
attrdata = self.stringifyattrs(*args, **kwargs)
data = "<%s%s/>" % (_TAG_, attrdata)
- self.writeraw(data)
+ self._writeraw(data)
def begintag(self, _TAG_, *args, **kwargs):
attrdata = self.stringifyattrs(*args, **kwargs)
data = "<%s%s>" % (_TAG_, attrdata)
- self.writeraw(data)
+ self._writeraw(data)
self.stack.append(_TAG_)
self.indent()
@@ -84,7 +100,7 @@
del self.stack[-1]
self.dedent()
data = "</%s>" % _TAG_
- self.writeraw(data)
+ self._writeraw(data)
def dumphex(self, data):
linelength = 16
@@ -97,7 +113,7 @@
for j in range(0, hexlinelength, chunksize):
line = line + white + hexline[j:j+chunksize]
white = " "
- self.writeraw(line)
+ self._writeraw(line)
self.newline()
def indent(self):
@@ -123,9 +139,10 @@
def escape(data):
- data = tostr(data)
+ data = tostr(data, 'utf-8')
data = data.replace("&", "&")
data = data.replace("<", "<")
+ data = data.replace(">", ">")
return data
def escapeattr(data):
@@ -134,38 +151,15 @@
return data
def escape8bit(data):
- data = tostr(data)
+ """Input is Unicode string."""
def escapechar(c):
- n = byteord(c)
- if c in "<&":
- if c == "&":
- return "&"
- else:
- return "<"
- elif 32 <= n <= 127:
+ n = ord(c)
+ if 32 <= n <= 127 and c not in "<&>":
return c
else:
return "&#" + repr(n) + ";"
return strjoin(map(escapechar, data))
-def escape16bit(data):
- import array
- a = array.array("H")
- a.fromstring(data)
- if sys.byteorder != "big":
- a.byteswap()
- def escapenum(n, amp=byteord("&"), lt=byteord("<")):
- if n == amp:
- return "&"
- elif n == lt:
- return "<"
- elif 32 <= n <= 127:
- return chr(n)
- else:
- return "&#" + repr(n) + ";"
- return strjoin(map(escapenum, a))
-
-
def hexStr(s):
h = string.hexdigits
r = ''
@@ -173,4 +167,3 @@
i = byteord(c)
r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
return r
-