py23 XML encoding fixes
Name table entries that are Unicode are now written out as native Unicode
text in the XML.
diff --git a/Lib/fontTools/misc/xmlWriter.py b/Lib/fontTools/misc/xmlWriter.py
index 116467d..70a6a85 100644
--- a/Lib/fontTools/misc/xmlWriter.py
+++ b/Lib/fontTools/misc/xmlWriter.py
@@ -11,9 +11,13 @@
class XMLWriter:
- def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf-8"):
+ def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None):
if not hasattr(fileOrPath, "write"):
- self.file = open(fileOrPath, "w")
+ try:
+ # Python3 has encoding support.
+ self.file = open(fileOrPath, "w")
+ except TypeError:
+ self.file = open(fileOrPath, "w", encoding="utf-8")
else:
# assume writable file object
self.file = fileOrPath
@@ -23,32 +27,44 @@
self.needindent = 1
self.idlefunc = idlefunc
self.idlecounter = 0
- if encoding:
- self.writeraw('<?xml version="1.0" encoding="%s"?>' % encoding)
- else:
- self.writeraw('<?xml version="1.0"?>')
+ self._writeraw('<?xml version="1.0" encoding="utf-8"?>')
self.newline()
def close(self):
self.file.close()
- def write(self, data):
- self.writeraw(escape(data))
-
- def write_noindent(self, data):
- self.file.write(escape(data))
-
+ def write(self, string, indent=True):
+ """Writes text."""
+ self._writeraw(escape(string), indent=indent)
+
+ def writecdata(self, string):
+ """Writes text in a CDATA section."""
+ self._writeraw("<![CDATA[" + string + "]]>")
+
+ def writeutf16be(self, data):
+ """Writes a UTF-16 bytes() sequence into the XML
+ as native Unicode. When this is read in xmlReader,
+ the original bytes can be recovered by encoding to
+ 'utf-16-be'."""
+ self._writeraw(escape(data.decode('utf-16-be')))
+
def write8bit(self, data):
- self.writeraw(escape8bit(data))
+ """Writes a bytes() sequence into the XML, escaping
+ non-ASCII bytes. When this is read in xmlReader,
+ the original bytes can be recovered by encoding to
+ 'latin-1'."""
+ self._writeraw(escape8bit(data.decode('latin-1')))
- def write16bit(self, data):
- self.writeraw(escape16bit(data))
+ def write_noindent(self, string):
+ """Writes text without indentation."""
+ self._writeraw(escape(string), indent=False)
- def writeraw(self, data):
- if self.needindent:
+ def _writeraw(self, data, indent=True):
+ """Writes bytes, possibly indented."""
+ if indent and self.needindent:
self.file.write(self.indentlevel * self.indentwhite)
self.needindent = 0
- self.file.write(data)
+ self.file.write(tostr(data, encoding="utf-8"))
def newline(self):
self.file.write("\n")
@@ -61,21 +77,21 @@
def comment(self, data):
data = escape(data)
lines = data.split("\n")
- self.writeraw("<!-- " + lines[0])
+ self._writeraw("<!-- " + lines[0])
for line in lines[1:]:
self.newline()
- self.writeraw(" " + line)
- self.writeraw(" -->")
+ self._writeraw(" " + line)
+ self._writeraw(" -->")
def simpletag(self, _TAG_, *args, **kwargs):
attrdata = self.stringifyattrs(*args, **kwargs)
data = "<%s%s/>" % (_TAG_, attrdata)
- self.writeraw(data)
+ self._writeraw(data)
def begintag(self, _TAG_, *args, **kwargs):
attrdata = self.stringifyattrs(*args, **kwargs)
data = "<%s%s>" % (_TAG_, attrdata)
- self.writeraw(data)
+ self._writeraw(data)
self.stack.append(_TAG_)
self.indent()
@@ -84,7 +100,7 @@
del self.stack[-1]
self.dedent()
data = "</%s>" % _TAG_
- self.writeraw(data)
+ self._writeraw(data)
def dumphex(self, data):
linelength = 16
@@ -97,7 +113,7 @@
for j in range(0, hexlinelength, chunksize):
line = line + white + hexline[j:j+chunksize]
white = " "
- self.writeraw(line)
+ self._writeraw(line)
self.newline()
def indent(self):
@@ -123,9 +139,10 @@
def escape(data):
- data = tostr(data)
+ data = tostr(data, 'utf-8')
data = data.replace("&", "&amp;")
data = data.replace("<", "&lt;")
+ data = data.replace(">", "&gt;")
return data
def escapeattr(data):
@@ -134,38 +151,15 @@
return data
def escape8bit(data):
- data = tostr(data)
+ """Input is Unicode string."""
def escapechar(c):
- n = byteord(c)
- if c in "<&":
- if c == "&":
- return "&amp;"
- else:
- return "&lt;"
- elif 32 <= n <= 127:
+ n = ord(c)
+ if 32 <= n <= 127 and c not in "<&>":
return c
else:
return "&#" + repr(n) + ";"
return strjoin(map(escapechar, data))
-def escape16bit(data):
- import array
- a = array.array("H")
- a.fromstring(data)
- if sys.byteorder != "big":
- a.byteswap()
- def escapenum(n, amp=byteord("&"), lt=byteord("<")):
- if n == amp:
- return "&amp;"
- elif n == lt:
- return "&lt;"
- elif 32 <= n <= 127:
- return chr(n)
- else:
- return "&#" + repr(n) + ";"
- return strjoin(map(escapenum, a))
-
-
def hexStr(s):
h = string.hexdigits
r = ''
@@ -173,4 +167,3 @@
i = byteord(c)
r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
return r
-
diff --git a/Lib/fontTools/ttLib/tables/S_V_G_.py b/Lib/fontTools/ttLib/tables/S_V_G_.py
index ed3f4e4..92dd57e 100644
--- a/Lib/fontTools/ttLib/tables/S_V_G_.py
+++ b/Lib/fontTools/ttLib/tables/S_V_G_.py
@@ -253,7 +253,7 @@
for doc, startGID, endGID in self.docList:
writer.begintag("svgDoc", startGlyphID=startGID, endGlyphID=endGID)
writer.newline()
- writer.writeraw("<![CDATA["+ doc + "]]>")
+ writer.writecdata("<![CDATA[" + doc + "]]>")
writer.newline()
writer.endtag("svgDoc")
writer.newline()
@@ -263,7 +263,7 @@
writer.newline()
for uiNameID in self.colorPalettes.colorParamUINameIDs:
writer.begintag("colorParamUINameID")
- writer.writeraw(str(uiNameID))
+ writer.write(uiNameID)
writer.endtag("colorParamUINameID")
writer.newline()
for colorPalette in self.colorPalettes.colorPaletteList:
diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
index 12a0964..001f74f 100644
--- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py
+++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
@@ -60,6 +60,7 @@
if name.string in done:
name.offset, name.length = done[name.string]
else:
+ # TODO Convert to UTF-16?
name.offset, name.length = done[name.string] = len(stringData), len(name.string)
stringData = stringData + name.string
data = data + sstruct.pack(nameRecordFormat, name)
@@ -98,13 +99,13 @@
("langID", hex(self.langID)),
])
writer.newline()
- if self.platformID == 0 or (self.platformID == 3 and self.platEncID in (0, 1)):
+ if self.platformID == 0 or (self.platformID == 3 and self.platEncID in (0, 1, 10)):
+ string = self.string
if len(self.string) % 2:
# no, shouldn't happen, but some of the Apple
# tools cause this anyway :-(
- writer.write16bit(self.string + "\0")
- else:
- writer.write16bit(self.string)
+ string = string + b'\0'
+ writer.writeutf16be(string)
else:
writer.write8bit(self.string)
writer.newline()
@@ -117,9 +118,11 @@
self.platEncID = safeEval(attrs["platEncID"])
self.langID = safeEval(attrs["langID"])
s = strjoin(content).strip()
- if self.platformID == 0 or (self.platformID == 3 and self.platEncID in (0, 1)):
- self.string = s.encode("utf_16_be")
+ if self.platformID == 0 or (self.platformID == 3 and self.platEncID in (0, 1, 10)):
+ # This is the inverse of writeutf16be.
+ self.string = s.encode("utf-16-be")
else:
+ # This is the inverse of write8bit...
self.string = s.encode("latin1")
def __lt__(self, other):