Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1 | """xmlWriter.py -- Simple XML authoring class""" |
| 2 | |
Behdad Esfahbod | 1ae2959 | 2014-01-14 15:07:50 +0800 | [diff] [blame] | 3 | from __future__ import print_function, division, absolute_import |
Behdad Esfahbod | 30e691e | 2013-11-27 17:27:45 -0500 | [diff] [blame] | 4 | from fontTools.misc.py23 import * |
Behdad Esfahbod | 5cf4008 | 2013-11-27 19:51:59 -0500 | [diff] [blame] | 5 | import sys |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 6 | import string |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 7 | |
# Default whitespace emitted once per indentation level.
# NOTE(review): whitespace runs in this file look like they were collapsed
# during extraction -- confirm the intended indent width against upstream.
INDENT = " "


class XMLWriter(object):
    """Simple streaming XML writer that tracks indentation and open tags.

    The XML declaration is written as soon as the writer is constructed.
    Tags opened with ``begintag`` are kept on ``self.stack`` so ``endtag``
    can check that tags are closed in the right order.

    The writer can be used as a context manager; the underlying file is
    closed when the ``with`` block exits.
    """

    def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None):
        """Create a writer and emit the XML declaration.

        fileOrPath -- an object with a ``write`` method, or a path that is
            opened for writing (as UTF-8 where ``open`` supports it).
        indentwhite -- string written once per indentation level.
        idlefunc -- optional callable with no arguments; ``newline`` calls
            it on every 100th newline, starting with the first one.
        """
        if not hasattr(fileOrPath, "write"):
            try:
                # Python 3's open() has encoding support.
                self.file = open(fileOrPath, "w", encoding="utf-8")
            except TypeError:
                # open() without an ``encoding`` keyword (Python 2).
                self.file = open(fileOrPath, "w")
        else:
            # assume writable file object
            self.file = fileOrPath
        self.indentwhite = indentwhite
        self.indentlevel = 0
        self.stack = []  # names of the currently open tags
        self.needindent = 1  # true at the start of a line
        self.idlefunc = idlefunc
        self.idlecounter = 0  # number of newlines written so far
        self._writeraw('<?xml version="1.0" encoding="utf-8"?>')
        self.newline()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying file; exceptions are not suppressed."""
        self.close()

    def close(self):
        """Close the underlying file object."""
        self.file.close()

    def write(self, string, indent=True):
        """Writes text, escaping XML markup characters."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section."""
        # A literal "]]>" would terminate the section early, so split it
        # across two adjacent CDATA sections.
        string = string.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + string + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write16bit(self, data, strip=False):
        """Writes 16-bit data escaped by escape16bit()."""
        self._writeraw(escape16bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self.write(string, indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Writes data without escaping, possibly indented.

        The indentation prefix is only emitted when ``indent`` is true and
        nothing has been written on the current line yet.  When ``strip``
        is true, surrounding whitespace is removed from the data first.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = tostr(data, encoding="utf-8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """End the current line and mark the next write for indentation."""
        self.file.write("\n")
        self.needindent = 1
        idlecounter = self.idlecounter
        # Give the caller a chance to do periodic work every 100 lines.
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write ``data`` as an XML comment; may span several lines."""
        data = escape(data)
        lines = data.split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            # NOTE(review): the continuation prefix may have been wider
            # before whitespace was collapsed -- confirm against upstream.
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write an empty-element tag ``<tag attr="..."/>``.

        Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write an opening tag, remember it, and indent one level.

        Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Write the closing tag for the most recently opened ``_TAG_``.

        Asserts that ``_TAG_`` matches the innermost open tag.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write ``data`` as hex digits, 16 bytes per line.

        Each line is split into four groups of 8 hex digits separated by
        single spaces; a short final line keeps the separators of its
        missing groups.
        """
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i:i + linelength])
            line = " ".join(
                hexline[j:j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; it must be positive."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Return attributes formatted as `` name="value"`` pairs.

        Attributes are passed either as keyword arguments (written in
        sorted order) or as a single positional iterable of
        ``(name, value)`` pairs (written in the given order), not both.
        Values are converted with ``str`` and escaped with ``escapeattr``.
        Returns an empty string when no attributes are given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        return "".join(
            ' %s="%s"' % (attr, escapeattr(str(value)))
            for attr, value in attributes
        )
| 137 | |
| 138 | |
def escape(data):
    """Return ``data`` as text with XML markup characters escaped.

    ``data`` is first converted with ``tostr(data, 'utf-8')``; then
    ``&``, ``<`` and ``>`` are replaced by their entity references.
    """
    data = tostr(data, 'utf-8')
    # "&" must be handled first so the entities added below are not
    # escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    return data
| 145 | |
def escapeattr(data):
    """Return ``data`` escaped for use inside a double-quoted attribute.

    Applies ``escape`` and additionally replaces the double quote with
    its entity reference so the value cannot end the attribute early.
    """
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data
| 150 | |
def escape8bit(data):
    """Escape a byte string for inclusion in XML text.

    Input is a byte string (it is decoded as 'latin-1' here, so every
    byte maps to the character with the same code).  Characters in the
    range 32..127 other than ``<``, ``&`` and ``>`` are kept as they are;
    everything else becomes a decimal character reference ``&#N;``.
    """
    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        else:
            return "&#" + repr(n) + ";"
    return strjoin(map(escapechar, data.decode('latin-1')))
| 160 | |
def escape16bit(data):
    """Escape a string of big-endian 16-bit values for inclusion in XML.

    ``data`` is loaded into an array of unsigned shorts (typecode "H")
    and byte-swapped on hosts that are not big-endian.  ``&`` and ``<``
    become entity references, values in the range 32..127 are written as
    plain characters, and everything else becomes a decimal character
    reference ``&#N;``.
    """
    import array
    a = array.array("H")
    # array.fromstring() was removed in Python 3.9; prefer frombytes() and
    # fall back only where it does not exist.
    load = getattr(a, "frombytes", None)
    if load is None:
        load = a.fromstring
    load(data)
    if sys.byteorder != "big":
        a.byteswap()

    def escapenum(n, amp=byteord("&"), lt=byteord("<")):
        if n == amp:
            return "&amp;"
        elif n == lt:
            return "&lt;"
        elif 32 <= n <= 127:
            return chr(n)
        else:
            return "&#" + repr(n) + ";"
    return strjoin(map(escapenum, a))
| 177 | |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 178 | |
def hexStr(s):
    """Return the bytes of ``s`` as a string of lowercase hex digits.

    Each item of ``s`` is converted with ``byteord`` and written as two
    hex digits, high nibble first.
    """
    h = string.hexdigits
    pairs = []
    for c in s:
        i = byteord(c)
        pairs.append(h[(i >> 4) & 0xF] + h[i & 0xF])
    # Join once instead of growing the result string on every byte.
    return ''.join(pairs)