Benjamin Peterson | ef3e4c2 | 2009-04-11 19:48:14 +0000 | [diff] [blame] | 1 | r"""plistlib.py -- a tool to generate and parse MacOSX .plist files. |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 2 | |
Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 3 | The property list (.plist) file format is a simple XML pickle supporting |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 4 | basic object types, like dictionaries, lists, numbers and strings. |
| 5 | Usually the top level object is a dictionary. |
| 6 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 7 | To write out a plist file, use the dump(value, file) |
| 8 | function. 'value' is the top level object, 'file' is |
| 9 | a (writable) file object. |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 10 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 11 | To parse a plist from a file, use the load(file) function, |
| 12 | with a (readable) file object as the only argument. It |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 13 | returns the top level object (again, usually a dictionary). |
| 14 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 15 | To work with plist data in bytes objects, you can use loads() |
| 16 | and dumps(). |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 17 | |
| 18 | Values can be strings, integers, floats, booleans, tuples, lists, |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 19 | dictionaries (but only with string keys), Data, bytes, bytearray, or |
| 20 | datetime.datetime objects. |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 21 | |
| 22 | Generate Plist example: |
| 23 | |
| 24 | pl = dict( |
Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 25 | aString = "Doodah", |
| 26 | aList = ["A", "B", 12, 32.1, [1, 2, 3]], |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 27 | aFloat = 0.1, |
| 28 | anInt = 728, |
Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 29 | aDict = dict( |
| 30 | anotherString = "<hello & hi there!>", |
| 31 | aUnicodeValue = "M\xe4ssig, Ma\xdf", |
| 32 | aTrueValue = True, |
| 33 | aFalseValue = False, |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 34 | ), |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 35 | someData = b"<binary gunk>", |
| 36 | someMoreData = b"<lots of binary gunk>" * 10, |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 37 | aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())), |
| 38 | ) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 39 | with open(fileName, 'wb') as fp: |
| 40 | dump(pl, fp) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 41 | |
| 42 | Parse Plist example: |
| 43 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 44 | with open(fileName, 'rb') as fp: |
| 45 | pl = load(fp) |
| 46 | print(pl["aKey"]) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 47 | """ |
# Public names exported by ``from plistlib import *``: the deprecated
# helpers (readPlist & co. and the Data wrapper), the format constants, and
# the current load/dump API together with UID.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps", "UID"
]
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 53 | |
| 54 | import binascii |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 55 | import codecs |
| 56 | import contextlib |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 57 | import datetime |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 58 | import enum |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 59 | from io import BytesIO |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 60 | import itertools |
| 61 | import os |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 62 | import re |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 63 | import struct |
| 64 | from warnings import warn |
| 65 | from xml.parsers.expat import ParserCreate |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 66 | |
| 67 | |
# Enumeration of the supported plist serializations.  Its members are also
# copied into the module namespace so that they can be referenced directly
# as FMT_XML and FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 70 | |
| 71 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 72 | # |
| 73 | # |
| 74 | # Deprecated functionality |
| 75 | # |
| 76 | # |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 77 | |
| 78 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 79 | @contextlib.contextmanager |
| 80 | def _maybe_open(pathOrFile, mode): |
| 81 | if isinstance(pathOrFile, str): |
| 82 | with open(pathOrFile, mode) as fp: |
| 83 | yield fp |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 84 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 85 | else: |
| 86 | yield pathOrFile |
| 87 | |
| 88 | |
def readPlist(pathOrFile):
    """
    Parse a .plist and return its root object.

    'pathOrFile' is either a file name or a readable binary file object.
    Binary data is returned wrapped (use_builtin_types=False).

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
         DeprecationWarning, stacklevel=2)

    with _maybe_open(pathOrFile, 'rb') as stream:
        plist = load(stream, fmt=None, use_builtin_types=False)
    return plist
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 101 | |
def writePlist(value, pathOrFile):
    """
    Serialize 'value' as an XML plist with sorted keys.

    'pathOrFile' is either a file name or a (writable) binary file object.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
         DeprecationWarning, stacklevel=2)

    with _maybe_open(pathOrFile, 'wb') as stream:
        dump(value, stream, fmt=FMT_XML, sort_keys=True, skipkeys=False)
| 113 | |
| 114 | |
def readPlistFromBytes(data):
    """
    Parse plist data held in a bytes object and return the root object.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         DeprecationWarning, stacklevel=2)

    stream = BytesIO(data)
    return load(stream, fmt=None, use_builtin_types=False)
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 124 | |
| 125 | |
def writePlistToBytes(value):
    """
    Serialize 'value' as an XML plist and return it as a bytes object.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         DeprecationWarning, stacklevel=2)

    buffer = BytesIO()
    dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return buffer.getvalue()
| 137 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 138 | |
class Data:
    """
    Thin wrapper that marks a bytes value as plist binary data.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        # Only real bytes objects are accepted.
        if isinstance(data, bytes):
            self.data = data
        else:
            raise TypeError("data must be as bytes")

    @classmethod
    def fromBase64(cls, data):
        # Alternate constructor: decode base64 input, then wrap the result.
        decoded = _decode_base64(data)
        return cls(decoded)

    def asBase64(self, maxlinelength=76):
        # Return the payload base64-encoded via the module helper.
        payload = self.data
        return _encode_base64(payload, maxlinelength)

    def __eq__(self, other):
        # Comparable with other Data instances and with raw bytes only.
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            return NotImplemented
        return self.data == other

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, repr(self.data))
| 170 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 171 | # |
| 172 | # |
| 173 | # End of deprecated functionality |
| 174 | # |
| 175 | # |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 176 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 177 | |
class UID:
    """Wrapper around an integer UID in the range 0 <= data < 2**64.

    Instances compare equal (and hash equal) when their integer values
    match, and can be used wherever an index-like integer is accepted.
    """

    def __init__(self, data):
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        # The two range checks are mutually exclusive, so their order does
        # not affect which error is reported.
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, repr(self.data))

    def __reduce__(self):
        # Pickle as "call the class with the stored integer".
        return (self.__class__, (self.data,))

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
| 204 | |
| 205 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 206 | # |
| 207 | # XML support |
| 208 | # |
| 209 | |
| 210 | |
# XML 'header': the XML declaration and the plist DOCTYPE.  _PlistWriter
# writes these bytes verbatim at the start of the output when its
# writeHeader argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
| 216 | |
| 217 | |
# Regex to find any control chars (code points 0x00-0x1f), except for
# \t (0x09), \n (0x0a) and \r (0x0d).  _escape() uses it to reject strings
# containing such characters.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
| 222 | |
| 223 | def _encode_base64(s, maxlinelength=76): |
| 224 | # copied from base64.encodebytes(), with added maxlinelength argument |
| 225 | maxbinsize = (maxlinelength//4)*3 |
| 226 | pieces = [] |
| 227 | for i in range(0, len(s), maxbinsize): |
| 228 | chunk = s[i : i + maxbinsize] |
| 229 | pieces.append(binascii.b2a_base64(chunk)) |
| 230 | return b''.join(pieces) |
| 231 | |
| 232 | def _decode_base64(s): |
| 233 | if isinstance(s, str): |
| 234 | return binascii.a2b_base64(s.encode("utf-8")) |
| 235 | |
| 236 | else: |
| 237 | return binascii.a2b_base64(s) |
| 238 | |
| 239 | # Contents should conform to a subset of ISO 8601 |
| 240 | # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units |
| 241 | # may be omitted with # a loss of precision) |
| 242 | _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII) |
| 243 | |
| 244 | |
| 245 | def _date_from_string(s): |
| 246 | order = ('year', 'month', 'day', 'hour', 'minute', 'second') |
| 247 | gd = _dateParser.match(s).groupdict() |
| 248 | lst = [] |
| 249 | for key in order: |
| 250 | val = gd[key] |
| 251 | if val is None: |
| 252 | break |
| 253 | lst.append(int(val)) |
| 254 | return datetime.datetime(*lst) |
| 255 | |
| 256 | |
| 257 | def _date_to_string(d): |
| 258 | return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( |
| 259 | d.year, d.month, d.day, |
| 260 | d.hour, d.minute, d.second |
| 261 | ) |
| 262 | |
def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalized to "\\n" and the characters '&', '<' and
    '>' are replaced by their predefined XML entities.

    Raises ValueError if *text* contains a control character other than
    tab, newline or carriage return.
    """
    m = _controlCharPat.search(text)
    if m is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    text = text.replace("\r\n", "\n")       # convert DOS line endings
    text = text.replace("\r", "\n")         # convert Mac line endings
    # '&' must be escaped first, otherwise the ampersands introduced by the
    # following two replacements would themselves be escaped again.
    text = text.replace("&", "&amp;")       # escape '&'
    text = text.replace("<", "&lt;")        # escape '<'
    text = text.replace(">", "&gt;")        # escape '>'
    return text
| 274 | |
| 275 | class _PlistParser: |
| 276 | def __init__(self, use_builtin_types, dict_type): |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 277 | self.stack = [] |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 278 | self.current_key = None |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 279 | self.root = None |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 280 | self._use_builtin_types = use_builtin_types |
| 281 | self._dict_type = dict_type |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 282 | |
| 283 | def parse(self, fileobj): |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 284 | self.parser = ParserCreate() |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 285 | self.parser.StartElementHandler = self.handle_begin_element |
| 286 | self.parser.EndElementHandler = self.handle_end_element |
| 287 | self.parser.CharacterDataHandler = self.handle_data |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 288 | self.parser.ParseFile(fileobj) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 289 | return self.root |
| 290 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 291 | def handle_begin_element(self, element, attrs): |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 292 | self.data = [] |
| 293 | handler = getattr(self, "begin_" + element, None) |
| 294 | if handler is not None: |
| 295 | handler(attrs) |
| 296 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 297 | def handle_end_element(self, element): |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 298 | handler = getattr(self, "end_" + element, None) |
| 299 | if handler is not None: |
| 300 | handler() |
| 301 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 302 | def handle_data(self, data): |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 303 | self.data.append(data) |
| 304 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 305 | def add_object(self, value): |
| 306 | if self.current_key is not None: |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 307 | if not isinstance(self.stack[-1], type({})): |
| 308 | raise ValueError("unexpected element at line %d" % |
| 309 | self.parser.CurrentLineNumber) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 310 | self.stack[-1][self.current_key] = value |
| 311 | self.current_key = None |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 312 | elif not self.stack: |
| 313 | # this is the root object |
| 314 | self.root = value |
| 315 | else: |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 316 | if not isinstance(self.stack[-1], type([])): |
| 317 | raise ValueError("unexpected element at line %d" % |
| 318 | self.parser.CurrentLineNumber) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 319 | self.stack[-1].append(value) |
| 320 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 321 | def get_data(self): |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 322 | data = ''.join(self.data) |
| 323 | self.data = [] |
| 324 | return data |
| 325 | |
| 326 | # element handlers |
| 327 | |
| 328 | def begin_dict(self, attrs): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 329 | d = self._dict_type() |
| 330 | self.add_object(d) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 331 | self.stack.append(d) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 332 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 333 | def end_dict(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 334 | if self.current_key: |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 335 | raise ValueError("missing value for key '%s' at line %d" % |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 336 | (self.current_key,self.parser.CurrentLineNumber)) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 337 | self.stack.pop() |
| 338 | |
| 339 | def end_key(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 340 | if self.current_key or not isinstance(self.stack[-1], type({})): |
Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 341 | raise ValueError("unexpected key at line %d" % |
| 342 | self.parser.CurrentLineNumber) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 343 | self.current_key = self.get_data() |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 344 | |
| 345 | def begin_array(self, attrs): |
| 346 | a = [] |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 347 | self.add_object(a) |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 348 | self.stack.append(a) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 349 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 350 | def end_array(self): |
| 351 | self.stack.pop() |
| 352 | |
| 353 | def end_true(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 354 | self.add_object(True) |
| 355 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 356 | def end_false(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 357 | self.add_object(False) |
| 358 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 359 | def end_integer(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 360 | self.add_object(int(self.get_data())) |
| 361 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 362 | def end_real(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 363 | self.add_object(float(self.get_data())) |
| 364 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 365 | def end_string(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 366 | self.add_object(self.get_data()) |
| 367 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 368 | def end_data(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 369 | if self._use_builtin_types: |
| 370 | self.add_object(_decode_base64(self.get_data())) |
| 371 | |
| 372 | else: |
| 373 | self.add_object(Data.fromBase64(self.get_data())) |
| 374 | |
Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 375 | def end_date(self): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 376 | self.add_object(_date_from_string(self.get_data())) |
| 377 | |
| 378 | |
| 379 | class _DumbXMLWriter: |
| 380 | def __init__(self, file, indent_level=0, indent="\t"): |
| 381 | self.file = file |
| 382 | self.stack = [] |
| 383 | self._indent_level = indent_level |
| 384 | self.indent = indent |
| 385 | |
| 386 | def begin_element(self, element): |
| 387 | self.stack.append(element) |
| 388 | self.writeln("<%s>" % element) |
| 389 | self._indent_level += 1 |
| 390 | |
| 391 | def end_element(self, element): |
| 392 | assert self._indent_level > 0 |
| 393 | assert self.stack.pop() == element |
| 394 | self._indent_level -= 1 |
| 395 | self.writeln("</%s>" % element) |
| 396 | |
| 397 | def simple_element(self, element, value=None): |
| 398 | if value is not None: |
| 399 | value = _escape(value) |
| 400 | self.writeln("<%s>%s</%s>" % (element, value, element)) |
| 401 | |
| 402 | else: |
| 403 | self.writeln("<%s/>" % element) |
| 404 | |
| 405 | def writeln(self, line): |
| 406 | if line: |
| 407 | # plist has fixed encoding of utf-8 |
| 408 | |
| 409 | # XXX: is this test needed? |
| 410 | if isinstance(line, str): |
| 411 | line = line.encode('utf-8') |
| 412 | self.file.write(self._indent_level * self.indent) |
| 413 | self.file.write(line) |
| 414 | self.file.write(b'\n') |
| 415 | |
| 416 | |
class _PlistWriter(_DumbXMLWriter):
    """Serialize a tree of Python objects as an XML plist document.

    file         -- writable binary file object.
    indent_level -- initial nesting depth.
    indent       -- bytes written once per nesting level.
    writeHeader  -- if true, PLISTHEADER is written to *file* immediately.
    sort_keys    -- if true, dict entries are written in sorted key order.
    skipkeys     -- if true, dict entries whose key is not a str are
                    silently dropped instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, dispatching on its Python type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside the range -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is a subclass of int, so the booleans must be handled
        # before the integer branch.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, Data):
            self.write_data(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        """Write a Data wrapper as a <data> element."""
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* as a base64-encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the same depth as the <data> tag
        # itself, so temporarily undo the indent added by begin_element().
        self._indent_level -= 1
        # Aim for 76-column lines with tabs counted as 8 columns, but never
        # go below 16 characters of payload per line.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write *d* as a <dict> element (``<dict/>`` when empty).

        Raises TypeError("keys must be strings") for a non-str key unless
        skipkeys is set, in which case such entries are omitted.
        """
        if d:
            self.begin_element("dict")

            # Validate/filter the keys before sorting: sorting first would
            # fail with an unrelated comparison error on mixed key types,
            # even when skipkeys asks for those entries to be dropped.
            items = []
            for key, value in d.items():
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                items.append((key, value))

            if self._sort_keys:
                # Sort on the key alone; values need not be comparable.
                items.sort(key=lambda item: item[0])

            for key, value in items:
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write *array* as an <array> element (``<array/>`` when empty)."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
| 515 | |
| 516 | |
| 517 | def _is_fmt_xml(header): |
| 518 | prefixes = (b'<?xml', b'<plist') |
| 519 | |
| 520 | for pfx in prefixes: |
| 521 | if header.startswith(pfx): |
| 522 | return True |
| 523 | |
| 524 | # Also check for alternative XML encodings, this is slightly |
| 525 | # overkill because the Apple tools (and plistlib) will not |
| 526 | # generate files with these encodings. |
| 527 | for bom, encoding in ( |
| 528 | (codecs.BOM_UTF8, "utf-8"), |
| 529 | (codecs.BOM_UTF16_BE, "utf-16-be"), |
| 530 | (codecs.BOM_UTF16_LE, "utf-16-le"), |
| 531 | # expat does not support utf-32 |
| 532 | #(codecs.BOM_UTF32_BE, "utf-32-be"), |
| 533 | #(codecs.BOM_UTF32_LE, "utf-32-le"), |
| 534 | ): |
| 535 | if not header.startswith(bom): |
| 536 | continue |
| 537 | |
| 538 | for start in prefixes: |
| 539 | prefix = bom + start.decode('ascii').encode(encoding) |
| 540 | if header[:len(prefix)] == prefix: |
| 541 | return True |
| 542 | |
| 543 | return False |
| 544 | |
| 545 | # |
| 546 | # Binary Plist |
| 547 | # |
| 548 | |
| 549 | |
class InvalidFileException(ValueError):
    """ValueError subclass raised for plist files that cannot be parsed."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
| 553 | |
# Maps an integer width in bytes to the struct format code of the unsigned
# integer of that width (used together with the big-endian '>' prefix).
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Sentinel marking object-table slots that have not been read yet; a
# dedicated object is needed because None is itself a valid parsed value.
_undefined = object()
| 557 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 558 | class _BinaryPlistParser: |
| 559 | """ |
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.
| 563 | |
| 564 | see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c |
| 565 | """ |
| 566 | def __init__(self, use_builtin_types, dict_type): |
| 567 | self._use_builtin_types = use_builtin_types |
| 568 | self._dict_type = dict_type |
| 569 | |
| 570 | def parse(self, fp): |
| 571 | try: |
| 572 | # The basic file format: |
| 573 | # HEADER |
| 574 | # object... |
| 575 | # refid->offset... |
| 576 | # TRAILER |
| 577 | self._fp = fp |
| 578 | self._fp.seek(-32, os.SEEK_END) |
| 579 | trailer = self._fp.read(32) |
| 580 | if len(trailer) != 32: |
| 581 | raise InvalidFileException() |
| 582 | ( |
| 583 | offset_size, self._ref_size, num_objects, top_object, |
| 584 | offset_table_offset |
| 585 | ) = struct.unpack('>6xBBQQQ', trailer) |
| 586 | self._fp.seek(offset_table_offset) |
Serhiy Storchaka | 0652664 | 2014-05-23 16:13:33 +0300 | [diff] [blame] | 587 | self._object_offsets = self._read_ints(num_objects, offset_size) |
Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 588 | self._objects = [_undefined] * num_objects |
| 589 | return self._read_object(top_object) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 590 | |
Serhiy Storchaka | db91e0f | 2017-10-31 14:05:53 +0200 | [diff] [blame] | 591 | except (OSError, IndexError, struct.error, OverflowError, |
| 592 | UnicodeDecodeError): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 593 | raise InvalidFileException() |
| 594 | |
| 595 | def _get_size(self, tokenL): |
| 596 | """ return the size of the next object.""" |
| 597 | if tokenL == 0xF: |
| 598 | m = self._fp.read(1)[0] & 0x3 |
| 599 | s = 1 << m |
| 600 | f = '>' + _BINARY_FORMAT[s] |
| 601 | return struct.unpack(f, self._fp.read(s))[0] |
| 602 | |
| 603 | return tokenL |
| 604 | |
Serhiy Storchaka | 0652664 | 2014-05-23 16:13:33 +0300 | [diff] [blame] | 605 | def _read_ints(self, n, size): |
| 606 | data = self._fp.read(size * n) |
| 607 | if size in _BINARY_FORMAT: |
| 608 | return struct.unpack('>' + _BINARY_FORMAT[size] * n, data) |
| 609 | else: |
Serhiy Storchaka | db91e0f | 2017-10-31 14:05:53 +0200 | [diff] [blame] | 610 | if not size or len(data) != size * n: |
| 611 | raise InvalidFileException() |
Serhiy Storchaka | 0652664 | 2014-05-23 16:13:33 +0300 | [diff] [blame] | 612 | return tuple(int.from_bytes(data[i: i + size], 'big') |
| 613 | for i in range(0, size * n, size)) |
| 614 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 615 | def _read_refs(self, n): |
Serhiy Storchaka | 0652664 | 2014-05-23 16:13:33 +0300 | [diff] [blame] | 616 | return self._read_ints(n, self._ref_size) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 617 | |
def _read_object(self, ref):
    """
    Read the object by reference.

    'ref' is an index into self._objects and self._object_offsets.  An
    object that has already been decoded is returned straight from
    self._objects; otherwise the file is positioned at the object's
    offset and the object is decoded according to its type token.

    May recursively read sub-objects (content of an array/dict/set).

    Raises InvalidFileException for an unknown type token, for a data or
    string payload that is shorter than its declared size (truncated
    file), and for a dictionary whose key is not hashable.
    """
    result = self._objects[ref]
    if result is not _undefined:
        return result

    offset = self._object_offsets[ref]
    self._fp.seek(offset)
    token = self._fp.read(1)[0]
    # High nibble selects the object type, low nibble carries a size or
    # a type-specific parameter.
    tokenH, tokenL = token & 0xF0, token & 0x0F

    if token == 0x00:
        result = None

    elif token == 0x08:
        result = False

    elif token == 0x09:
        result = True

    # The referenced source code also mentions URL (0x0c, 0x0d) and
    # UUID (0x0e), but neither can be generated using the Cocoa libraries.

    elif token == 0x0f:
        result = b''

    elif tokenH == 0x10:  # int
        # The payload is 2**tokenL bytes long; payloads of 8 bytes and
        # up are decoded as signed.
        result = int.from_bytes(self._fp.read(1 << tokenL),
                                'big', signed=tokenL >= 3)

    elif token == 0x22:  # real
        result = struct.unpack('>f', self._fp.read(4))[0]

    elif token == 0x23:  # real
        result = struct.unpack('>d', self._fp.read(8))[0]

    elif token == 0x33:  # date
        f = struct.unpack('>d', self._fp.read(8))[0]
        # timestamp 0 of binary plists corresponds to 1/1/2001
        # (year of Mac OS X 10.0), instead of 1/1/1970.
        result = (datetime.datetime(2001, 1, 1) +
                  datetime.timedelta(seconds=f))

    elif tokenH == 0x40:  # data
        s = self._get_size(tokenL)
        result = self._fp.read(s)
        # A short read means the file is truncated; do not hand back
        # silently shortened data.
        if len(result) != s:
            raise InvalidFileException()
        if not self._use_builtin_types:
            result = Data(result)

    elif tokenH == 0x50:  # ascii string
        s = self._get_size(tokenL)
        data = self._fp.read(s)
        if len(data) != s:
            raise InvalidFileException()
        result = data.decode('ascii')

    elif tokenH == 0x60:  # unicode string
        # The declared size counts UTF-16 code units, two bytes each.
        s = self._get_size(tokenL) * 2
        data = self._fp.read(s)
        if len(data) != s:
            raise InvalidFileException()
        result = data.decode('utf-16be')

    elif tokenH == 0x80:  # UID
        # used by Key-Archiver plist files
        result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big'))

    elif tokenH == 0xA0:  # array
        s = self._get_size(tokenL)
        obj_refs = self._read_refs(s)
        result = []
        # Register the (still empty) list before recursing so that a
        # reference back to this array resolves to the same object.
        self._objects[ref] = result
        result.extend(self._read_object(x) for x in obj_refs)

    # tokenH == 0xB0 is documented as 'ordset', but is not actually
    # implemented in the Apple reference code.

    # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
    # plists.

    elif tokenH == 0xD0:  # dict
        s = self._get_size(tokenL)
        key_refs = self._read_refs(s)
        obj_refs = self._read_refs(s)
        result = self._dict_type()
        # Register the (still empty) dict before recursing, as for arrays.
        self._objects[ref] = result
        try:
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(k)] = self._read_object(o)
        except TypeError:
            # An unhashable key (e.g. an array used as key) means the
            # file is malformed.
            raise InvalidFileException()

    else:
        raise InvalidFileException()

    self._objects[ref] = result
    return result
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 711 | |
| 712 | def _count_to_size(count): |
| 713 | if count < 1 << 8: |
| 714 | return 1 |
| 715 | |
| 716 | elif count < 1 << 16: |
| 717 | return 2 |
| 718 | |
| 719 | elif count << 1 << 32: |
| 720 | return 4 |
| 721 | |
| 722 | else: |
| 723 | return 8 |
| 724 | |
# Types that _BinaryPlistWriter looks up by value: instances are keyed as
# (type(value), value) in its object table.  Values of other types
# (apart from Data, which is keyed on its payload) are keyed by id().
_scalars = (str, int, float, datetime.datetime, bytes)
| 726 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 727 | class _BinaryPlistWriter (object): |
| 728 | def __init__(self, fp, sort_keys, skipkeys): |
| 729 | self._fp = fp |
| 730 | self._sort_keys = sort_keys |
| 731 | self._skipkeys = skipkeys |
| 732 | |
| 733 | def write(self, value): |
| 734 | |
| 735 | # Flattened object list: |
| 736 | self._objlist = [] |
| 737 | |
| 738 | # Mappings from object->objectid |
| 739 | # First dict has (type(object), object) as the key, |
| 740 | # second dict is used when object is not hashable and |
| 741 | # has id(object) as the key. |
| 742 | self._objtable = {} |
| 743 | self._objidtable = {} |
| 744 | |
| 745 | # Create list of all objects in the plist |
| 746 | self._flatten(value) |
| 747 | |
| 748 | # Size of object references in serialized containers |
| 749 | # depends on the number of objects in the plist. |
| 750 | num_objects = len(self._objlist) |
| 751 | self._object_offsets = [0]*num_objects |
| 752 | self._ref_size = _count_to_size(num_objects) |
| 753 | |
| 754 | self._ref_format = _BINARY_FORMAT[self._ref_size] |
| 755 | |
| 756 | # Write file header |
| 757 | self._fp.write(b'bplist00') |
| 758 | |
| 759 | # Write object list |
| 760 | for obj in self._objlist: |
| 761 | self._write_object(obj) |
| 762 | |
| 763 | # Write refnum->object offset table |
| 764 | top_object = self._getrefnum(value) |
| 765 | offset_table_offset = self._fp.tell() |
| 766 | offset_size = _count_to_size(offset_table_offset) |
| 767 | offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects |
| 768 | self._fp.write(struct.pack(offset_format, *self._object_offsets)) |
| 769 | |
| 770 | # Write trailer |
| 771 | sort_version = 0 |
| 772 | trailer = ( |
| 773 | sort_version, offset_size, self._ref_size, num_objects, |
| 774 | top_object, offset_table_offset |
| 775 | ) |
| 776 | self._fp.write(struct.pack('>5xBBBQQQ', *trailer)) |
| 777 | |
| 778 | def _flatten(self, value): |
| 779 | # First check if the object is in the object table, not used for |
| 780 | # containers to ensure that two subcontainers with the same contents |
| 781 | # will be serialized as distinct values. |
Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 782 | if isinstance(value, _scalars): |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 783 | if (type(value), value) in self._objtable: |
| 784 | return |
| 785 | |
| 786 | elif isinstance(value, Data): |
| 787 | if (type(value.data), value.data) in self._objtable: |
| 788 | return |
| 789 | |
Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 790 | elif id(value) in self._objidtable: |
| 791 | return |
| 792 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 793 | # Add to objectreference map |
| 794 | refnum = len(self._objlist) |
| 795 | self._objlist.append(value) |
Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 796 | if isinstance(value, _scalars): |
| 797 | self._objtable[(type(value), value)] = refnum |
| 798 | elif isinstance(value, Data): |
| 799 | self._objtable[(type(value.data), value.data)] = refnum |
| 800 | else: |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 801 | self._objidtable[id(value)] = refnum |
| 802 | |
| 803 | # And finally recurse into containers |
| 804 | if isinstance(value, dict): |
| 805 | keys = [] |
| 806 | values = [] |
| 807 | items = value.items() |
| 808 | if self._sort_keys: |
| 809 | items = sorted(items) |
| 810 | |
| 811 | for k, v in items: |
| 812 | if not isinstance(k, str): |
| 813 | if self._skipkeys: |
| 814 | continue |
| 815 | raise TypeError("keys must be strings") |
| 816 | keys.append(k) |
| 817 | values.append(v) |
| 818 | |
| 819 | for o in itertools.chain(keys, values): |
| 820 | self._flatten(o) |
| 821 | |
| 822 | elif isinstance(value, (list, tuple)): |
| 823 | for o in value: |
| 824 | self._flatten(o) |
| 825 | |
| 826 | def _getrefnum(self, value): |
Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 827 | if isinstance(value, _scalars): |
| 828 | return self._objtable[(type(value), value)] |
| 829 | elif isinstance(value, Data): |
| 830 | return self._objtable[(type(value.data), value.data)] |
| 831 | else: |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 832 | return self._objidtable[id(value)] |
| 833 | |
| 834 | def _write_size(self, token, size): |
| 835 | if size < 15: |
| 836 | self._fp.write(struct.pack('>B', token | size)) |
| 837 | |
| 838 | elif size < 1 << 8: |
| 839 | self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size)) |
| 840 | |
| 841 | elif size < 1 << 16: |
| 842 | self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size)) |
| 843 | |
| 844 | elif size < 1 << 32: |
| 845 | self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size)) |
| 846 | |
| 847 | else: |
| 848 | self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size)) |
| 849 | |
| 850 | def _write_object(self, value): |
| 851 | ref = self._getrefnum(value) |
| 852 | self._object_offsets[ref] = self._fp.tell() |
| 853 | if value is None: |
| 854 | self._fp.write(b'\x00') |
| 855 | |
| 856 | elif value is False: |
| 857 | self._fp.write(b'\x08') |
| 858 | |
| 859 | elif value is True: |
| 860 | self._fp.write(b'\x09') |
| 861 | |
| 862 | elif isinstance(value, int): |
Ronald Oussoren | 6db6653 | 2014-01-15 11:32:35 +0100 | [diff] [blame] | 863 | if value < 0: |
| 864 | try: |
| 865 | self._fp.write(struct.pack('>Bq', 0x13, value)) |
| 866 | except struct.error: |
Ronald Oussoren | 94e44a9 | 2014-02-06 11:19:18 +0100 | [diff] [blame] | 867 | raise OverflowError(value) from None |
Ronald Oussoren | 6db6653 | 2014-01-15 11:32:35 +0100 | [diff] [blame] | 868 | elif value < 1 << 8: |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 869 | self._fp.write(struct.pack('>BB', 0x10, value)) |
| 870 | elif value < 1 << 16: |
| 871 | self._fp.write(struct.pack('>BH', 0x11, value)) |
| 872 | elif value < 1 << 32: |
| 873 | self._fp.write(struct.pack('>BL', 0x12, value)) |
Ronald Oussoren | 94e44a9 | 2014-02-06 11:19:18 +0100 | [diff] [blame] | 874 | elif value < 1 << 63: |
| 875 | self._fp.write(struct.pack('>BQ', 0x13, value)) |
| 876 | elif value < 1 << 64: |
| 877 | self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True)) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 878 | else: |
Ronald Oussoren | 94e44a9 | 2014-02-06 11:19:18 +0100 | [diff] [blame] | 879 | raise OverflowError(value) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 880 | |
| 881 | elif isinstance(value, float): |
| 882 | self._fp.write(struct.pack('>Bd', 0x23, value)) |
| 883 | |
| 884 | elif isinstance(value, datetime.datetime): |
| 885 | f = (value - datetime.datetime(2001, 1, 1)).total_seconds() |
| 886 | self._fp.write(struct.pack('>Bd', 0x33, f)) |
| 887 | |
| 888 | elif isinstance(value, Data): |
| 889 | self._write_size(0x40, len(value.data)) |
| 890 | self._fp.write(value.data) |
| 891 | |
| 892 | elif isinstance(value, (bytes, bytearray)): |
| 893 | self._write_size(0x40, len(value)) |
| 894 | self._fp.write(value) |
| 895 | |
| 896 | elif isinstance(value, str): |
| 897 | try: |
| 898 | t = value.encode('ascii') |
| 899 | self._write_size(0x50, len(value)) |
| 900 | except UnicodeEncodeError: |
| 901 | t = value.encode('utf-16be') |
Serhiy Storchaka | 7338ebc | 2016-10-04 20:04:30 +0300 | [diff] [blame] | 902 | self._write_size(0x60, len(t) // 2) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 903 | |
| 904 | self._fp.write(t) |
| 905 | |
Jon Janzen | c981ad1 | 2019-05-15 22:14:38 +0200 | [diff] [blame] | 906 | elif isinstance(value, UID): |
| 907 | if value.data < 0: |
| 908 | raise ValueError("UIDs must be positive") |
| 909 | elif value.data < 1 << 8: |
| 910 | self._fp.write(struct.pack('>BB', 0x80, value)) |
| 911 | elif value.data < 1 << 16: |
| 912 | self._fp.write(struct.pack('>BH', 0x81, value)) |
| 913 | elif value.data < 1 << 32: |
| 914 | self._fp.write(struct.pack('>BL', 0x83, value)) |
| 915 | elif value.data < 1 << 64: |
| 916 | self._fp.write(struct.pack('>BQ', 0x87, value)) |
| 917 | else: |
| 918 | raise OverflowError(value) |
| 919 | |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 920 | elif isinstance(value, (list, tuple)): |
| 921 | refs = [self._getrefnum(o) for o in value] |
| 922 | s = len(refs) |
| 923 | self._write_size(0xA0, s) |
| 924 | self._fp.write(struct.pack('>' + self._ref_format * s, *refs)) |
| 925 | |
| 926 | elif isinstance(value, dict): |
| 927 | keyRefs, valRefs = [], [] |
| 928 | |
| 929 | if self._sort_keys: |
| 930 | rootItems = sorted(value.items()) |
| 931 | else: |
| 932 | rootItems = value.items() |
| 933 | |
| 934 | for k, v in rootItems: |
| 935 | if not isinstance(k, str): |
| 936 | if self._skipkeys: |
| 937 | continue |
| 938 | raise TypeError("keys must be strings") |
| 939 | keyRefs.append(self._getrefnum(k)) |
| 940 | valRefs.append(self._getrefnum(v)) |
| 941 | |
| 942 | s = len(keyRefs) |
| 943 | self._write_size(0xD0, s) |
| 944 | self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs)) |
| 945 | self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs)) |
| 946 | |
| 947 | else: |
Ronald Oussoren | 6db6653 | 2014-01-15 11:32:35 +0100 | [diff] [blame] | 948 | raise TypeError(value) |
Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 949 | |
| 950 | |
| 951 | def _is_fmt_binary(header): |
| 952 | return header[:8] == b'bplist00' |
| 953 | |
| 954 | |
| 955 | # |
| 956 | # Generic bits |
| 957 | # |
| 958 | |
# Registry of supported plist formats.  Each entry provides a 'detect'
# predicate (applied by load() to the first bytes of a file) and the
# 'parser' and 'writer' classes implementing that format.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
| 971 | |
| 972 | |
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file and return the unpacked root object.

    'fp' should be a (readable) file object.  When 'fmt' is None the
    format is detected from the first 32 bytes of the file (so 'fp' must
    also support seek()); otherwise 'fmt' selects the parser directly.
    The root object usually is a dictionary.

    Raises InvalidFileException if no known format matches the header.
    """
    if fmt is not None:
        parser_class = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        # First registered format whose detector accepts the header.
        parser_class = next(
            (info['parser'] for info in _FORMATS.values()
             if info['detect'](header)),
            None)
        if parser_class is None:
            raise InvalidFileException()

    parser = parser_class(use_builtin_types=use_builtin_types,
                          dict_type=dict_type)
    return parser.parse(fp)
| 993 | |
| 994 | |
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.

    'value' is wrapped in an in-memory file and handed to load() with
    the same keyword arguments.  Return the unpacked root object (which
    usually is a dictionary).
    """
    return load(BytesIO(value), fmt=fmt,
                use_builtin_types=use_builtin_types, dict_type=dict_type)
| 1002 | |
| 1003 | |
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file.

    'fp' should be a (writable) file object.  'fmt' selects the output
    format; 'sort_keys' and 'skipkeys' are passed on to the writer.

    Raises ValueError if 'fmt' is not a supported format.
    """
    format_info = _FORMATS.get(fmt)
    if format_info is None:
        raise ValueError("Unsupported format: %r"%(fmt,))

    make_writer = format_info["writer"]
    make_writer(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
| 1013 | |
| 1014 | |
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    'value' is serialized with dump() into an in-memory buffer, using
    the same 'fmt', 'skipkeys' and 'sort_keys' arguments.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()