blob: 21ebec3f00459076d008e2c8c1f4959e413d72b5 [file] [log] [blame]
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data, bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Names exported by ``from plistlib import *``.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# Enumeration of the supported plist serialisations.  The members are also
# injected into the module namespace so they can be referenced directly as
# FMT_XML and FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
#
#
# Deprecated functionality
#
#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010079@contextlib.contextmanager
80def _maybe_open(pathOrFile, mode):
81 if isinstance(pathOrFile, str):
82 with open(pathOrFile, mode) as fp:
83 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +000084
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010085 else:
86 yield pathOrFile
87
88
def readPlist(pathOrFile):
    """
    Read a .plist from a path or file. pathOrFile should either
    be a file name, or a readable binary file object.

    The format is auto-detected (fmt=None) and binary data is returned
    wrapped in Data objects (use_builtin_types=False).

    This function is deprecated, use load instead.
    """
    # stacklevel 2 attributes the warning to the caller of readPlist().
    warn("The readPlist function is deprecated, use load() instead",
         DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'rb') as fp:
        return load(fp, fmt=None, use_builtin_types=False)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100101
def writePlist(value, pathOrFile):
    """
    Write 'value' to a .plist file. 'pathOrFile' may either be a
    file name or a (writable) file object.

    The output is always XML, with dictionary keys sorted; a non-string
    dictionary key is an error (skipkeys=False).

    This function is deprecated, use dump instead.
    """
    # stacklevel 2 attributes the warning to the caller of writePlist().
    warn("The writePlist function is deprecated, use dump() instead",
         DeprecationWarning, 2)
    with _maybe_open(pathOrFile, 'wb') as fp:
        dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)
113
114
def readPlistFromBytes(data):
    """
    Read a plist data from a bytes object. Return the root object.

    The format is auto-detected (fmt=None) and binary data is returned
    wrapped in Data objects (use_builtin_types=False).

    This function is deprecated, use loads instead.
    """
    # stacklevel 2 attributes the warning to the caller of this function.
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         DeprecationWarning, 2)
    return load(BytesIO(data), fmt=None, use_builtin_types=False)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100124
125
def writePlistToBytes(value):
    """
    Return 'value' as a plist-formatted bytes object.

    The output is always XML, with dictionary keys sorted; a non-string
    dictionary key is an error (skipkeys=False).

    This function is deprecated, use dumps instead.
    """
    # stacklevel 2 attributes the warning to the caller of this function.
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         DeprecationWarning, 2)
    f = BytesIO()
    dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return f.getvalue()
137
Christian Heimes7e182542008-01-27 15:20:13 +0000138
class Data:
    """
    Wrapper for binary data.

    The wrapped bytes object is available as the ``data`` attribute.
    Instances compare equal to other Data instances and to plain bytes
    objects holding the same content.  Because __eq__ is defined without
    __hash__, instances are not hashable.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        # Only real bytes are accepted; str (or bytearray) is rejected.
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Alternate constructor: decode base64 text/bytes into a Data."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(_decode_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded, wrapped at maxlinelength."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            return self.data == other
        else:
            # Let Python try the reflected comparison / fall back to identity.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
170
#
#
# End of deprecated functionality
#
#


#
# XML support
#
181
182
# XML 'header': the XML declaration plus Apple's plist DOCTYPE, written at
# the start of every XML plist (each of the two lines ends with a newline).
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
188
189
190# Regex to find any control chars, except for \t \n and \r
191_controlCharPat = re.compile(
192 r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
193 r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
194
195def _encode_base64(s, maxlinelength=76):
196 # copied from base64.encodebytes(), with added maxlinelength argument
197 maxbinsize = (maxlinelength//4)*3
198 pieces = []
199 for i in range(0, len(s), maxbinsize):
200 chunk = s[i : i + maxbinsize]
201 pieces.append(binascii.b2a_base64(chunk))
202 return b''.join(pieces)
203
204def _decode_base64(s):
205 if isinstance(s, str):
206 return binascii.a2b_base64(s.encode("utf-8"))
207
208 else:
209 return binascii.a2b_base64(s)
210
211# Contents should conform to a subset of ISO 8601
212# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
213# may be omitted with # a loss of precision)
214_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
215
216
217def _date_from_string(s):
218 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
219 gd = _dateParser.match(s).groupdict()
220 lst = []
221 for key in order:
222 val = gd[key]
223 if val is None:
224 break
225 lst.append(int(val))
226 return datetime.datetime(*lst)
227
228
229def _date_to_string(d):
230 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
231 d.year, d.month, d.day,
232 d.hour, d.minute, d.second
233 )
234
def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalised to '\\n' and the characters '&', '<' and
    '>' are replaced by their entity references.

    Raises ValueError if *text* contains a control character matched by
    _controlCharPat.
    """
    m = _controlCharPat.search(text)
    if m is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    text = text.replace("\r\n", "\n")       # convert DOS line endings
    text = text.replace("\r", "\n")         # convert Mac line endings
    # '&' must be escaped first so the entities added below stay intact.
    text = text.replace("&", "&amp;")       # escape '&'
    text = text.replace("<", "&lt;")        # escape '<'
    text = text.replace(">", "&gt;")        # escape '>'
    return text
246
247class _PlistParser:
248 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000249 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100250 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000251 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100252 self._use_builtin_types = use_builtin_types
253 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000254
255 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700256 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100257 self.parser.StartElementHandler = self.handle_begin_element
258 self.parser.EndElementHandler = self.handle_end_element
259 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700260 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000261 return self.root
262
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100263 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000264 self.data = []
265 handler = getattr(self, "begin_" + element, None)
266 if handler is not None:
267 handler(attrs)
268
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100269 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000270 handler = getattr(self, "end_" + element, None)
271 if handler is not None:
272 handler()
273
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100274 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000275 self.data.append(data)
276
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100277 def add_object(self, value):
278 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700279 if not isinstance(self.stack[-1], type({})):
280 raise ValueError("unexpected element at line %d" %
281 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100282 self.stack[-1][self.current_key] = value
283 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000284 elif not self.stack:
285 # this is the root object
286 self.root = value
287 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700288 if not isinstance(self.stack[-1], type([])):
289 raise ValueError("unexpected element at line %d" %
290 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000291 self.stack[-1].append(value)
292
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100293 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000294 data = ''.join(self.data)
295 self.data = []
296 return data
297
298 # element handlers
299
300 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100301 d = self._dict_type()
302 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000303 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100304
Christian Heimes7e182542008-01-27 15:20:13 +0000305 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100306 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700307 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100308 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000309 self.stack.pop()
310
311 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100312 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700313 raise ValueError("unexpected key at line %d" %
314 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100315 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000316
317 def begin_array(self, attrs):
318 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100319 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000320 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100321
Christian Heimes7e182542008-01-27 15:20:13 +0000322 def end_array(self):
323 self.stack.pop()
324
325 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100326 self.add_object(True)
327
Christian Heimes7e182542008-01-27 15:20:13 +0000328 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100329 self.add_object(False)
330
Christian Heimes7e182542008-01-27 15:20:13 +0000331 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100332 self.add_object(int(self.get_data()))
333
Christian Heimes7e182542008-01-27 15:20:13 +0000334 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100335 self.add_object(float(self.get_data()))
336
Christian Heimes7e182542008-01-27 15:20:13 +0000337 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100338 self.add_object(self.get_data())
339
Christian Heimes7e182542008-01-27 15:20:13 +0000340 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100341 if self._use_builtin_types:
342 self.add_object(_decode_base64(self.get_data()))
343
344 else:
345 self.add_object(Data.fromBase64(self.get_data()))
346
Christian Heimes7e182542008-01-27 15:20:13 +0000347 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100348 self.add_object(_date_from_string(self.get_data()))
349
350
351class _DumbXMLWriter:
352 def __init__(self, file, indent_level=0, indent="\t"):
353 self.file = file
354 self.stack = []
355 self._indent_level = indent_level
356 self.indent = indent
357
358 def begin_element(self, element):
359 self.stack.append(element)
360 self.writeln("<%s>" % element)
361 self._indent_level += 1
362
363 def end_element(self, element):
364 assert self._indent_level > 0
365 assert self.stack.pop() == element
366 self._indent_level -= 1
367 self.writeln("</%s>" % element)
368
369 def simple_element(self, element, value=None):
370 if value is not None:
371 value = _escape(value)
372 self.writeln("<%s>%s</%s>" % (element, value, element))
373
374 else:
375 self.writeln("<%s/>" % element)
376
377 def writeln(self, line):
378 if line:
379 # plist has fixed encoding of utf-8
380
381 # XXX: is this test needed?
382 if isinstance(line, str):
383 line = line.encode('utf-8')
384 self.file.write(self._indent_level * self.indent)
385 self.file.write(line)
386 self.file.write(b'\n')
387
388
class _PlistWriter(_DumbXMLWriter):
    """Serialise Python objects as an XML plist.

    file         -- binary file object that receives the output
    indent_level -- initial nesting depth
    indent       -- bytes used for one level of indentation
    writeHeader  -- if true, PLISTHEADER is written immediately
    sort_keys    -- if true, dictionary items are emitted in sorted order
    skipkeys     -- if true, non-string dictionary keys are skipped instead
                    of raising TypeError
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* as a complete <plist> document body."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, dispatching on its type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # The bool singletons are tested before int: bool is a subclass
        # of int.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, Data):
            self.write_data(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* as a base64 encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the same depth as the <data> tag,
        # so undo the indent added by begin_element() for the moment.
        self._indent_level -= 1
        # Shorten the lines by the width of the indentation (a tab counts
        # as 8 columns), but never below 16 characters.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write a mapping as <dict>; an empty one is written as <dict/>."""
        if d:
            self.begin_element("dict")
            if self._sort_keys:
                items = sorted(d.items())
            else:
                items = d.items()

            for key, value in items:
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write a sequence as <array>; an empty one is written as <array/>."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
487
488
489def _is_fmt_xml(header):
490 prefixes = (b'<?xml', b'<plist')
491
492 for pfx in prefixes:
493 if header.startswith(pfx):
494 return True
495
496 # Also check for alternative XML encodings, this is slightly
497 # overkill because the Apple tools (and plistlib) will not
498 # generate files with these encodings.
499 for bom, encoding in (
500 (codecs.BOM_UTF8, "utf-8"),
501 (codecs.BOM_UTF16_BE, "utf-16-be"),
502 (codecs.BOM_UTF16_LE, "utf-16-le"),
503 # expat does not support utf-32
504 #(codecs.BOM_UTF32_BE, "utf-32-be"),
505 #(codecs.BOM_UTF32_LE, "utf-32-le"),
506 ):
507 if not header.startswith(bom):
508 continue
509
510 for start in prefixes:
511 prefix = bom + start.decode('ascii').encode(encoding)
512 if header[:len(prefix)] == prefix:
513 return True
514
515 return False
516
#
# Binary Plist
#
520
521
class InvalidFileException (ValueError):
    """Raised when a file cannot be parsed as a plist.

    Subclasses ValueError, so ``except ValueError`` also catches it.
    """

    def __init__(self, message="Invalid file"):
        ValueError.__init__(self, message)
525
526_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
527
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200528_undefined = object()
529
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    use_builtin_types -- if true, data objects are returned as bytes,
                         otherwise they are wrapped in Data instances.
    dict_type         -- callable used to create the mapping for each dict.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file *fp* and return the root object."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            # The trailer is the fixed-size (32 byte) tail of the file.
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Cache of already decoded objects, indexed by reference number.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        # ValueError includes UnicodeDecodeError (undecodable strings) and
        # the error raised when a date's offset is NaN.
        except (OSError, IndexError, struct.error, OverflowError,
                ValueError):
            raise InvalidFileException()

    def _read_exact(self, n):
        """Read exactly *n* bytes; raise InvalidFileException if truncated."""
        data = self._fp.read(n)
        if len(data) != n:
            raise InvalidFileException()
        return data

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The real size follows as an integer object whose marker's low
            # two bits give log2 of its width in bytes.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            # Widths struct has no format code for are decoded by hand.
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        result = self._objects[ref]
        if result is not _undefined:
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        # High nibble: object type; low nibble: size/variant information.
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # 2**tokenL bytes; widths of 8 bytes and more are signed.
            result = int.from_bytes(self._read_exact(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22:  # real
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23:  # real
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                result = self._read_exact(s)
            else:
                result = Data(self._read_exact(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result = self._read_exact(s).decode('ascii')

        elif tokenH == 0x60:  # unicode string
            # The size counts UTF-16 code units, i.e. 2 bytes each.
            s = self._get_size(tokenL)
            result = self._read_exact(s * 2).decode('utf-16be')

        # tokenH == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the container before reading its children so that a
            # reference back to it resolves to this same object.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            # Registered early for the same reason as arrays above.
            self._objects[ref] = result
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(k)] = self._read_object(o)

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100683
684def _count_to_size(count):
685 if count < 1 << 8:
686 return 1
687
688 elif count < 1 << 16:
689 return 2
690
691 elif count << 1 << 32:
692 return 4
693
694 else:
695 return 8
696
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200697_scalars = (str, int, float, datetime.datetime, bytes)
698
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100699class _BinaryPlistWriter (object):
700 def __init__(self, fp, sort_keys, skipkeys):
701 self._fp = fp
702 self._sort_keys = sort_keys
703 self._skipkeys = skipkeys
704
705 def write(self, value):
706
707 # Flattened object list:
708 self._objlist = []
709
710 # Mappings from object->objectid
711 # First dict has (type(object), object) as the key,
712 # second dict is used when object is not hashable and
713 # has id(object) as the key.
714 self._objtable = {}
715 self._objidtable = {}
716
717 # Create list of all objects in the plist
718 self._flatten(value)
719
720 # Size of object references in serialized containers
721 # depends on the number of objects in the plist.
722 num_objects = len(self._objlist)
723 self._object_offsets = [0]*num_objects
724 self._ref_size = _count_to_size(num_objects)
725
726 self._ref_format = _BINARY_FORMAT[self._ref_size]
727
728 # Write file header
729 self._fp.write(b'bplist00')
730
731 # Write object list
732 for obj in self._objlist:
733 self._write_object(obj)
734
735 # Write refnum->object offset table
736 top_object = self._getrefnum(value)
737 offset_table_offset = self._fp.tell()
738 offset_size = _count_to_size(offset_table_offset)
739 offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
740 self._fp.write(struct.pack(offset_format, *self._object_offsets))
741
742 # Write trailer
743 sort_version = 0
744 trailer = (
745 sort_version, offset_size, self._ref_size, num_objects,
746 top_object, offset_table_offset
747 )
748 self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
749
750 def _flatten(self, value):
751 # First check if the object is in the object table, not used for
752 # containers to ensure that two subcontainers with the same contents
753 # will be serialized as distinct values.
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200754 if isinstance(value, _scalars):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100755 if (type(value), value) in self._objtable:
756 return
757
758 elif isinstance(value, Data):
759 if (type(value.data), value.data) in self._objtable:
760 return
761
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200762 elif id(value) in self._objidtable:
763 return
764
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100765 # Add to objectreference map
766 refnum = len(self._objlist)
767 self._objlist.append(value)
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200768 if isinstance(value, _scalars):
769 self._objtable[(type(value), value)] = refnum
770 elif isinstance(value, Data):
771 self._objtable[(type(value.data), value.data)] = refnum
772 else:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100773 self._objidtable[id(value)] = refnum
774
775 # And finally recurse into containers
776 if isinstance(value, dict):
777 keys = []
778 values = []
779 items = value.items()
780 if self._sort_keys:
781 items = sorted(items)
782
783 for k, v in items:
784 if not isinstance(k, str):
785 if self._skipkeys:
786 continue
787 raise TypeError("keys must be strings")
788 keys.append(k)
789 values.append(v)
790
791 for o in itertools.chain(keys, values):
792 self._flatten(o)
793
794 elif isinstance(value, (list, tuple)):
795 for o in value:
796 self._flatten(o)
797
798 def _getrefnum(self, value):
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200799 if isinstance(value, _scalars):
800 return self._objtable[(type(value), value)]
801 elif isinstance(value, Data):
802 return self._objtable[(type(value.data), value.data)]
803 else:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100804 return self._objidtable[id(value)]
805
806 def _write_size(self, token, size):
807 if size < 15:
808 self._fp.write(struct.pack('>B', token | size))
809
810 elif size < 1 << 8:
811 self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
812
813 elif size < 1 << 16:
814 self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
815
816 elif size < 1 << 32:
817 self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
818
819 else:
820 self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
821
def _write_object(self, value):
    """Serialize a single object at the current position of the output.

    Before anything is written, the current offset of self._fp is stored
    in self._object_offsets under the object's reference number.  Lists,
    tuples and dicts are written as sequences of reference numbers, not
    as inline values.

    Raises OverflowError for integers outside the encodable range and
    TypeError for unsupported values or, unless self._skipkeys is set,
    for dictionary keys that are not strings.
    """
    refnum = self._getrefnum(value)
    out = self._fp
    self._object_offsets[refnum] = out.tell()

    # The singletons are matched by identity ahead of the int branch,
    # because bool is a subclass of int.
    if value is None:
        out.write(b'\x00')

    elif value is False:
        out.write(b'\x08')

    elif value is True:
        out.write(b'\x09')

    elif isinstance(value, int):
        if value < 0:
            # Negative values always use the signed 8-byte encoding.
            try:
                out.write(struct.pack('>Bq', 0x13, value))
            except struct.error:
                raise OverflowError(value) from None
        else:
            # Use the narrowest unsigned encoding that can hold the value.
            for bits, fmt, marker in (
                    (8, '>BB', 0x10),
                    (16, '>BH', 0x11),
                    (32, '>BL', 0x12),
                    (63, '>BQ', 0x13)):
                if value < 1 << bits:
                    out.write(struct.pack(fmt, marker, value))
                    break
            else:
                if value >= 1 << 64:
                    raise OverflowError(value)
                # 2**63 <= value < 2**64 is written as a 16-byte integer.
                out.write(b'\x14' + value.to_bytes(16, 'big', signed=True))

    elif isinstance(value, float):
        out.write(struct.pack('>Bd', 0x23, value))

    elif isinstance(value, datetime.datetime):
        # Dates are stored as seconds relative to 2001-01-01.
        epoch = datetime.datetime(2001, 1, 1)
        seconds = (value - epoch).total_seconds()
        out.write(struct.pack('>Bd', 0x33, seconds))

    elif isinstance(value, Data):
        payload = value.data
        self._write_size(0x40, len(payload))
        out.write(payload)

    elif isinstance(value, (bytes, bytearray)):
        self._write_size(0x40, len(value))
        out.write(value)

    elif isinstance(value, str):
        # ASCII when possible; otherwise UTF-16BE, sized in 2-byte units.
        try:
            encoded = value.encode('ascii')
            self._write_size(0x50, len(value))
        except UnicodeEncodeError:
            encoded = value.encode('utf-16be')
            self._write_size(0x60, len(encoded) // 2)

        out.write(encoded)

    elif isinstance(value, (list, tuple)):
        item_refs = [self._getrefnum(item) for item in value]
        count = len(item_refs)
        self._write_size(0xA0, count)
        out.write(struct.pack('>' + self._ref_format * count, *item_refs))

    elif isinstance(value, dict):
        entries = sorted(value.items()) if self._sort_keys else value.items()

        key_refs = []
        val_refs = []
        for key, val in entries:
            if isinstance(key, str):
                key_refs.append(self._getrefnum(key))
                val_refs.append(self._getrefnum(val))
            elif not self._skipkeys:
                raise TypeError("keys must be strings")

        # All key references are written first, then all value references.
        count = len(key_refs)
        self._write_size(0xD0, count)
        ref_layout = '>' + self._ref_format * count
        out.write(struct.pack(ref_layout, *key_refs))
        out.write(struct.pack(ref_layout, *val_refs))

    else:
        raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100907
908
def _is_fmt_binary(header):
    """Return True if *header* begins with the binary plist magic bytes."""
    magic = b'bplist00'
    return header[:len(magic)] == magic
911
912
913#
914# Generic bits
915#
916
# Registry of supported formats: each format constant maps to the
# function that recognizes its header and to its parser/writer classes.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
929
930
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Parse a plist from the readable file object 'fp'.

    When 'fmt' is None the format is detected from the first 32 bytes
    of the file, which is then rewound to offset 0.  If no registered
    format recognizes the header, InvalidFileException is raised.

    Returns the root object built by the selected parser (usually a
    dictionary).
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)

        parser_cls = None
        for entry in _FORMATS.values():
            if entry['detect'](header):
                parser_cls = entry['parser']
                break

        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(
        use_builtin_types=use_builtin_types, dict_type=dict_type)
    return parser.parse(fp)
951
952
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Parse a plist held in the bytes object 'value'.

    The data is wrapped in a BytesIO and passed to load() together with
    the keyword arguments; the root object load() produces is returned.
    """
    options = dict(
        fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
    return load(BytesIO(value), **options)
960
961
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Serialize 'value' as a plist to the writable file object 'fp'.

    Raises ValueError if 'fmt' is not one of the registered formats.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_cls = _FORMATS[fmt]["writer"]
    writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
971
972
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Serialize 'value' as a plist and return the result as bytes.

    The plist is written with dump() into an in-memory buffer whose
    contents are returned.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, sort_keys=sort_keys, skipkeys=skipkeys)
    return buffer.getvalue()