blob: 248f5143f4edf76bfa46f2f75f840f256dd77498 [file] [log] [blame]
Benjamin Petersonef3e4c22009-04-11 19:48:14 +00001r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
Christian Heimes7e182542008-01-27 15:20:13 +00002
Ezio Melotti6e9b1df2009-09-16 00:49:03 +00003The property list (.plist) file format is a simple XML pickle supporting
Christian Heimes7e182542008-01-27 15:20:13 +00004basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
Ronald Oussorenc5cf7972013-11-21 15:46:49 +01007To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
Christian Heimes7e182542008-01-27 15:20:13 +000010
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010011To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
Christian Heimes7e182542008-01-27 15:20:13 +000013returns the top level object (again, usually a dictionary).
14
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010015To work with plist data in bytes objects, you can use loads()
16and dumps().
Christian Heimes7e182542008-01-27 15:20:13 +000017
18Values can be strings, integers, floats, booleans, tuples, lists,
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010019dictionaries (but only with string keys), Data, bytes, bytearray, or
20datetime.datetime objects.
Christian Heimes7e182542008-01-27 15:20:13 +000021
22Generate Plist example:
23
24 pl = dict(
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000025 aString = "Doodah",
26 aList = ["A", "B", 12, 32.1, [1, 2, 3]],
Christian Heimes7e182542008-01-27 15:20:13 +000027 aFloat = 0.1,
28 anInt = 728,
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000029 aDict = dict(
30 anotherString = "<hello & hi there!>",
31 aUnicodeValue = "M\xe4ssig, Ma\xdf",
32 aTrueValue = True,
33 aFalseValue = False,
Christian Heimes7e182542008-01-27 15:20:13 +000034 ),
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010035 someData = b"<binary gunk>",
36 someMoreData = b"<lots of binary gunk>" * 10,
Christian Heimes7e182542008-01-27 15:20:13 +000037 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38 )
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010039 with open(fileName, 'wb') as fp:
40 dump(pl, fp)
Christian Heimes7e182542008-01-27 15:20:13 +000041
42Parse Plist example:
43
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010044 with open(fileName, 'rb') as fp:
45 pl = load(fp)
46 print(pl["aKey"])
Christian Heimes7e182542008-01-27 15:20:13 +000047"""
# Names exported by ``from plistlib import *``.  readPlist, writePlist,
# readPlistFromBytes, writePlistToBytes and Data are the deprecated interface;
# load, dump, loads and dumps are their replacements.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# The serialisation formats known to this module.  Each member is also
# published as a module-level name (FMT_XML, FMT_BINARY).
PlistFormat = enum.Enum(
    'PlistFormat',
    ['FMT_XML', 'FMT_BINARY'],
    module=__name__,
)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010072#
73#
74# Deprecated functionality
75#
76#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010079@contextlib.contextmanager
80def _maybe_open(pathOrFile, mode):
81 if isinstance(pathOrFile, str):
82 with open(pathOrFile, mode) as fp:
83 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +000084
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010085 else:
86 yield pathOrFile
87
88
def readPlist(pathOrFile):
    """
    Parse a property list and return its root object.

    'pathOrFile' is either a file name or a readable binary file object.
    The format is auto-detected (fmt=None) and the parser is run with
    use_builtin_types=False.

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
         DeprecationWarning, stacklevel=2)

    with _maybe_open(pathOrFile, 'rb') as stream:
        root = load(stream, fmt=None, use_builtin_types=False)
    return root
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100101
def writePlist(value, pathOrFile):
    """
    Serialise 'value' as an XML property list.

    'pathOrFile' is either a file name or a writable binary file object.
    Keys are sorted and non-string keys are an error (skipkeys=False).

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
         DeprecationWarning, stacklevel=2)
    with _maybe_open(pathOrFile, 'wb') as stream:
        dump(value, stream, fmt=FMT_XML, sort_keys=True, skipkeys=False)
113
114
def readPlistFromBytes(data):
    """
    Parse the property list held in the bytes object 'data' and return its
    root object.  The format is auto-detected (fmt=None) and the parser is
    run with use_builtin_types=False.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         DeprecationWarning, stacklevel=2)
    stream = BytesIO(data)
    return load(stream, fmt=None, use_builtin_types=False)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100124
125
def writePlistToBytes(value):
    """
    Serialise 'value' as an XML property list and return the result as a
    bytes object.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         DeprecationWarning, stacklevel=2)
    buffer = BytesIO()
    dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    serialised = buffer.getvalue()
    return serialised
137
Christian Heimes7e182542008-01-27 15:20:13 +0000138
class Data:
    """
    Thin wrapper marking a bytes payload as plist binary data.

    The payload is stored in the 'data' attribute.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        if isinstance(data, bytes):
            self.data = data
        else:
            raise TypeError("data must be as bytes")

    @classmethod
    def fromBase64(cls, data):
        # Alternate constructor: decode base64 text/bytes into a Data.
        # Decoding goes straight through binascii (via _decode_base64)
        # instead of additionally pulling in the base64 module.
        decoded = _decode_base64(data)
        return cls(decoded)

    def asBase64(self, maxlinelength=76):
        # Base64 encoding of the payload, wrapped according to maxlinelength.
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        # Comparable with other Data instances and with plain bytes; any
        # other type is left to the other operand.
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            return NotImplemented
        return self.data == other

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.data)
170
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100171#
172#
173# End of deprecated functionality
174#
175#
Christian Heimes7e182542008-01-27 15:20:13 +0000176
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100177
178#
179# XML support
180#
181
182
183# XML 'header'
184PLISTHEADER = b"""\
185<?xml version="1.0" encoding="UTF-8"?>
186<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
187"""
188
189
190# Regex to find any control chars, except for \t \n and \r
191_controlCharPat = re.compile(
192 r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
193 r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
194
195def _encode_base64(s, maxlinelength=76):
196 # copied from base64.encodebytes(), with added maxlinelength argument
197 maxbinsize = (maxlinelength//4)*3
198 pieces = []
199 for i in range(0, len(s), maxbinsize):
200 chunk = s[i : i + maxbinsize]
201 pieces.append(binascii.b2a_base64(chunk))
202 return b''.join(pieces)
203
204def _decode_base64(s):
205 if isinstance(s, str):
206 return binascii.a2b_base64(s.encode("utf-8"))
207
208 else:
209 return binascii.a2b_base64(s)
210
211# Contents should conform to a subset of ISO 8601
212# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
213# may be omitted with # a loss of precision)
214_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
215
216
217def _date_from_string(s):
218 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
219 gd = _dateParser.match(s).groupdict()
220 lst = []
221 for key in order:
222 val = gd[key]
223 if val is None:
224 break
225 lst.append(int(val))
226 return datetime.datetime(*lst)
227
228
229def _date_to_string(d):
230 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
231 d.year, d.month, d.day,
232 d.hour, d.minute, d.second
233 )
234
def _escape(text):
    """
    Return 'text' made safe for use as XML character data.

    Line endings are normalised to '\\n' and the characters '&', '<' and '>'
    are replaced by their entities.  Raises ValueError when 'text' contains
    a control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # Order matters: "\r\n" must be collapsed before lone "\r", and '&' must
    # be escaped before the entities that themselves contain '&'.
    substitutions = (
        ("\r\n", "\n"),     # DOS line endings
        ("\r", "\n"),       # Mac line endings
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
246
247class _PlistParser:
248 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000249 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100250 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000251 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100252 self._use_builtin_types = use_builtin_types
253 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000254
255 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700256 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100257 self.parser.StartElementHandler = self.handle_begin_element
258 self.parser.EndElementHandler = self.handle_end_element
259 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700260 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000261 return self.root
262
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100263 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000264 self.data = []
265 handler = getattr(self, "begin_" + element, None)
266 if handler is not None:
267 handler(attrs)
268
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100269 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000270 handler = getattr(self, "end_" + element, None)
271 if handler is not None:
272 handler()
273
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100274 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000275 self.data.append(data)
276
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100277 def add_object(self, value):
278 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700279 if not isinstance(self.stack[-1], type({})):
280 raise ValueError("unexpected element at line %d" %
281 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100282 self.stack[-1][self.current_key] = value
283 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000284 elif not self.stack:
285 # this is the root object
286 self.root = value
287 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700288 if not isinstance(self.stack[-1], type([])):
289 raise ValueError("unexpected element at line %d" %
290 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000291 self.stack[-1].append(value)
292
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100293 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000294 data = ''.join(self.data)
295 self.data = []
296 return data
297
298 # element handlers
299
300 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100301 d = self._dict_type()
302 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000303 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100304
Christian Heimes7e182542008-01-27 15:20:13 +0000305 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100306 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700307 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100308 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000309 self.stack.pop()
310
311 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100312 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700313 raise ValueError("unexpected key at line %d" %
314 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100315 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000316
317 def begin_array(self, attrs):
318 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100319 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000320 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100321
Christian Heimes7e182542008-01-27 15:20:13 +0000322 def end_array(self):
323 self.stack.pop()
324
325 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100326 self.add_object(True)
327
Christian Heimes7e182542008-01-27 15:20:13 +0000328 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100329 self.add_object(False)
330
Christian Heimes7e182542008-01-27 15:20:13 +0000331 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100332 self.add_object(int(self.get_data()))
333
Christian Heimes7e182542008-01-27 15:20:13 +0000334 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100335 self.add_object(float(self.get_data()))
336
Christian Heimes7e182542008-01-27 15:20:13 +0000337 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100338 self.add_object(self.get_data())
339
Christian Heimes7e182542008-01-27 15:20:13 +0000340 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100341 if self._use_builtin_types:
342 self.add_object(_decode_base64(self.get_data()))
343
344 else:
345 self.add_object(Data.fromBase64(self.get_data()))
346
Christian Heimes7e182542008-01-27 15:20:13 +0000347 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100348 self.add_object(_date_from_string(self.get_data()))
349
350
351class _DumbXMLWriter:
352 def __init__(self, file, indent_level=0, indent="\t"):
353 self.file = file
354 self.stack = []
355 self._indent_level = indent_level
356 self.indent = indent
357
358 def begin_element(self, element):
359 self.stack.append(element)
360 self.writeln("<%s>" % element)
361 self._indent_level += 1
362
363 def end_element(self, element):
364 assert self._indent_level > 0
365 assert self.stack.pop() == element
366 self._indent_level -= 1
367 self.writeln("</%s>" % element)
368
369 def simple_element(self, element, value=None):
370 if value is not None:
371 value = _escape(value)
372 self.writeln("<%s>%s</%s>" % (element, value, element))
373
374 else:
375 self.writeln("<%s/>" % element)
376
377 def writeln(self, line):
378 if line:
379 # plist has fixed encoding of utf-8
380
381 # XXX: is this test needed?
382 if isinstance(line, str):
383 line = line.encode('utf-8')
384 self.file.write(self._indent_level * self.indent)
385 self.file.write(line)
386 self.file.write(b'\n')
387
388
class _PlistWriter(_DumbXMLWriter):
    """
    Serialise Python values as an XML property list.

    'file' is a writable binary file object.  When 'writeHeader' is true
    PLISTHEADER is written as soon as the writer is created.  'sort_keys'
    makes dictionaries come out in sorted key order; 'skipkeys' makes
    non-string dictionary keys be skipped instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write 'value' wrapped in the <plist> root element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """
        Write one value, dispatching on its type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside the range -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is a subclass of int, so True/False have to be recognised
        # before the integer branch.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, Data):
            self.write_data(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write 'data' as a base64 encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the indentation of the <data> tag
        # itself rather than one level deeper.
        self._indent_level -= 1
        # Keep lines within 76 columns (a tab counted as 8), but never make
        # them shorter than 16 characters.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """
        Write the mapping 'd' as a <dict> element.

        Raises TypeError for a non-string key unless skipkeys was requested.
        """
        if d:
            self.begin_element("dict")

            # Validate/filter the keys BEFORE sorting: sorting a mix of str
            # and non-str keys raises a comparison TypeError, which used to
            # defeat skipkeys=True.
            items = []
            for key, value in d.items():
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                items.append((key, value))

            if self._sort_keys:
                # Dict keys are unique, so ordering by key alone gives the
                # same result as ordering the (key, value) pairs.
                items.sort(key=lambda item: item[0])

            for key, value in items:
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write the sequence 'array' as an <array> element."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
487
488
489def _is_fmt_xml(header):
490 prefixes = (b'<?xml', b'<plist')
491
492 for pfx in prefixes:
493 if header.startswith(pfx):
494 return True
495
496 # Also check for alternative XML encodings, this is slightly
497 # overkill because the Apple tools (and plistlib) will not
498 # generate files with these encodings.
499 for bom, encoding in (
500 (codecs.BOM_UTF8, "utf-8"),
501 (codecs.BOM_UTF16_BE, "utf-16-be"),
502 (codecs.BOM_UTF16_LE, "utf-16-le"),
503 # expat does not support utf-32
504 #(codecs.BOM_UTF32_BE, "utf-32-be"),
505 #(codecs.BOM_UTF32_LE, "utf-32-le"),
506 ):
507 if not header.startswith(bom):
508 continue
509
510 for start in prefixes:
511 prefix = bom + start.decode('ascii').encode(encoding)
512 if header[:len(prefix)] == prefix:
513 return True
514
515 return False
516
517#
518# Binary Plist
519#
520
521
522class InvalidFileException (ValueError):
523 def __init__(self, message="Invalid file"):
524 ValueError.__init__(self, message)
525
526_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
527
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200528_undefined = object()
529
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100530class _BinaryPlistParser:
531 """
532 Read or write a binary plist file, following the description of the binary
533 format. Raise InvalidFileException in case of error, otherwise return the
534 root object.
535
536 see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
537 """
538 def __init__(self, use_builtin_types, dict_type):
539 self._use_builtin_types = use_builtin_types
540 self._dict_type = dict_type
541
542 def parse(self, fp):
543 try:
544 # The basic file format:
545 # HEADER
546 # object...
547 # refid->offset...
548 # TRAILER
549 self._fp = fp
550 self._fp.seek(-32, os.SEEK_END)
551 trailer = self._fp.read(32)
552 if len(trailer) != 32:
553 raise InvalidFileException()
554 (
555 offset_size, self._ref_size, num_objects, top_object,
556 offset_table_offset
557 ) = struct.unpack('>6xBBQQQ', trailer)
558 self._fp.seek(offset_table_offset)
Serhiy Storchaka06526642014-05-23 16:13:33 +0300559 self._object_offsets = self._read_ints(num_objects, offset_size)
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200560 self._objects = [_undefined] * num_objects
561 return self._read_object(top_object)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100562
Serhiy Storchakadb91e0f2017-10-31 14:05:53 +0200563 except (OSError, IndexError, struct.error, OverflowError,
564 UnicodeDecodeError):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100565 raise InvalidFileException()
566
567 def _get_size(self, tokenL):
568 """ return the size of the next object."""
569 if tokenL == 0xF:
570 m = self._fp.read(1)[0] & 0x3
571 s = 1 << m
572 f = '>' + _BINARY_FORMAT[s]
573 return struct.unpack(f, self._fp.read(s))[0]
574
575 return tokenL
576
Serhiy Storchaka06526642014-05-23 16:13:33 +0300577 def _read_ints(self, n, size):
578 data = self._fp.read(size * n)
579 if size in _BINARY_FORMAT:
580 return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
581 else:
Serhiy Storchakadb91e0f2017-10-31 14:05:53 +0200582 if not size or len(data) != size * n:
583 raise InvalidFileException()
Serhiy Storchaka06526642014-05-23 16:13:33 +0300584 return tuple(int.from_bytes(data[i: i + size], 'big')
585 for i in range(0, size * n, size))
586
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100587 def _read_refs(self, n):
Serhiy Storchaka06526642014-05-23 16:13:33 +0300588 return self._read_ints(n, self._ref_size)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100589
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200590 def _read_object(self, ref):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100591 """
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200592 read the object by reference.
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100593
594 May recursively read sub-objects (content of an array/dict/set)
595 """
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200596 result = self._objects[ref]
597 if result is not _undefined:
598 return result
599
600 offset = self._object_offsets[ref]
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100601 self._fp.seek(offset)
602 token = self._fp.read(1)[0]
603 tokenH, tokenL = token & 0xF0, token & 0x0F
604
605 if token == 0x00:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200606 result = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100607
608 elif token == 0x08:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200609 result = False
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100610
611 elif token == 0x09:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200612 result = True
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100613
614 # The referenced source code also mentions URL (0x0c, 0x0d) and
615 # UUID (0x0e), but neither can be generated using the Cocoa libraries.
616
617 elif token == 0x0f:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200618 result = b''
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100619
620 elif tokenH == 0x10: # int
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200621 result = int.from_bytes(self._fp.read(1 << tokenL),
622 'big', signed=tokenL >= 3)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100623
624 elif token == 0x22: # real
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200625 result = struct.unpack('>f', self._fp.read(4))[0]
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100626
627 elif token == 0x23: # real
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200628 result = struct.unpack('>d', self._fp.read(8))[0]
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100629
630 elif token == 0x33: # date
631 f = struct.unpack('>d', self._fp.read(8))[0]
632 # timestamp 0 of binary plists corresponds to 1/1/2001
633 # (year of Mac OS X 10.0), instead of 1/1/1970.
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200634 result = (datetime.datetime(2001, 1, 1) +
635 datetime.timedelta(seconds=f))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100636
637 elif tokenH == 0x40: # data
638 s = self._get_size(tokenL)
639 if self._use_builtin_types:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200640 result = self._fp.read(s)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100641 else:
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200642 result = Data(self._fp.read(s))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100643
644 elif tokenH == 0x50: # ascii string
645 s = self._get_size(tokenL)
646 result = self._fp.read(s).decode('ascii')
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100647
648 elif tokenH == 0x60: # unicode string
649 s = self._get_size(tokenL)
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200650 result = self._fp.read(s * 2).decode('utf-16be')
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100651
652 # tokenH == 0x80 is documented as 'UID' and appears to be used for
653 # keyed-archiving, not in plists.
654
655 elif tokenH == 0xA0: # array
656 s = self._get_size(tokenL)
657 obj_refs = self._read_refs(s)
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200658 result = []
659 self._objects[ref] = result
660 result.extend(self._read_object(x) for x in obj_refs)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100661
662 # tokenH == 0xB0 is documented as 'ordset', but is not actually
663 # implemented in the Apple reference code.
664
665 # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
666 # plists.
667
668 elif tokenH == 0xD0: # dict
669 s = self._get_size(tokenL)
670 key_refs = self._read_refs(s)
671 obj_refs = self._read_refs(s)
672 result = self._dict_type()
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200673 self._objects[ref] = result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100674 for k, o in zip(key_refs, obj_refs):
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200675 result[self._read_object(k)] = self._read_object(o)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100676
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200677 else:
678 raise InvalidFileException()
679
680 self._objects[ref] = result
681 return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100682
683def _count_to_size(count):
684 if count < 1 << 8:
685 return 1
686
687 elif count < 1 << 16:
688 return 2
689
690 elif count << 1 << 32:
691 return 4
692
693 else:
694 return 8
695
# Types that the binary writer looks up in its object table by
# (type, value) rather than by id(); see the isinstance(value, _scalars)
# checks in _BinaryPlistWriter.
_scalars = (str, int, float, datetime.datetime, bytes)
697
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100698class _BinaryPlistWriter (object):
def __init__(self, fp, sort_keys, skipkeys):
    # fp: file object the plist is written to (write() and tell() are
    # called on it by the other methods of this class).
    # sort_keys: when true, dictionary items are processed in sorted order.
    # skipkeys: when true, non-string dictionary keys are skipped instead
    # of raising TypeError.
    self._fp = fp
    self._sort_keys = sort_keys
    self._skipkeys = skipkeys
703
def write(self, value):
    """
    Write 'value' to self._fp as a binary plist: the b'bplist00' header,
    every object, the table of object offsets and finally the 32-byte
    trailer.
    """

    # Flattened object list:
    self._objlist = []

    # Mappings from object->objectid
    # First dict has (type(object), object) as the key,
    # second dict is used when object is not hashable and
    # has id(object) as the key.
    self._objtable = {}
    self._objidtable = {}

    # Create list of all objects in the plist
    self._flatten(value)

    # Size of object references in serialized containers
    # depends on the number of objects in the plist.
    num_objects = len(self._objlist)
    # Filled in by _write_object() as each object is written.
    self._object_offsets = [0]*num_objects
    self._ref_size = _count_to_size(num_objects)

    self._ref_format = _BINARY_FORMAT[self._ref_size]

    # Write file header
    self._fp.write(b'bplist00')

    # Write object list
    for obj in self._objlist:
        self._write_object(obj)

    # Write refnum->object offset table
    top_object = self._getrefnum(value)
    offset_table_offset = self._fp.tell()
    # Every object offset is smaller than the offset of the table itself,
    # so the table's position determines the width of its entries.
    offset_size = _count_to_size(offset_table_offset)
    offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
    self._fp.write(struct.pack(offset_format, *self._object_offsets))

    # Write trailer
    sort_version = 0
    trailer = (
        sort_version, offset_size, self._ref_size, num_objects,
        top_object, offset_table_offset
    )
    self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
748
def _flatten(self, value):
    """
    Register 'value' and, recursively, everything it contains in the
    flattened object list, assigning each new object the next reference
    number.
    """
    # Scalars (and Data, keyed by its payload) are looked up by value.
    # Containers are tracked by identity instead, to ensure that two
    # subcontainers with the same contents will be serialized as distinct
    # values.
    if isinstance(value, _scalars):
        table, key = self._objtable, (type(value), value)
    elif isinstance(value, Data):
        table, key = self._objtable, (type(value.data), value.data)
    else:
        table, key = self._objidtable, id(value)

    if key in table:
        return

    # The reference number of an object is its position in the list.
    table[key] = len(self._objlist)
    self._objlist.append(value)

    # And finally recurse into containers
    if isinstance(value, dict):
        pairs = value.items()
        if self._sort_keys:
            pairs = sorted(pairs)

        kept_keys, kept_values = [], []
        for k, v in pairs:
            if isinstance(k, str):
                kept_keys.append(k)
                kept_values.append(v)
            elif not self._skipkeys:
                raise TypeError("keys must be strings")

        # All keys are registered before any of the values.
        for member in itertools.chain(kept_keys, kept_values):
            self._flatten(member)

    elif isinstance(value, (list, tuple)):
        for member in value:
            self._flatten(member)
796
def _getrefnum(self, value):
    """
    Return the reference number that _flatten() assigned to 'value'.
    Raises KeyError for an object that was never registered.
    """
    if isinstance(value, _scalars):
        key = (type(value), value)
    elif isinstance(value, Data):
        key = (type(value.data), value.data)
    else:
        # Everything else is tracked by identity.
        return self._objidtable[id(value)]
    return self._objtable[key]
804
805 def _write_size(self, token, size):
806 if size < 15:
807 self._fp.write(struct.pack('>B', token | size))
808
809 elif size < 1 << 8:
810 self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
811
812 elif size < 1 << 16:
813 self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
814
815 elif size < 1 << 32:
816 self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
817
818 else:
819 self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
820
def _write_object(self, value):
    """Serialize a single object at the current position of ``self._fp``.

    The starting offset is stored in ``self._object_offsets`` under the
    reference number returned by ``self._getrefnum(value)``.  Lists, tuples
    and dicts are written as packed reference numbers of their members
    (using ``self._ref_format``), not as the members themselves.

    Raises OverflowError for an int below -2**63 or at/above 2**64, and
    TypeError for a non-string dict key (unless ``self._skipkeys`` is true)
    or for a value matching none of the handled types.
    """
    refnum = self._getrefnum(value)
    out = self._fp
    self._object_offsets[refnum] = out.tell()

    # Singletons are tested by identity, and before the int branch because
    # bool is a subclass of int.
    if value is None:
        out.write(b'\x00')
        return
    if value is False:
        out.write(b'\x08')
        return
    if value is True:
        out.write(b'\x09')
        return

    if isinstance(value, int):
        if value < 0:
            # Negative values always use the signed 8-byte form.
            try:
                packed = struct.pack('>Bq', 0x13, value)
            except struct.error:
                raise OverflowError(value) from None
        elif value < 1 << 8:
            packed = struct.pack('>BB', 0x10, value)
        elif value < 1 << 16:
            packed = struct.pack('>BH', 0x11, value)
        elif value < 1 << 32:
            packed = struct.pack('>BL', 0x12, value)
        elif value < 1 << 63:
            packed = struct.pack('>BQ', 0x13, value)
        elif value < 1 << 64:
            # Too large for a signed 8-byte field: emit 16 signed bytes.
            packed = b'\x14' + value.to_bytes(16, 'big', signed=True)
        else:
            raise OverflowError(value)
        out.write(packed)
        return

    if isinstance(value, float):
        out.write(struct.pack('>Bd', 0x23, value))
        return

    if isinstance(value, datetime.datetime):
        # Stored as seconds relative to 2001-01-01 00:00:00.
        elapsed = value - datetime.datetime(2001, 1, 1)
        out.write(struct.pack('>Bd', 0x33, elapsed.total_seconds()))
        return

    if isinstance(value, Data):
        self._write_size(0x40, len(value.data))
        out.write(value.data)
        return

    if isinstance(value, (bytes, bytearray)):
        self._write_size(0x40, len(value))
        out.write(value)
        return

    if isinstance(value, str):
        try:
            encoded = value.encode('ascii')
        except UnicodeEncodeError:
            # Non-ASCII text: UTF-16BE, with the size counted in 2-byte units.
            encoded = value.encode('utf-16be')
            self._write_size(0x60, len(encoded) // 2)
        else:
            self._write_size(0x50, len(value))
        out.write(encoded)
        return

    if isinstance(value, (list, tuple)):
        member_refs = list(map(self._getrefnum, value))
        count = len(member_refs)
        self._write_size(0xA0, count)
        out.write(struct.pack('>' + self._ref_format * count, *member_refs))
        return

    if isinstance(value, dict):
        pairs = value.items()
        if self._sort_keys:
            pairs = sorted(pairs)

        key_refs = []
        val_refs = []
        for key, member in pairs:
            if isinstance(key, str):
                key_refs.append(self._getrefnum(key))
                val_refs.append(self._getrefnum(member))
            elif not self._skipkeys:
                raise TypeError("keys must be strings")

        count = len(key_refs)
        self._write_size(0xD0, count)
        layout = '>' + self._ref_format * count
        # All key references are written first, then all value references.
        out.write(struct.pack(layout, *key_refs))
        out.write(struct.pack(layout, *val_refs))
        return

    raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100906
907
def _is_fmt_binary(header):
    """Return True when *header* begins with the 8-byte magic ``bplist00``."""
    magic = b'bplist00'
    return header[:len(magic)] == magic
910
911
912#
913# Generic bits
914#
915
# Registry of the supported plist formats.  Each entry supplies the header
# detection function plus the parser and writer classes for that format.
# load() tries the entries in insertion order when no format is given.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
928
929
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be (readable) file object.
    Return the unpacked root object (which usually is a dictionary).

    When 'fmt' is None the first 32 bytes of 'fp' are read, the file is
    rewound to offset 0, and each registered format's detector is tried in
    turn; InvalidFileException is raised if none of them matches.  Otherwise
    the parser registered under 'fmt' in _FORMATS is used directly.
    'use_builtin_types' and 'dict_type' are forwarded to the parser.
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        # First registered format whose detector accepts the header wins.
        parser_cls = next(
            (entry['parser']
             for entry in _FORMATS.values()
             if entry['detect'](header)),
            None,
        )
        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(use_builtin_types=use_builtin_types,
                        dict_type=dict_type)
    return parser.parse(fp)
950
951
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    'value' is wrapped in a BytesIO and handed to load() together with the
    keyword options, which are passed through unchanged.
    """
    options = {
        'fmt': fmt,
        'use_builtin_types': use_builtin_types,
        'dict_type': dict_type,
    }
    return load(BytesIO(value), **options)
959
960
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a (writable)
    file object.

    'fmt' selects the writer class from _FORMATS; ValueError is raised when
    it is not a registered format.  'sort_keys' and 'skipkeys' are forwarded
    to the writer.
    """
    entry = _FORMATS.get(fmt)
    if entry is None:
        raise ValueError("Unsupported format: %r" % (fmt,))

    serializer = entry["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
    serializer.write(value)
970
971
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The value is written with dump() into an in-memory buffer whose
    contents are returned; the keyword options are passed through unchanged.
    """
    with BytesIO() as sink:
        dump(value, sink, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
        return sink.getvalue()