# blob: 8262fb049e5869bf36e745eaf08faf5875f5aae0
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data, bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Names exported by ``from plistlib import *``.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]

import binascii
import codecs
import contextlib
import datetime
import enum
from io import BytesIO
import itertools
import os
import re
import struct
from warnings import warn
from xml.parsers.expat import ParserCreate


# Enumeration of the supported plist encodings.  Its members are also copied
# into the module namespace so they can be spelled FMT_XML / FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)


#
#
# Deprecated functionality
#
#


@contextlib.contextmanager
def _maybe_open(pathOrFile, mode):
    """Context manager yielding a file object for *pathOrFile*.

    When *pathOrFile* is a ``str`` it is opened with *mode* and closed again
    when the ``with`` block ends.  Any other object is yielded unchanged and
    is not closed here.
    """
    if isinstance(pathOrFile, str):
        with open(pathOrFile, mode) as fp:
            yield fp

    else:
        yield pathOrFile


def readPlist(pathOrFile):
    """
    Read a .plist from a path or file. pathOrFile should either
    be a file name, or a readable binary file object.

    The format is auto-detected (fmt=None) and the file is parsed with
    use_builtin_types=False.  Returns whatever load() returns.

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
        DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'rb') as fp:
        return load(fp, fmt=None, use_builtin_types=False)

def writePlist(value, pathOrFile):
    """
    Write 'value' to a .plist file. 'pathOrFile' may either be a
    file name or a (writable) file object.

    The output is always XML (fmt=FMT_XML) with sorted dictionary keys.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
        DeprecationWarning, 2)
    with _maybe_open(pathOrFile, 'wb') as fp:
        dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)


def readPlistFromBytes(data):
    """
    Read a plist data from a bytes object. Return the root object.

    The format is auto-detected (fmt=None) and the data is parsed with
    use_builtin_types=False.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
        DeprecationWarning, 2)
    return load(BytesIO(data), fmt=None, use_builtin_types=False)


def writePlistToBytes(value):
    """
    Return 'value' as a plist-formatted bytes object.

    The output is always XML (fmt=FMT_XML) with sorted dictionary keys.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
        DeprecationWarning, 2)
    f = BytesIO()
    dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return f.getvalue()


class Data:
    """
    Wrapper for binary data.

    The wrapped bytes object is available as the 'data' attribute.
    Instances compare equal to other Data instances and to plain bytes
    objects holding the same content.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Build an instance from base64-encoded *data*."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(_decode_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the wrapped bytes base64-encoded, split into lines."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            return self.data == other
        else:
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))

#
#
# End of deprecated functionality
#
#


#
# XML support
#


# XML 'header'
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""


# Regex to find any control chars, except for \t \n and \r
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")

def _encode_base64(s, maxlinelength=76):
    """Return bytes *s* base64-encoded as newline-terminated lines.

    Each output line encodes at most (maxlinelength // 4) * 3 input bytes,
    so no line is longer than *maxlinelength* characters.
    """
    # copied from base64.encodebytes(), with added maxlinelength argument
    maxbinsize = (maxlinelength//4)*3
    return b''.join(
        binascii.b2a_base64(s[i : i + maxbinsize])
        for i in range(0, len(s), maxbinsize))

def _decode_base64(s):
    """Decode base64 data given as ``str`` or as a bytes-like object."""
    if isinstance(s, str):
        # a2b_base64 is fed bytes here; text is encoded as UTF-8 first.
        return binascii.a2b_base64(s.encode("utf-8"))

    else:
        return binascii.a2b_base64(s)

# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units
# may be omitted with a loss of precision)
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)


def _date_from_string(s):
    """Convert a plist date string into a datetime.datetime.

    Raises ValueError when *s* does not match the expected date pattern.
    """
    order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    match = _dateParser.match(s)
    if match is None:
        # Report malformed input explicitly instead of failing with an
        # AttributeError on None.
        raise ValueError("invalid date string: %r" % (s,))
    gd = match.groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # Components are nested in the pattern, so the first missing
            # one means all smaller units are missing as well.
            break
        lst.append(int(val))
    return datetime.datetime(*lst)


def _date_to_string(d):
    """Format *d* as 'YYYY-MM-DDTHH:MM:SSZ' (sub-second precision is dropped)."""
    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
        d.year, d.month, d.day,
        d.hour, d.minute, d.second
    )

def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalised to '\\n' and '&', '<' and '>' are replaced
    by entities.  Raises ValueError if *text* contains a control character
    matched by _controlCharPat.
    """
    m = _controlCharPat.search(text)
    if m is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    text = text.replace("\r\n", "\n")       # convert DOS line endings
    text = text.replace("\r", "\n")         # convert Mac line endings
    text = text.replace("&", "&amp;")       # escape '&'
    text = text.replace("<", "&lt;")        # escape '<'
    text = text.replace(">", "&gt;")        # escape '>'
    return text

class _PlistParser:
    """Parser for XML plists, driven by expat callbacks.

    Containers being built are kept on 'stack'; 'current_key' holds the
    most recently read <key> of the dict on top of the stack until its
    value arrives.  Dicts are created with *dict_type*; <data> elements
    become bytes when *use_builtin_types* is true and Data otherwise.
    """

    def __init__(self, use_builtin_types, dict_type):
        self.stack = []
        self.current_key = None
        self.root = None
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fileobj):
        """Parse the XML in *fileobj* and return the root object."""
        self.parser = ParserCreate()
        self.parser.StartElementHandler = self.handle_begin_element
        self.parser.EndElementHandler = self.handle_end_element
        self.parser.CharacterDataHandler = self.handle_data
        self.parser.ParseFile(fileobj)
        return self.root

    def handle_begin_element(self, element, attrs):
        # Every start tag resets the character-data buffer.
        self.data = []
        handler = getattr(self, "begin_" + element, None)
        if handler is not None:
            handler(attrs)

    def handle_end_element(self, element):
        handler = getattr(self, "end_" + element, None)
        if handler is not None:
            handler()

    def handle_data(self, data):
        self.data.append(data)

    def add_object(self, value):
        """Attach *value* to the container being built, or make it the root."""
        if self.current_key is not None:
            if not isinstance(self.stack[-1], type({})):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1][self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            if not isinstance(self.stack[-1], type([])):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1].append(value)

    def get_data(self):
        """Return the buffered character data as one string and clear it."""
        data = ''.join(self.data)
        self.data = []
        return data

    # element handlers

    def begin_dict(self, attrs):
        d = self._dict_type()
        self.add_object(d)
        self.stack.append(d)

    def end_dict(self):
        if self.current_key:
            raise ValueError("missing value for key '%s' at line %d" %
                             (self.current_key,self.parser.CurrentLineNumber))
        self.stack.pop()

    def end_key(self):
        if self.current_key or not isinstance(self.stack[-1], type({})):
            raise ValueError("unexpected key at line %d" %
                             self.parser.CurrentLineNumber)
        self.current_key = self.get_data()

    def begin_array(self, attrs):
        a = []
        self.add_object(a)
        self.stack.append(a)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.add_object(True)

    def end_false(self):
        self.add_object(False)

    def end_integer(self):
        self.add_object(int(self.get_data()))

    def end_real(self):
        self.add_object(float(self.get_data()))

    def end_string(self):
        self.add_object(self.get_data())

    def end_data(self):
        if self._use_builtin_types:
            self.add_object(_decode_base64(self.get_data()))

        else:
            self.add_object(Data.fromBase64(self.get_data()))

    def end_date(self):
        self.add_object(_date_from_string(self.get_data()))


class _DumbXMLWriter:
    """Minimal indenting XML writer that emits UTF-8 bytes to *file*.

    *file* must accept bytes.  *indent* is the bytes sequence written once
    per indentation level in front of every line.
    """

    def __init__(self, file, indent_level=0, indent=b"\t"):
        # The default indent is bytes: writeln() writes it to the same
        # binary file that receives the encoded line and b'\n'.
        self.file = file
        self.stack = []
        self._indent_level = indent_level
        self.indent = indent

    def begin_element(self, element):
        """Write the start tag of *element* and increase the indentation."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self._indent_level += 1

    def end_element(self, element):
        """Decrease the indentation and write the end tag of *element*."""
        assert self._indent_level > 0
        # Pop outside the assert so the stack stays balanced even when
        # assertions are disabled (python -O).
        popped = self.stack.pop()
        assert popped == element
        self._indent_level -= 1
        self.writeln("</%s>" % element)

    def simple_element(self, element, value=None):
        """Write <element>value</element>, or <element/> when value is None."""
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))

        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write *line* (str or bytes) on its own indented line."""
        if line:
            # plist has fixed encoding of utf-8

            # XXX: is this test needed?
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self._indent_level * self.indent)
            self.file.write(line)
        self.file.write(b'\n')


class _PlistWriter(_DumbXMLWriter):
    """Serialise Python objects as an XML plist written to a binary file.

    With *writeHeader* true the XML declaration and DOCTYPE (PLISTHEADER)
    are written as soon as the writer is created.  *sort_keys* writes dict
    items in sorted order; *skipkeys* silently drops non-string dict keys
    instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, dispatching on its type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside the range -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is tested by identity before the int branch below.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, Data):
            self.write_data(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* as a base64-encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the same level as the <data> tag.
        self._indent_level -= 1
        # Shorten the lines by the indentation width (a tab counts as 8
        # columns), but never below 16 characters.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        if d:
            self.begin_element("dict")
            if self._sort_keys:
                items = sorted(d.items())
            else:
                items = d.items()

            for key, value in items:
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")


def _is_fmt_xml(header):
    """Return True if the bytes *header* looks like the start of an XML plist."""
    prefixes = (b'<?xml', b'<plist')

    if header.startswith(prefixes):
        return True

    # Also check for alternative XML encodings, this is slightly
    # overkill because the Apple tools (and plistlib) will not
    # generate files with these encodings.
    for bom, encoding in (
                (codecs.BOM_UTF8, "utf-8"),
                (codecs.BOM_UTF16_BE, "utf-16-be"),
                (codecs.BOM_UTF16_LE, "utf-16-le"),
                # expat does not support utf-32
                #(codecs.BOM_UTF32_BE, "utf-32-be"),
                #(codecs.BOM_UTF32_LE, "utf-32-le"),
            ):
        if not header.startswith(bom):
            continue

        for start in prefixes:
            prefix = bom + start.decode('ascii').encode(encoding)
            if header[:len(prefix)] == prefix:
                return True

    return False

#
# Binary Plist
#


class InvalidFileException (ValueError):
    """Raised when a file cannot be parsed as a plist."""

    def __init__(self, message="Invalid file"):
        ValueError.__init__(self, message)

# Maps an unsigned integer width in bytes to its struct format character.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    Dicts are created with *dict_type*; data objects are returned as bytes
    when *use_builtin_types* is true and wrapped in Data otherwise.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file *fp* and return the root object."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            return self._read_object(self._object_offsets[top_object])

        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            # Corrupt input can also surface as an out-of-range number or
            # undecodable string data; report those as an invalid file too.
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The size did not fit in the low nibble; an integer object
            # holding the real size follows the marker byte.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            # Widths without a struct format code are decoded one by one.
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        return self._read_ints(n, self._ref_size)

    def _read_object(self, offset):
        """
        read the object at offset.

        May recursively read sub-objects (content of an array/dict/set)
        """
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            return None

        elif token == 0x08:
            return False

        elif token == 0x09:
            return True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            return b''

        elif tokenH == 0x10:  # int
            return int.from_bytes(self._fp.read(1 << tokenL),
                                  'big', signed=tokenL >= 3)

        elif token == 0x22: # real
            return struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23: # real
            return struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                return self._fp.read(s)
            else:
                return Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result =  self._fp.read(s).decode('ascii')
            return result

        elif tokenH == 0x60:  # unicode string
            s = self._get_size(tokenL)
            return self._fp.read(s * 2).decode('utf-16be')

        # tokenH == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            return [self._read_object(self._object_offsets[x])
                for x in obj_refs]

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(self._object_offsets[k])
                    ] = self._read_object(self._object_offsets[o])
            return result

        raise InvalidFileException()

def _count_to_size(count):
    """Return the smallest width in bytes (1, 2, 4 or 8) that can hold *count*."""
    if count < 1 << 8:
        return 1

    elif count < 1 << 16:
        return 2

    # This must be a comparison; the previous 'count << 1 << 32' was a shift
    # that is truthy for every non-zero count, so 8 was never returned.
    elif count < 1 << 32:
        return 4

    else:
        return 8

class _BinaryPlistWriter (object):
    """Serialise Python objects as a binary plist written to *fp*.

    *fp* must support write() and tell().  *sort_keys* writes dict items in
    sorted order; *skipkeys* silently drops non-string dict keys instead of
    raising TypeError.
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* as a complete binary plist: header, objects,
        offset table and trailer."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _flatten(self, value):
        """Assign a reference number to *value* and, recursively, to the
        objects it contains."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, (
                str, int, float, datetime.datetime, bytes, bytearray)):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            if (type(value.data), value.data) in self._objtable:
                return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        try:
            if isinstance(value, Data):
                self._objtable[(type(value.data), value.data)] = refnum
            else:
                self._objtable[(type(value), value)] = refnum
        except TypeError:
            # Unhashable objects are tracked by identity instead.
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number that _flatten() assigned to *value*."""
        try:
            if isinstance(value, Data):
                return self._objtable[(type(value.data), value.data)]
            else:
                return self._objtable[(type(value), value)]
        except TypeError:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the marker byte *token* combined with *size*.

        Sizes below 15 are stored in the low nibble of the marker; larger
        sizes set the nibble to 0xF and follow as an integer object.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write one object and record its file offset.

        Raises TypeError for unsupported types and OverflowError for
        integers that do not fit the format.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Stored as a 16-byte signed integer.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 1/1/2001.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, Data):
            self._write_size(0x40, len(value.data))
            self._fp.write(value.data)

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size is counted in 2-byte UTF-16 code units.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            if self._sort_keys:
                rootItems = sorted(value.items())
            else:
                rootItems = value.items()

            for k, v in rootItems:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)


def _is_fmt_binary(header):
    """Return True if *header* starts with the binary plist magic b'bplist00'."""
    return header[:8] == b'bplist00'


#
# Generic bits
#

# For each supported format: the function that recognises it from a file
# header, and the parser and writer classes that implement it.
_FORMATS={
    FMT_XML: dict(
        detect=_is_fmt_xml,
        parser=_PlistParser,
        writer=_PlistWriter,
    ),
    FMT_BINARY: dict(
        detect=_is_fmt_binary,
        parser=_BinaryPlistParser,
        writer=_BinaryPlistWriter,
    )
}


def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be (readable) file object.
    Return the unpacked root object (which usually is a dictionary).

    When 'fmt' is None the format is detected from the first 32 bytes of
    the file, which therefore has to be seekable; InvalidFileException is
    raised if no known format matches.  'use_builtin_types' and
    'dict_type' are passed on to the parser.
    """
    if fmt is None:
        header = fp.read(32)
        fp.seek(0)
        for info in _FORMATS.values():
            if info['detect'](header):
                P = info['parser']
                break

        else:
            raise InvalidFileException()

    else:
        P = _FORMATS[fmt]['parser']

    p = P(use_builtin_types=use_builtin_types, dict_type=dict_type)
    return p.parse(fp)


def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    The keyword arguments are passed on to load() unchanged.
    """
    fp = BytesIO(value)
    return load(
        fp, fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)


def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a (writable)
    file object.

    'fmt' selects the output format; ValueError is raised if it is not a
    supported format.  'sort_keys' and 'skipkeys' are passed on to the
    writer.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
    writer.write(value)


def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The keyword arguments are passed on to dump() unchanged.
    """
    fp = BytesIO()
    dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return fp.getvalue()