blob: 2113a2dc57b4e188ef407bd6a3369b4bad90efeb [file] [log] [blame]
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data, bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Public API of this module.
__all__ = [
    # Deprecated entry points, kept for backward compatibility.
    "readPlist",
    "writePlist",
    "readPlistFromBytes",
    "writePlistToBytes",
    "Data",
    # Errors and format selectors.
    "InvalidFileException",
    "FMT_XML",
    "FMT_BINARY",
    # Current entry points.
    "load",
    "dump",
    "loads",
    "dumps",
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# The two serialization formats this module understands.  The members are
# also published as module-level names (FMT_XML, FMT_BINARY).
PlistFormat = enum.Enum(
    'PlistFormat', ['FMT_XML', 'FMT_BINARY'], module=__name__)
for _fmt_name, _fmt_member in PlistFormat.__members__.items():
    globals()[_fmt_name] = _fmt_member
del _fmt_name, _fmt_member
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010072#
73#
74# Deprecated functionality
75#
76#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
@contextlib.contextmanager
def _maybe_open(pathOrFile, mode):
    """Context manager yielding a file object for *pathOrFile*.

    A ``str`` argument is treated as a path: it is opened with *mode* and
    closed again when the ``with`` block exits.  Any other object is yielded
    unchanged and is not closed here.
    """
    if not isinstance(pathOrFile, str):
        yield pathOrFile
        return

    with open(pathOrFile, mode) as opened:
        yield opened
87
88
def readPlist(pathOrFile):
    """
    Load a property list from a file name or a readable binary file object
    and return its root object.

    Binary data is returned wrapped in Data instances, and the format is
    auto-detected.

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
         category=DeprecationWarning, stacklevel=2)

    with _maybe_open(pathOrFile, 'rb') as source:
        plist = load(source, fmt=None, use_builtin_types=False)
    return plist
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100101
def writePlist(value, pathOrFile):
    """
    Serialize 'value' as an XML plist into 'pathOrFile', which is either a
    file name or a writable binary file object.  Dictionary keys are
    written in sorted order.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
         category=DeprecationWarning, stacklevel=2)
    with _maybe_open(pathOrFile, 'wb') as sink:
        dump(value, sink, fmt=FMT_XML, sort_keys=True, skipkeys=False)
113
114
def readPlistFromBytes(data):
    """
    Parse plist content held in a bytes object and return the root object.
    Binary data is returned wrapped in Data instances.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         category=DeprecationWarning, stacklevel=2)
    stream = BytesIO(data)
    return load(stream, fmt=None, use_builtin_types=False)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100124
125
def writePlistToBytes(value):
    """
    Serialize 'value' as an XML plist and return the result as bytes.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         category=DeprecationWarning, stacklevel=2)
    buffer = BytesIO()
    dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return buffer.getvalue()
137
Christian Heimes7e182542008-01-27 15:20:13 +0000138
class Data:
    """
    Thin wrapper marking a bytes payload as plist <data>.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        if isinstance(data, bytes):
            self.data = data
            return
        raise TypeError("data must be as bytes")

    @classmethod
    def fromBase64(cls, data):
        # Alternate constructor: decode base64 text (str or bytes) first.
        decoded = _decode_base64(data)
        return cls(decoded)

    def asBase64(self, maxlinelength=76):
        # Base64-encode the payload, wrapped to 'maxlinelength' columns.
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        # Comparable with other Data objects and with raw bytes; anything
        # else is left to the other operand.
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            return NotImplemented
        return self.data == other

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, repr(self.data))
170
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100171#
172#
173# End of deprecated functionality
174#
175#
Christian Heimes7e182542008-01-27 15:20:13 +0000176
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100177
178#
179# XML support
180#
181
182
# Prologue written at the top of every XML plist: the XML declaration
# followed by Apple's plist DOCTYPE, each on its own line.
PLISTHEADER = (
    b'<?xml version="1.0" encoding="UTF-8"?>\n'
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
)
188
189
# Matches any C0 control character (U+0000..U+001F) other than
# tab (\t), line feed (\n) and carriage return (\r).
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]"
)
194
def _encode_base64(s, maxlinelength=76):
    """Base64-encode bytes *s* as newline-terminated lines.

    Modelled on base64.encodebytes(), with the line width made configurable:
    each output line carries at most ``maxlinelength`` base64 characters
    (rounded down to a multiple of four).
    """
    # Three input bytes become four output characters.
    step = (maxlinelength // 4) * 3
    return b''.join(
        binascii.b2a_base64(s[start:start + step])
        for start in range(0, len(s), step)
    )
203
def _decode_base64(s):
    """Decode base64 content given either as str or as a bytes-like object."""
    # binascii wants bytes; text input is encoded as UTF-8 first.
    payload = s.encode("utf-8") if isinstance(s, str) else s
    return binascii.a2b_base64(payload)
210
# Date contents should conform to a subset of ISO 8601, in particular
# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units may be
# omitted (with a loss of precision); the trailing 'Z' is always required.
_dateParser = re.compile(
    r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z",
    re.ASCII,
)
215
216
def _date_from_string(s):
    """Convert plist date text (see _dateParser) into a datetime.datetime.

    Fields are consumed from 'year' downwards and stop at the first one
    that is absent from the text.
    """
    groups = _dateParser.match(s).groupdict()
    ordered = (
        groups[name]
        for name in ('year', 'month', 'day', 'hour', 'minute', 'second')
    )
    present = itertools.takewhile(lambda field: field is not None, ordered)
    return datetime.datetime(*[int(field) for field in present])
227
228
def _date_to_string(d):
    """Format datetime *d* as plist date text, to whole-second precision."""
    fields = (d.year, d.month, d.day, d.hour, d.minute, d.second)
    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % fields
234
def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalised to '\\n' and the characters '&', '<' and '>'
    are replaced by entities.  Raises ValueError if *text* contains a
    control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # Order matters: "\r\n" must collapse before lone "\r", and '&' must be
    # escaped before the entities that themselves contain '&' are added.
    substitutions = (
        ("\r\n", "\n"),   # DOS line endings
        ("\r", "\n"),     # old Mac line endings
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for needle, replacement in substitutions:
        text = text.replace(needle, replacement)
    return text
246
class _PlistParser:
    """Build Python objects from an XML plist using expat callbacks.

    Element handlers are looked up by name: ``begin_<tag>`` when a tag opens
    and ``end_<tag>`` when it closes.  Tags with no matching method are
    ignored.
    """

    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type
        self.root = None          # top-level object, set once it is seen
        self.current_key = None   # pending <key> awaiting its value
        self.stack = []           # currently open containers

    def parse(self, fileobj):
        """Parse the XML in *fileobj* and return the root object."""
        expat = ParserCreate()
        expat.StartElementHandler = self.handle_begin_element
        expat.EndElementHandler = self.handle_end_element
        expat.CharacterDataHandler = self.handle_data
        self.parser = expat
        expat.ParseFile(fileobj)
        return self.root

    def handle_begin_element(self, element, attrs):
        # Every opening tag starts a fresh character-data buffer.
        self.data = []
        begin = getattr(self, "begin_" + element, None)
        if begin is None:
            return
        begin(attrs)

    def handle_end_element(self, element):
        end = getattr(self, "end_" + element, None)
        if end is None:
            return
        end()

    def handle_data(self, data):
        self.data.append(data)

    def add_object(self, value):
        """Attach *value* to the innermost open container, or make it root."""
        key = self.current_key
        if key is not None:
            # A key is pending, so the value belongs in the enclosing dict.
            parent = self.stack[-1]
            if not isinstance(parent, dict):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            parent[key] = value
            self.current_key = None
        elif self.stack:
            # No key pending: only an array can accept a bare value.
            parent = self.stack[-1]
            if not isinstance(parent, list):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            parent.append(value)
        else:
            # Nothing is open yet: this is the root object.
            self.root = value

    def get_data(self):
        """Return the buffered character data and reset the buffer."""
        chunks, self.data = self.data, []
        return ''.join(chunks)

    # element handlers

    def begin_dict(self, attrs):
        mapping = self._dict_type()
        self.add_object(mapping)
        self.stack.append(mapping)

    def end_dict(self):
        if self.current_key:
            raise ValueError("missing value for key '%s' at line %d" %
                             (self.current_key, self.parser.CurrentLineNumber))
        self.stack.pop()

    def end_key(self):
        # A key is only legal directly inside a dict, and only when the
        # previous key has already received its value.
        misplaced = bool(self.current_key) or not isinstance(
            self.stack[-1], dict)
        if misplaced:
            raise ValueError("unexpected key at line %d" %
                             self.parser.CurrentLineNumber)
        self.current_key = self.get_data()

    def begin_array(self, attrs):
        items = []
        self.add_object(items)
        self.stack.append(items)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.add_object(True)

    def end_false(self):
        self.add_object(False)

    def end_integer(self):
        text = self.get_data()
        self.add_object(int(text))

    def end_real(self):
        text = self.get_data()
        self.add_object(float(text))

    def end_string(self):
        self.add_object(self.get_data())

    def end_data(self):
        # Plain bytes when builtin types are requested, otherwise the
        # deprecated Data wrapper.
        if self._use_builtin_types:
            payload = _decode_base64(self.get_data())
        else:
            payload = Data.fromBase64(self.get_data())
        self.add_object(payload)

    def end_date(self):
        self.add_object(_date_from_string(self.get_data()))
349
350
class _DumbXMLWriter:
    """Minimal indenting XML emitter that writes UTF-8 bytes to *file*.

    *file* receives bytes only (lines are encoded, and a b'\\n' terminator
    is written), so *indent* must be a bytes object whenever a non-zero
    indent level is reached.
    NOTE(review): the ``indent="\\t"`` default is a str, which does not mix
    with those bytes writes on a binary file; _PlistWriter always passes
    bytes.  The default is kept unchanged for interface compatibility.
    """

    def __init__(self, file, indent_level=0, indent="\t"):
        self.file = file
        self.stack = []                    # names of currently open elements
        self._indent_level = indent_level
        self.indent = indent

    def begin_element(self, element):
        """Write ``<element>`` and indent everything that follows."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self._indent_level += 1

    def end_element(self, element):
        """Write ``</element>``; *element* must be the innermost open tag."""
        assert self._indent_level > 0
        # Pop outside the assert: assert statements are removed under
        # ``python -O``, which would otherwise leave the stack un-popped.
        closed = self.stack.pop()
        assert closed == element
        self._indent_level -= 1
        self.writeln("</%s>" % element)

    def simple_element(self, element, value=None):
        """Write ``<element>value</element>``, or ``<element/>`` if no value.

        *value* is XML-escaped before being written.
        """
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))

        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write *line* at the current indentation, followed by a newline.

        An empty *line* produces just the newline, without indentation.
        """
        if line:
            # plist has fixed encoding of utf-8

            # XXX: is this test needed?
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self._indent_level * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
387
388
class _PlistWriter(_DumbXMLWriter):
    """Serialize Python objects as an XML property list."""

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):
        # The XML prologue goes out before any element is written.
        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._skipkeys = skipkeys
        self._sort_keys = sort_keys

    def write(self, value):
        """Write the complete <plist> document for *value*."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write *value* as the matching plist element.

        Raises TypeError for unsupported types and OverflowError for
        integers outside [-2**63, 2**64).
        """
        if isinstance(value, str):
            self.simple_element("string", value)
            return

        # Booleans are tested by identity, and before the int check,
        # because bool is a subclass of int.
        if value is True:
            self.simple_element("true")
            return

        if value is False:
            self.simple_element("false")
            return

        if isinstance(value, int):
            if not (-1 << 63 <= value < 1 << 64):
                raise OverflowError(value)
            self.simple_element("integer", "%d" % value)
            return

        if isinstance(value, float):
            self.simple_element("real", repr(value))
            return

        if isinstance(value, dict):
            self.write_dict(value)
            return

        if isinstance(value, Data):
            self.write_data(value)
            return

        if isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)
            return

        if isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))
            return

        if isinstance(value, (tuple, list)):
            self.write_array(value)
            return

        raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        # Data wrapper: serialize the wrapped bytes.
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* as a base64 <data> element."""
        self.begin_element("data")
        # The base64 lines sit at the same level as the <data> tag itself.
        self._indent_level -= 1
        # Shrink the line width by the visual indent (tabs count as 8
        # columns), but never below 16 characters.
        visual_indent = self.indent.replace(b"\t", b" " * 8) * self._indent_level
        maxlinelength = max(16, 76 - len(visual_indent))

        encoded = _encode_base64(data, maxlinelength)
        for chunk in filter(None, encoded.split(b"\n")):
            self.writeln(chunk)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write *d* as a <dict>; an empty mapping becomes ``<dict/>``.

        Non-string keys raise TypeError unless skipkeys was requested, in
        which case those entries are omitted.
        """
        if not d:
            self.simple_element("dict")
            return

        self.begin_element("dict")
        entries = sorted(d.items()) if self._sort_keys else d.items()
        for key, value in entries:
            if not isinstance(key, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            self.simple_element("key", key)
            self.write_value(value)
        self.end_element("dict")

    def write_array(self, array):
        """Write *array* as an <array>; an empty one becomes ``<array/>``."""
        if not array:
            self.simple_element("array")
            return

        self.begin_element("array")
        for item in array:
            self.write_value(item)
        self.end_element("array")
487
488
def _is_fmt_xml(header):
    """Return True if *header* (the first bytes of a file) looks like XML.

    Recognizes an XML declaration or a <plist> tag, either directly or after
    a UTF-8 / UTF-16 byte order mark in the corresponding encoding.
    """
    prefixes = (b'<?xml', b'<plist')

    if header.startswith(prefixes):
        return True

    # Also check for alternative XML encodings, this is slightly
    # overkill because the Apple tools (and plistlib) will not
    # generate files with these encodings.
    # UTF-32 is deliberately not listed: expat does not support it.
    boms = (
        (codecs.BOM_UTF8, "utf-8"),
        (codecs.BOM_UTF16_BE, "utf-16-be"),
        (codecs.BOM_UTF16_LE, "utf-16-le"),
    )
    for bom, encoding in boms:
        if not header.startswith(bom):
            continue
        if any(header.startswith(bom + start.decode('ascii').encode(encoding))
               for start in prefixes):
            return True

    return False
516
517#
518# Binary Plist
519#
520
521
class InvalidFileException (ValueError):
    """Raised when input cannot be interpreted as a plist file."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
525
# struct format character for an unsigned integer of the given byte width.
_BINARY_FORMAT = {
    1: 'B',
    2: 'H',
    4: 'L',
    8: 'Q',
}
527
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return
    the root object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """

    def __init__(self, use_builtin_types, dict_type):
        self._dict_type = dict_type
        self._use_builtin_types = use_builtin_types

    def parse(self, fp):
        """Parse the binary plist in seekable file *fp*; return its root."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            fp.seek(-32, os.SEEK_END)
            trailer = fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            fields = struct.unpack('>6xBBQQQ', trailer)
            (offset_size, self._ref_size, num_objects, top_object,
             offset_table_offset) = fields
            fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            return self._read_object(self._object_offsets[top_object])

        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL != 0xF:
            return tokenL

        # 0xF means the real size follows as an integer object: a marker
        # byte whose low two bits give log2 of the width, then the value.
        width = 1 << (self._fp.read(1)[0] & 0x3)
        layout = '>' + _BINARY_FORMAT[width]
        return struct.unpack(layout, self._fp.read(width))[0]

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each."""
        total = size * n
        data = self._fp.read(total)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)

        # Widths struct has no code for are decoded by hand.
        if not size or len(data) != total:
            raise InvalidFileException()
        return tuple(int.from_bytes(data[pos:pos + size], 'big')
                     for pos in range(0, total, size))

    def _read_refs(self, n):
        """Read *n* object references from the current position."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, offset):
        """
        read the object at offset.

        May recursively read sub-objects (content of an array/dict/set)
        """
        fp = self._fp
        fp.seek(offset)
        token = fp.read(1)[0]
        kind, low = token & 0xF0, token & 0x0F

        if token == 0x00:
            return None

        if token == 0x08:
            return False

        if token == 0x09:
            return True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        if token == 0x0f:
            return b''

        if kind == 0x10:  # int
            # 2**low bytes; widths of 8 bytes and up are read as signed.
            return int.from_bytes(fp.read(1 << low),
                                  'big', signed=low >= 3)

        if token == 0x22:  # real
            return struct.unpack('>f', fp.read(4))[0]

        if token == 0x23:  # real
            return struct.unpack('>d', fp.read(8))[0]

        if token == 0x33:  # date
            seconds = struct.unpack('>d', fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            epoch = datetime.datetime(2001, 1, 1)
            return epoch + datetime.timedelta(seconds=seconds)

        if kind == 0x40:  # data
            length = self._get_size(low)
            if self._use_builtin_types:
                return fp.read(length)
            return Data(fp.read(length))

        if kind == 0x50:  # ascii string
            length = self._get_size(low)
            return fp.read(length).decode('ascii')

        if kind == 0x60:  # unicode string
            length = self._get_size(low)
            return fp.read(length * 2).decode('utf-16be')

        # kind == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        if kind == 0xA0:  # array
            length = self._get_size(low)
            obj_refs = self._read_refs(length)
            return [self._read_object(self._object_offsets[ref])
                    for ref in obj_refs]

        # kind == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # kind == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        if kind == 0xD0:  # dict
            length = self._get_size(low)
            key_refs = self._read_refs(length)
            obj_refs = self._read_refs(length)
            result = self._dict_type()
            for key_ref, obj_ref in zip(key_refs, obj_refs):
                # The value is read before its key.
                item = self._read_object(self._object_offsets[obj_ref])
                result[self._read_object(self._object_offsets[key_ref])] = item
            return result

        raise InvalidFileException()
670
def _count_to_size(count):
    """Return the smallest of 1, 2, 4 or 8 bytes that can hold *count*.

    *count* is a non-negative integer (an object count or a file offset).
    The result is used as a key into _BINARY_FORMAT when packing references
    and offsets.
    """
    if count < 1 << 8:
        return 1

    elif count < 1 << 16:
        return 2

    # This must be a comparison.  A shift (``count << 1 << 32``) is truthy
    # for every non-zero count, which makes the 8-byte branch unreachable.
    elif count < 1 << 32:
        return 4

    else:
        return 8
683
class _BinaryPlistWriter (object):
    """Serialize a Python object tree to *fp* in binary ('bplist00') form.

    *fp* must be a writable binary file that supports tell().
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write the complete binary plist for *value*."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _flatten(self, value):
        """Assign reference numbers to *value* and everything it contains.

        Raises TypeError for a non-string dict key unless skipkeys is set.
        """
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        #
        # Only hashable scalar types may be listed here.  bytearray is
        # unhashable, so probing _objtable with it would raise TypeError;
        # it is left to fall through to the id-keyed table below.
        if isinstance(value, (
                str, int, float, datetime.datetime, bytes)):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            if (type(value.data), value.data) in self._objtable:
                return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        try:
            if isinstance(value, Data):
                self._objtable[(type(value.data), value.data)] = refnum
            else:
                self._objtable[(type(value), value)] = refnum
        except TypeError:
            # Unhashable (dict, list, bytearray, ...): key by identity.
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            # All keys are numbered before any of the values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number _flatten assigned to *value*."""
        try:
            if isinstance(value, Data):
                return self._objtable[(type(value.data), value.data)]
            else:
                return self._objtable[(type(value), value)]
        except TypeError:
            # Unhashable objects live in the id-keyed table.
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write marker byte *token* combined with a length of *size*.

        Sizes below 15 fit in the marker's low nibble; larger ones set the
        nibble to 0xF and follow with an integer object holding the size.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Serialize one flattened object and record its file offset.

        Raises OverflowError for integers outside [-2**63, 2**64) and
        TypeError for unsupported types or non-string dict keys.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative numbers are always stored as signed 8-byte ints.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for a signed 8-byte int: use the 16-byte form.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 2001-01-01.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, Data):
            self._write_size(0x40, len(value.data))
            self._fp.write(value.data)

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            # ASCII when possible, otherwise UTF-16BE with the size counted
            # in 16-bit units.
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            if self._sort_keys:
                rootItems = sorted(value.items())
            else:
                rootItems = value.items()

            for k, v in rootItems:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            # All key references are written first, then all value refs.
            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100892
893
def _is_fmt_binary(header):
    """Return True if *header* starts with the binary plist magic bytes."""
    magic = b'bplist00'
    return header[:len(magic)] == magic
896
897
898#
899# Generic bits
900#
901
# Per-format plumbing: how to recognise the format from a file header, and
# which classes parse and write it.  load() probes the entries in this
# order when no format is given.
_FORMATS = {
    FMT_XML: {
        "detect": _is_fmt_xml,
        "parser": _PlistParser,
        "writer": _PlistWriter,
    },
    FMT_BINARY: {
        "detect": _is_fmt_binary,
        "parser": _BinaryPlistParser,
        "writer": _BinaryPlistWriter,
    },
}
914
915
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be a (readable, seekable) binary
    file object.  Return the unpacked root object (which usually is a
    dictionary).

    When 'fmt' is None the format is detected from the first 32 bytes of
    the file; InvalidFileException is raised if no known format matches.
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        candidates = (
            info['parser']
            for info in _FORMATS.values()
            if info['detect'](header)
        )
        parser_cls = next(candidates, None)
        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(
        use_builtin_types=use_builtin_types, dict_type=dict_type)
    return parser.parse(fp)
936
937
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).
    """
    options = dict(
        fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
    return load(BytesIO(value), **options)
945
946
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a (writable)
    binary file object.

    Raises ValueError if 'fmt' is not a supported format.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_cls = _FORMATS[fmt]["writer"]
    writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
956
957
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()