blob: 0133c89bdc666c65ceed328d2190c79c9493931b [file] [log] [blame]
Benjamin Petersonef3e4c22009-04-11 19:48:14 +00001r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
Christian Heimes7e182542008-01-27 15:20:13 +00002
Ezio Melotti6e9b1df2009-09-16 00:49:03 +00003The property list (.plist) file format is a simple XML pickle supporting
Christian Heimes7e182542008-01-27 15:20:13 +00004basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
Ronald Oussorenc5cf7972013-11-21 15:46:49 +01007To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
Christian Heimes7e182542008-01-27 15:20:13 +000010
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010011To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
Christian Heimes7e182542008-01-27 15:20:13 +000013returns the top level object (again, usually a dictionary).
14
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010015To work with plist data in bytes objects, you can use loads()
16and dumps().
Christian Heimes7e182542008-01-27 15:20:13 +000017
18Values can be strings, integers, floats, booleans, tuples, lists,
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010019dictionaries (but only with string keys), Data, bytes, bytearray, or
20datetime.datetime objects.
Christian Heimes7e182542008-01-27 15:20:13 +000021
22Generate Plist example:
23
24 pl = dict(
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000025 aString = "Doodah",
26 aList = ["A", "B", 12, 32.1, [1, 2, 3]],
Christian Heimes7e182542008-01-27 15:20:13 +000027 aFloat = 0.1,
28 anInt = 728,
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000029 aDict = dict(
30 anotherString = "<hello & hi there!>",
31 aUnicodeValue = "M\xe4ssig, Ma\xdf",
32 aTrueValue = True,
33 aFalseValue = False,
Christian Heimes7e182542008-01-27 15:20:13 +000034 ),
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010035 someData = b"<binary gunk>",
36 someMoreData = b"<lots of binary gunk>" * 10,
Christian Heimes7e182542008-01-27 15:20:13 +000037 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38 )
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010039 with open(fileName, 'wb') as fp:
40 dump(pl, fp)
Christian Heimes7e182542008-01-27 15:20:13 +000041
42Parse Plist example:
43
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010044 with open(fileName, 'rb') as fp:
45 pl = load(fp)
46 print(pl["aKey"])
Christian Heimes7e182542008-01-27 15:20:13 +000047"""
# Names exported by ``from plistlib import *``.  readPlist, writePlist,
# readPlistFromBytes, writePlistToBytes and Data are the deprecated API kept
# for backwards compatibility; FMT_XML and FMT_BINARY are the members of the
# PlistFormat enum that are injected into the module namespace below.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps", "UID"
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# Enumeration with the two members FMT_XML and FMT_BINARY, created through
# the functional Enum API; module=__name__ records this module as the
# enum's home module.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
# Expose the members as module-level globals so callers can write
# plistlib.FMT_XML / plistlib.FMT_BINARY directly.
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010072#
73#
74# Deprecated functionality
75#
76#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010079@contextlib.contextmanager
80def _maybe_open(pathOrFile, mode):
81 if isinstance(pathOrFile, str):
82 with open(pathOrFile, mode) as fp:
83 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +000084
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010085 else:
86 yield pathOrFile
87
88
def readPlist(pathOrFile):
    """
    Read a .plist and return what load() produces for it.

    'pathOrFile' is either a file name (str) or a readable binary file
    object.  load() is invoked with fmt=None and use_builtin_types=False.

    This function is deprecated, use load instead.
    """
    # stacklevel=2 attributes the warning to the caller of readPlist().
    warn("The readPlist function is deprecated, use load() instead",
         category=DeprecationWarning, stacklevel=2)

    with _maybe_open(pathOrFile, 'rb') as stream:
        root = load(stream, fmt=None, use_builtin_types=False)
    return root
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100101
def writePlist(value, pathOrFile):
    """
    Serialise 'value' as an XML plist into 'pathOrFile'.

    'pathOrFile' is either a file name (str) or a writable binary file
    object.  dump() is invoked with fmt=FMT_XML, sort_keys=True and
    skipkeys=False.

    This function is deprecated, use dump instead.
    """
    # stacklevel=2 attributes the warning to the caller of writePlist().
    warn("The writePlist function is deprecated, use dump() instead",
         category=DeprecationWarning, stacklevel=2)

    xml_options = dict(fmt=FMT_XML, sort_keys=True, skipkeys=False)
    with _maybe_open(pathOrFile, 'wb') as stream:
        dump(value, stream, **xml_options)
113
114
def readPlistFromBytes(data):
    """
    Parse plist content held in the bytes object 'data'.

    The bytes are wrapped in a BytesIO and handed to load() with fmt=None
    and use_builtin_types=False; the value load() returns is passed through.

    This function is deprecated, use loads instead.
    """
    # stacklevel=2 attributes the warning to the caller.
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         category=DeprecationWarning, stacklevel=2)

    stream = BytesIO(data)
    return load(stream, fmt=None, use_builtin_types=False)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100124
125
def writePlistToBytes(value):
    """
    Serialise 'value' as an XML plist and return the result as bytes.

    dump() writes into an in-memory BytesIO with fmt=FMT_XML, sort_keys=True
    and skipkeys=False; the buffer contents are returned.

    This function is deprecated, use dumps instead.
    """
    # stacklevel=2 attributes the warning to the caller.
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         category=DeprecationWarning, stacklevel=2)

    buffer = BytesIO()
    xml_options = dict(fmt=FMT_XML, sort_keys=True, skipkeys=False)
    dump(value, buffer, **xml_options)
    return buffer.getvalue()
137
Christian Heimes7e182542008-01-27 15:20:13 +0000138
class Data:
    """
    Thin wrapper around a bytes payload, stored in the 'data' attribute.

    Instances compare equal to other instances and to plain bytes objects
    carrying the same payload.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        # Only real bytes objects are accepted.
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Build an instance from base64 text (str or bytes)."""
        # Decoding is delegated to the module helper, which calls
        # binascii.a2b_base64 directly.
        decoded = _decode_base64(data)
        return cls(decoded)

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded, split into lines."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            payload = other.data
        elif isinstance(other, bytes):
            payload = other
        else:
            # Let Python try the reflected comparison.
            return NotImplemented
        return self.data == payload

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data!r})"
170
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100171#
172#
173# End of deprecated functionality
174#
175#
Christian Heimes7e182542008-01-27 15:20:13 +0000176
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100177
class UID:
    """
    Wrapper marking an integer as a plist UID value.

    The wrapped integer is kept in the 'data' attribute and must satisfy
    0 <= data < 2**64.  Instances are hashable, comparable with other UID
    instances, usable wherever an index is expected (__index__) and
    picklable (__reduce__).
    """

    def __init__(self, data):
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data!r})"

    def __reduce__(self):
        # Recreate the object by calling the class with the stored integer.
        return self.__class__, (self.data,)

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        # Kept consistent with __eq__: equal UIDs wrap equal integers.
        return hash(self.data)
204
205
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100206#
207# XML support
208#
209
210
# XML 'header': the XML declaration followed by the plist DOCTYPE.
# _PlistWriter.__init__ writes these bytes to the output file first when its
# writeHeader argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
216
217
# Regex to find any control chars, except for \t \n and \r
# (i.e. code points 0x00-0x1f without 0x09, 0x0a and 0x0d).  _escape() uses
# it to reject strings that contain such characters.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
222
223def _encode_base64(s, maxlinelength=76):
224 # copied from base64.encodebytes(), with added maxlinelength argument
225 maxbinsize = (maxlinelength//4)*3
226 pieces = []
227 for i in range(0, len(s), maxbinsize):
228 chunk = s[i : i + maxbinsize]
229 pieces.append(binascii.b2a_base64(chunk))
230 return b''.join(pieces)
231
232def _decode_base64(s):
233 if isinstance(s, str):
234 return binascii.a2b_base64(s.encode("utf-8"))
235
236 else:
237 return binascii.a2b_base64(s)
238
239# Contents should conform to a subset of ISO 8601
240# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
241# may be omitted with # a loss of precision)
242_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
243
244
245def _date_from_string(s):
246 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
247 gd = _dateParser.match(s).groupdict()
248 lst = []
249 for key in order:
250 val = gd[key]
251 if val is None:
252 break
253 lst.append(int(val))
254 return datetime.datetime(*lst)
255
256
257def _date_to_string(d):
258 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
259 d.year, d.month, d.day,
260 d.hour, d.minute, d.second
261 )
262
def _escape(text):
    """Return *text* prepared for use as XML character data.

    Raises ValueError if *text* contains a character matched by
    _controlCharPat.  Otherwise line endings are normalised to "\\n" and
    the characters '&', '<' and '>' are replaced by their entity references.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # The order is significant: "\r\n" must be handled before a lone "\r",
    # and '&' must be escaped before the other replacements add new '&'s.
    replacements = (
        ("\r\n", "\n"),     # convert DOS line endings
        ("\r", "\n"),       # convert Mac line endings
        ("&", "&amp;"),     # escape '&'
        ("<", "&lt;"),      # escape '<'
        (">", "&gt;"),      # escape '>'
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text
274
275class _PlistParser:
276 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000277 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100278 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000279 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100280 self._use_builtin_types = use_builtin_types
281 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000282
283 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700284 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100285 self.parser.StartElementHandler = self.handle_begin_element
286 self.parser.EndElementHandler = self.handle_end_element
287 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700288 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000289 return self.root
290
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100291 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000292 self.data = []
293 handler = getattr(self, "begin_" + element, None)
294 if handler is not None:
295 handler(attrs)
296
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100297 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000298 handler = getattr(self, "end_" + element, None)
299 if handler is not None:
300 handler()
301
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100302 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000303 self.data.append(data)
304
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100305 def add_object(self, value):
306 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700307 if not isinstance(self.stack[-1], type({})):
308 raise ValueError("unexpected element at line %d" %
309 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100310 self.stack[-1][self.current_key] = value
311 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000312 elif not self.stack:
313 # this is the root object
314 self.root = value
315 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700316 if not isinstance(self.stack[-1], type([])):
317 raise ValueError("unexpected element at line %d" %
318 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000319 self.stack[-1].append(value)
320
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100321 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000322 data = ''.join(self.data)
323 self.data = []
324 return data
325
326 # element handlers
327
328 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100329 d = self._dict_type()
330 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000331 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100332
Christian Heimes7e182542008-01-27 15:20:13 +0000333 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100334 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700335 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100336 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000337 self.stack.pop()
338
339 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100340 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700341 raise ValueError("unexpected key at line %d" %
342 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100343 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000344
345 def begin_array(self, attrs):
346 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100347 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000348 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100349
Christian Heimes7e182542008-01-27 15:20:13 +0000350 def end_array(self):
351 self.stack.pop()
352
353 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100354 self.add_object(True)
355
Christian Heimes7e182542008-01-27 15:20:13 +0000356 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100357 self.add_object(False)
358
Christian Heimes7e182542008-01-27 15:20:13 +0000359 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100360 self.add_object(int(self.get_data()))
361
Christian Heimes7e182542008-01-27 15:20:13 +0000362 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100363 self.add_object(float(self.get_data()))
364
Christian Heimes7e182542008-01-27 15:20:13 +0000365 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100366 self.add_object(self.get_data())
367
Christian Heimes7e182542008-01-27 15:20:13 +0000368 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100369 if self._use_builtin_types:
370 self.add_object(_decode_base64(self.get_data()))
371
372 else:
373 self.add_object(Data.fromBase64(self.get_data()))
374
Christian Heimes7e182542008-01-27 15:20:13 +0000375 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100376 self.add_object(_date_from_string(self.get_data()))
377
378
379class _DumbXMLWriter:
380 def __init__(self, file, indent_level=0, indent="\t"):
381 self.file = file
382 self.stack = []
383 self._indent_level = indent_level
384 self.indent = indent
385
386 def begin_element(self, element):
387 self.stack.append(element)
388 self.writeln("<%s>" % element)
389 self._indent_level += 1
390
391 def end_element(self, element):
392 assert self._indent_level > 0
393 assert self.stack.pop() == element
394 self._indent_level -= 1
395 self.writeln("</%s>" % element)
396
397 def simple_element(self, element, value=None):
398 if value is not None:
399 value = _escape(value)
400 self.writeln("<%s>%s</%s>" % (element, value, element))
401
402 else:
403 self.writeln("<%s/>" % element)
404
405 def writeln(self, line):
406 if line:
407 # plist has fixed encoding of utf-8
408
409 # XXX: is this test needed?
410 if isinstance(line, str):
411 line = line.encode('utf-8')
412 self.file.write(self._indent_level * self.indent)
413 self.file.write(line)
414 self.file.write(b'\n')
415
416
class _PlistWriter(_DumbXMLWriter):
    """Serialise Python values as an XML plist on top of _DumbXMLWriter.

    writeHeader: when true, PLISTHEADER is written to *file* immediately.
    sort_keys: when true, dict items are emitted in sorted order.
    skipkeys: when true, dict items with non-str keys are dropped instead
        of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        # The header goes out before the base writer is initialised.
        if writeHeader:
            file.write(PLISTHEADER)
        super().__init__(file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the <plist> root element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, dispatching on its type.

        Raises OverflowError for ints outside [-2**63, 2**64) and TypeError
        for unsupported types.
        """
        if isinstance(value, str):
            self.simple_element("string", value)
            return

        # The booleans are matched by identity and must be tested before
        # int, because bool is a subclass of int.
        if value is True:
            self.simple_element("true")
            return

        if value is False:
            self.simple_element("false")
            return

        if isinstance(value, int):
            if not (-1 << 63 <= value < 1 << 64):
                raise OverflowError(value)
            self.simple_element("integer", "%d" % value)
            return

        if isinstance(value, float):
            self.simple_element("real", repr(value))
            return

        if isinstance(value, dict):
            self.write_dict(value)
            return

        if isinstance(value, Data):
            self.write_data(value)
            return

        if isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)
            return

        if isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))
            return

        if isinstance(value, (tuple, list)):
            self.write_array(value)
            return

        raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        """Write the payload of a Data wrapper as a <data> element."""
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* base64-encoded inside a <data> element."""
        self.begin_element("data")
        # The base64 lines sit at the same depth as the <data> tag itself.
        self._indent_level -= 1
        # Width of the indentation, counting each tab as 8 columns; the
        # lines are sized to fit in 76 columns, but never below 16.
        indent_width = len(
            self.indent.replace(b"\t", b" " * 8) * self._indent_level)
        maxlinelength = max(16, 76 - indent_width)

        encoded = _encode_base64(data, maxlinelength)
        for line in encoded.split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write a mapping as <dict>, or as <dict/> when it is empty."""
        if not d:
            self.simple_element("dict")
            return

        self.begin_element("dict")
        pairs = sorted(d.items()) if self._sort_keys else d.items()

        for key, value in pairs:
            if isinstance(key, str):
                self.simple_element("key", key)
                self.write_value(value)
            elif not self._skipkeys:
                raise TypeError("keys must be strings")
        self.end_element("dict")

    def write_array(self, array):
        """Write a sequence as <array>, or as <array/> when it is empty."""
        if not array:
            self.simple_element("array")
            return

        self.begin_element("array")
        for item in array:
            self.write_value(item)
        self.end_element("array")
515
516
517def _is_fmt_xml(header):
518 prefixes = (b'<?xml', b'<plist')
519
520 for pfx in prefixes:
521 if header.startswith(pfx):
522 return True
523
524 # Also check for alternative XML encodings, this is slightly
525 # overkill because the Apple tools (and plistlib) will not
526 # generate files with these encodings.
527 for bom, encoding in (
528 (codecs.BOM_UTF8, "utf-8"),
529 (codecs.BOM_UTF16_BE, "utf-16-be"),
530 (codecs.BOM_UTF16_LE, "utf-16-le"),
531 # expat does not support utf-32
532 #(codecs.BOM_UTF32_BE, "utf-32-be"),
533 #(codecs.BOM_UTF32_LE, "utf-32-le"),
534 ):
535 if not header.startswith(bom):
536 continue
537
538 for start in prefixes:
539 prefix = bom + start.decode('ascii').encode(encoding)
540 if header[:len(prefix)] == prefix:
541 return True
542
543 return False
544
545#
546# Binary Plist
547#
548
549
class InvalidFileException (ValueError):
    """ValueError subclass raised by the binary plist parser for bad input.

    The message defaults to "Invalid file".
    """

    def __init__(self, message="Invalid file"):
        super().__init__(message)
553
# Maps an integer width in bytes to the struct format character of the
# unsigned integer of that width.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Sentinel for "object not read yet" in _BinaryPlistParser's object cache;
# None cannot be used because it is itself a possible parsed value.
_undefined = object()
557
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    use_builtin_types: when true, data objects are returned as bytes,
        otherwise wrapped in Data.
    dict_type: callable used to create the object for each dict.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file *fp* and return the root object."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Cache of parsed objects, indexed by reference number.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
                ValueError):
            # ValueError covers UnicodeDecodeError from string decoding as
            # well as values rejected further down, e.g. a NaN date
            # timestamp (timedelta) or a UID that does not fit in 64 bits.
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The real size follows as an integer: the low two bits of the
            # next byte give log2 of its width in bytes.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            # Widths without a struct format character are decoded by hand.
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        """Read *n* object references (each self._ref_size bytes wide)."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        result = self._objects[ref]
        if result is not _undefined:
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        # High nibble: object type; low nibble: type-specific size info.
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # 2**tokenL bytes; widths of 8 bytes and more are signed.
            result = int.from_bytes(self._fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22:  # real
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23:  # real
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                result = self._fp.read(s)
            else:
                result = Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result = self._fp.read(s).decode('ascii')

        elif tokenH == 0x60:  # unicode string
            s = self._get_size(tokenL)
            result = self._fp.read(s * 2).decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
            result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big'))

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the container before reading its items so that a
            # reference back to it resolves to this same list.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            self._objects[ref] = result
            for k, o in zip(key_refs, obj_refs):
                # Value first, then key: the evaluation order of the
                # original single subscript assignment.
                item = self._read_object(o)
                key = self._read_object(k)
                try:
                    result[key] = item
                except TypeError:
                    # Unhashable key (e.g. the key reference points at an
                    # array or dict): the file is malformed.
                    raise InvalidFileException()

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100711
712def _count_to_size(count):
713 if count < 1 << 8:
714 return 1
715
716 elif count < 1 << 16:
717 return 2
718
719 elif count << 1 << 32:
720 return 4
721
722 else:
723 return 8
724
# Types that _BinaryPlistWriter deduplicates by (type, value) in its object
# table; objects of any other type are tracked by id() instead.
_scalars = (str, int, float, datetime.datetime, bytes)
726
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100727class _BinaryPlistWriter (object):
728 def __init__(self, fp, sort_keys, skipkeys):
729 self._fp = fp
730 self._sort_keys = sort_keys
731 self._skipkeys = skipkeys
732
733 def write(self, value):
734
735 # Flattened object list:
736 self._objlist = []
737
738 # Mappings from object->objectid
739 # First dict has (type(object), object) as the key,
740 # second dict is used when object is not hashable and
741 # has id(object) as the key.
742 self._objtable = {}
743 self._objidtable = {}
744
745 # Create list of all objects in the plist
746 self._flatten(value)
747
748 # Size of object references in serialized containers
749 # depends on the number of objects in the plist.
750 num_objects = len(self._objlist)
751 self._object_offsets = [0]*num_objects
752 self._ref_size = _count_to_size(num_objects)
753
754 self._ref_format = _BINARY_FORMAT[self._ref_size]
755
756 # Write file header
757 self._fp.write(b'bplist00')
758
759 # Write object list
760 for obj in self._objlist:
761 self._write_object(obj)
762
763 # Write refnum->object offset table
764 top_object = self._getrefnum(value)
765 offset_table_offset = self._fp.tell()
766 offset_size = _count_to_size(offset_table_offset)
767 offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
768 self._fp.write(struct.pack(offset_format, *self._object_offsets))
769
770 # Write trailer
771 sort_version = 0
772 trailer = (
773 sort_version, offset_size, self._ref_size, num_objects,
774 top_object, offset_table_offset
775 )
776 self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
777
778 def _flatten(self, value):
779 # First check if the object is in the object table, not used for
780 # containers to ensure that two subcontainers with the same contents
781 # will be serialized as distinct values.
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200782 if isinstance(value, _scalars):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100783 if (type(value), value) in self._objtable:
784 return
785
786 elif isinstance(value, Data):
787 if (type(value.data), value.data) in self._objtable:
788 return
789
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200790 elif id(value) in self._objidtable:
791 return
792
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100793 # Add to objectreference map
794 refnum = len(self._objlist)
795 self._objlist.append(value)
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200796 if isinstance(value, _scalars):
797 self._objtable[(type(value), value)] = refnum
798 elif isinstance(value, Data):
799 self._objtable[(type(value.data), value.data)] = refnum
800 else:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100801 self._objidtable[id(value)] = refnum
802
803 # And finally recurse into containers
804 if isinstance(value, dict):
805 keys = []
806 values = []
807 items = value.items()
808 if self._sort_keys:
809 items = sorted(items)
810
811 for k, v in items:
812 if not isinstance(k, str):
813 if self._skipkeys:
814 continue
815 raise TypeError("keys must be strings")
816 keys.append(k)
817 values.append(v)
818
819 for o in itertools.chain(keys, values):
820 self._flatten(o)
821
822 elif isinstance(value, (list, tuple)):
823 for o in value:
824 self._flatten(o)
825
826 def _getrefnum(self, value):
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200827 if isinstance(value, _scalars):
828 return self._objtable[(type(value), value)]
829 elif isinstance(value, Data):
830 return self._objtable[(type(value.data), value.data)]
831 else:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100832 return self._objidtable[id(value)]
833
834 def _write_size(self, token, size):
835 if size < 15:
836 self._fp.write(struct.pack('>B', token | size))
837
838 elif size < 1 << 8:
839 self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
840
841 elif size < 1 << 16:
842 self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
843
844 elif size < 1 << 32:
845 self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
846
847 else:
848 self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
849
850 def _write_object(self, value):
851 ref = self._getrefnum(value)
852 self._object_offsets[ref] = self._fp.tell()
853 if value is None:
854 self._fp.write(b'\x00')
855
856 elif value is False:
857 self._fp.write(b'\x08')
858
859 elif value is True:
860 self._fp.write(b'\x09')
861
862 elif isinstance(value, int):
Ronald Oussoren6db66532014-01-15 11:32:35 +0100863 if value < 0:
864 try:
865 self._fp.write(struct.pack('>Bq', 0x13, value))
866 except struct.error:
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100867 raise OverflowError(value) from None
Ronald Oussoren6db66532014-01-15 11:32:35 +0100868 elif value < 1 << 8:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100869 self._fp.write(struct.pack('>BB', 0x10, value))
870 elif value < 1 << 16:
871 self._fp.write(struct.pack('>BH', 0x11, value))
872 elif value < 1 << 32:
873 self._fp.write(struct.pack('>BL', 0x12, value))
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100874 elif value < 1 << 63:
875 self._fp.write(struct.pack('>BQ', 0x13, value))
876 elif value < 1 << 64:
877 self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100878 else:
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100879 raise OverflowError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100880
881 elif isinstance(value, float):
882 self._fp.write(struct.pack('>Bd', 0x23, value))
883
884 elif isinstance(value, datetime.datetime):
885 f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
886 self._fp.write(struct.pack('>Bd', 0x33, f))
887
888 elif isinstance(value, Data):
889 self._write_size(0x40, len(value.data))
890 self._fp.write(value.data)
891
892 elif isinstance(value, (bytes, bytearray)):
893 self._write_size(0x40, len(value))
894 self._fp.write(value)
895
896 elif isinstance(value, str):
897 try:
898 t = value.encode('ascii')
899 self._write_size(0x50, len(value))
900 except UnicodeEncodeError:
901 t = value.encode('utf-16be')
Serhiy Storchaka7338ebc2016-10-04 20:04:30 +0300902 self._write_size(0x60, len(t) // 2)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100903
904 self._fp.write(t)
905
Jon Janzenc981ad12019-05-15 22:14:38 +0200906 elif isinstance(value, UID):
907 if value.data < 0:
908 raise ValueError("UIDs must be positive")
909 elif value.data < 1 << 8:
910 self._fp.write(struct.pack('>BB', 0x80, value))
911 elif value.data < 1 << 16:
912 self._fp.write(struct.pack('>BH', 0x81, value))
913 elif value.data < 1 << 32:
914 self._fp.write(struct.pack('>BL', 0x83, value))
915 elif value.data < 1 << 64:
916 self._fp.write(struct.pack('>BQ', 0x87, value))
917 else:
918 raise OverflowError(value)
919
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100920 elif isinstance(value, (list, tuple)):
921 refs = [self._getrefnum(o) for o in value]
922 s = len(refs)
923 self._write_size(0xA0, s)
924 self._fp.write(struct.pack('>' + self._ref_format * s, *refs))
925
926 elif isinstance(value, dict):
927 keyRefs, valRefs = [], []
928
929 if self._sort_keys:
930 rootItems = sorted(value.items())
931 else:
932 rootItems = value.items()
933
934 for k, v in rootItems:
935 if not isinstance(k, str):
936 if self._skipkeys:
937 continue
938 raise TypeError("keys must be strings")
939 keyRefs.append(self._getrefnum(k))
940 valRefs.append(self._getrefnum(v))
941
942 s = len(keyRefs)
943 self._write_size(0xD0, s)
944 self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
945 self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))
946
947 else:
Ronald Oussoren6db66532014-01-15 11:32:35 +0100948 raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100949
950
951def _is_fmt_binary(header):
952 return header[:8] == b'bplist00'
953
954
955#
956# Generic bits
957#
958
# Registry of the supported plist formats.  Each entry names the function
# that recognises the format from a file header, the parser class and the
# writer class.  When no format is given, load() tries the 'detect'
# functions in this order.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
971
972
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file and return its unpacked root object.

    'fp' should be a readable file object.  When 'fmt' is None the first
    32 bytes are read to detect the format and the file is rewound with
    seek(0) before parsing; InvalidFileException is raised if no known
    format recognises the header.  Otherwise the parser registered for
    'fmt' is used directly.  The root object usually is a dictionary.
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        # Lazily probe the registered formats; the first one whose
        # detector accepts the header wins.
        candidates = (info['parser'] for info in _FORMATS.values()
                      if info['detect'](header))
        parser_cls = next(candidates, None)
        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(use_builtin_types=use_builtin_types,
                        dict_type=dict_type)
    return parser.parse(fp)
993
994
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Parse plist data held in a bytes object.

    The data is wrapped in an in-memory file and handed to load() with
    the same keyword arguments.  Return the unpacked root object (which
    usually is a dictionary).
    """
    options = dict(fmt=fmt, use_builtin_types=use_builtin_types,
                   dict_type=dict_type)
    return load(BytesIO(value), **options)
1002
1003
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file.

    'fp' should be a writable file object.  'fmt' selects the registered
    writer; a format that is not registered raises ValueError.
    'sort_keys' and 'skipkeys' are passed through to the writer.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r" % (fmt,))

    writer_cls = _FORMATS[fmt]["writer"]
    plist_writer = writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys)
    plist_writer.write(value)
1013
1014
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object holding 'value' serialized as a .plist file.

    The value is written with dump() into an in-memory buffer whose
    contents are returned.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    serialized = buffer.getvalue()
    return serialized
1020 return fp.getvalue()