blob: a84bb57371ef61a29e6d17c36f9551eeab796a76 [file] [log] [blame]
r"""plistlib.py -- a tool to generate and parse Mac OS X .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Public names exported by a star-import of this module.
__all__ = [
    "InvalidFileException",
    "FMT_XML",
    "FMT_BINARY",
    "load",
    "dump",
    "loads",
    "dumps",
    "UID",
]
Christian Heimes7e182542008-01-27 15:20:13 +000051
52import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010053import codecs
54import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000055import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010056import enum
Christian Heimes7e182542008-01-27 15:20:13 +000057from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import itertools
59import os
Christian Heimes7e182542008-01-27 15:20:13 +000060import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010061import struct
62from warnings import warn
63from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000064
65
# Enumeration of the supported serialization formats.  Its members are also
# published as module-level names (FMT_XML and FMT_BINARY).
PlistFormat = enum.Enum(
    value='PlistFormat',
    names='FMT_XML FMT_BINARY',
    module=__name__,
)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000068
69
class UID:
    """Integer wrapper for the plist UID type (used by Key-Archiver files).

    The wrapped value is stored in the ``data`` attribute and must be an
    int in the range ``0 <= data < 2**64``.  Instances compare equal to
    other UID instances holding the same value, hash like that value, and
    can be used wherever an index is expected.
    """

    def __init__(self, data):
        # The type is checked first so that non-ints never reach the range
        # comparisons below.
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        if data < 0:
            raise ValueError("UIDs must be positive")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, repr(self.data))

    def __reduce__(self):
        # Pickle support: rebuild by calling the class with the raw value.
        return self.__class__, (self.data,)

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
96
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010097#
98# XML support
99#
100
101
# XML 'header': the XML declaration followed by Apple's plist DOCTYPE,
# each terminated by a newline.
PLISTHEADER = (
    b'<?xml version="1.0" encoding="UTF-8"?>\n'
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"'
    b' "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
)
107
108
# Regex matching any C0 control character (code points 0x00-0x1f) other
# than tab (\x09), line feed (\x0a) and carriage return (\x0d).
_controlCharPat = re.compile(
    r"["
    r"\x00\x01\x02\x03\x04\x05\x06\x07"
    r"\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17"
    r"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
    r"]"
)
113
def _encode_base64(s, maxlinelength=76):
    """Return *s* base64-encoded as newline-terminated lines.

    Works like base64.encodebytes(), except that the length of the output
    lines is controlled by *maxlinelength*.
    """
    # Three input bytes become four output characters, so this many input
    # bytes fill one output line.
    bytes_per_line = (maxlinelength // 4) * 3
    return b''.join(
        binascii.b2a_base64(s[start:start + bytes_per_line])
        for start in range(0, len(s), bytes_per_line)
    )
122
def _decode_base64(s):
    """Decode base64 data given either as str or as a bytes-like object."""
    # binascii wants bytes here; str input is UTF-8 encoded first.
    payload = s.encode("utf-8") if isinstance(s, str) else s
    return binascii.a2b_base64(payload)
129
# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units
# may be omitted with a loss of precision)
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)


def _date_from_string(s):
    """Convert a plist date string into a datetime.datetime.

    The string is matched against _dateParser from its start.  The leading
    components that are present (year, month, day, hour, minute, second, in
    that order) are passed on to the datetime.datetime constructor.

    Raises ValueError if *s* does not match the expected date format (the
    original code failed with AttributeError on the missing match object).
    Any error raised by datetime.datetime for the extracted components is
    propagated unchanged.
    """
    match = _dateParser.match(s)
    if match is None:
        raise ValueError("invalid date string: %r" % (s,))

    order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    gd = match.groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # Units are nested in the pattern, so once one is missing all
            # the smaller ones are missing too.
            break
        lst.append(int(val))
    return datetime.datetime(*lst)
146
147
def _date_to_string(d):
    """Format datetime *d* as 'YYYY-MM-DDTHH:MM:SSZ'.

    Only the year, month, day, hour, minute and second attributes of *d*
    are used.
    """
    fields = (d.year, d.month, d.day, d.hour, d.minute, d.second)
    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % fields
153
def _escape(text):
    """Return *text* prepared for use as XML character data.

    Line endings are normalized to "\n" and the characters '&', '<' and '>'
    are replaced by their XML entities.  Raises ValueError if *text*
    contains a control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # The order is significant: "\r\n" has to be handled before a lone
    # "\r", and '&' before the replacements that introduce new '&'s.
    substitutions = (
        ("\r\n", "\n"),     # convert DOS line endings
        ("\r", "\n"),       # convert Mac line endings
        ("&", "&amp;"),     # escape '&'
        ("<", "&lt;"),      # escape '<'
        (">", "&gt;"),      # escape '>'
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
165
class _PlistParser:
    """Parser for XML property lists, driven by expat callbacks.

    *dict_type* is called without arguments to create the container for
    every <dict> element.

    While parsing, ``stack`` holds the currently open containers (innermost
    last), ``current_key`` the most recent <key> of the innermost dict that
    has not received its value yet (None if there is none), and ``root``
    the top level object.
    """

    def __init__(self, dict_type):
        self.stack = []
        self.current_key = None
        self.root = None
        self._dict_type = dict_type

    def parse(self, fileobj):
        """Parse the XML plist read from *fileobj*; return the root object."""
        self.parser = ParserCreate()
        self.parser.StartElementHandler = self.handle_begin_element
        self.parser.EndElementHandler = self.handle_end_element
        self.parser.CharacterDataHandler = self.handle_data
        self.parser.EntityDeclHandler = self.handle_entity_decl
        self.parser.ParseFile(fileobj)
        return self.root

    def handle_entity_decl(self, entity_name, is_parameter_entity, value,
                           base, system_id, public_id, notation_name):
        # Property lists have no use for custom entities, and entity
        # expansion is a classic way to attack XML parsers with
        # untrusted input, so refuse such documents outright.
        raise InvalidFileException(
            "XML entity declarations are not supported in plist files")

    def handle_begin_element(self, element, attrs):
        # Start collecting the character data of the new element, then
        # dispatch to begin_<element>() if such a method exists.
        self.data = []
        handler = getattr(self, "begin_" + element, None)
        if handler is not None:
            handler(attrs)

    def handle_end_element(self, element):
        # Dispatch to end_<element>() if such a method exists.
        handler = getattr(self, "end_" + element, None)
        if handler is not None:
            handler()

    def handle_data(self, data):
        self.data.append(data)

    def add_object(self, value):
        """Store *value* in the innermost open container (or as the root).

        Raises ValueError if the value appears where the innermost
        container cannot accept it.
        """
        if self.current_key is not None:
            if not isinstance(self.stack[-1], dict):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1][self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            if not isinstance(self.stack[-1], list):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1].append(value)

    def get_data(self):
        """Return the character data collected so far and reset the buffer."""
        data = ''.join(self.data)
        self.data = []
        return data

    # element handlers

    def begin_dict(self, attrs):
        d = self._dict_type()
        self.add_object(d)
        self.stack.append(d)

    def end_dict(self):
        # Compare against None: the empty string is a legal key and must
        # not be mistaken for "no pending key".
        if self.current_key is not None:
            raise ValueError("missing value for key '%s' at line %d" %
                             (self.current_key,self.parser.CurrentLineNumber))
        self.stack.pop()

    def end_key(self):
        # A key is only valid directly inside a dict that is not already
        # waiting for a value.  The explicit check for an empty stack turns
        # a <key> outside of any container into the regular ValueError
        # instead of an IndexError.
        if (self.current_key is not None
                or not self.stack
                or not isinstance(self.stack[-1], dict)):
            raise ValueError("unexpected key at line %d" %
                             self.parser.CurrentLineNumber)
        self.current_key = self.get_data()

    def begin_array(self, attrs):
        a = []
        self.add_object(a)
        self.stack.append(a)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.add_object(True)

    def end_false(self):
        self.add_object(False)

    def end_integer(self):
        self.add_object(int(self.get_data()))

    def end_real(self):
        self.add_object(float(self.get_data()))

    def end_string(self):
        self.add_object(self.get_data())

    def end_data(self):
        self.add_object(_decode_base64(self.get_data()))

    def end_date(self):
        self.add_object(_date_from_string(self.get_data()))
263
264
class _DumbXMLWriter:
    """Minimal line-oriented XML writer producing indented UTF-8 output.

    *file* must accept bytes in its write() method.  *indent* is the bytes
    string written once per nesting level in front of every non-empty line
    and *indent_level* the initial nesting level.  The default *indent* is
    a bytes object because it is written to *file* as is.
    """

    def __init__(self, file, indent_level=0, indent=b"\t"):
        self.file = file
        self.stack = []     # names of the currently open elements
        self._indent_level = indent_level
        self.indent = indent

    def begin_element(self, element):
        """Write the start tag of *element* and increase the indentation."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self._indent_level += 1

    def end_element(self, element):
        """Write the end tag of *element*, the innermost open element."""
        assert self._indent_level > 0
        # Pop outside of the assert statement, so that the bookkeeping is
        # still done when assertions are stripped (python -O).
        opened = self.stack.pop()
        assert opened == element
        self._indent_level -= 1
        self.writeln("</%s>" % element)

    def simple_element(self, element, value=None):
        """Write *element* on one line, empty or with the escaped *value*."""
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))

        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write *line* (str or bytes) indented, followed by a newline.

        An empty *line* produces just the newline, without indentation.
        """
        if line:
            # plist has fixed encoding of utf-8

            # XXX: is this test needed?
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self._indent_level * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
301
302
class _PlistWriter(_DumbXMLWriter):
    """Writer that serializes Python objects as an XML property list.

    Unless *writeHeader* is false, PLISTHEADER is written to *file* on
    construction.  If *sort_keys* is true dictionaries are written with
    their keys in sorted order.  Dictionary keys that are not strings raise
    TypeError, unless *skipkeys* is true, in which case such items are
    silently left out.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write the element for *value*, chosen by the type of *value*.

        Raises TypeError for unsupported types and OverflowError for
        integers outside of the range -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # The booleans have to be tested before int, as bool is a subclass
        # of int.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_bytes(self, data):
        """Write *data* as a <data> element containing base64 lines."""
        self.begin_element("data")
        # The base64 lines are written at the indentation of the <data>
        # tag itself, not one level deeper.
        self._indent_level -= 1
        # Shorten the lines by the width of the indentation (a tab counts
        # as 8 columns), but never to less than 16 characters.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write the mapping *d* as a <dict> element."""
        if d:
            self.begin_element("dict")

            # Drop (or reject) the non-string keys before sorting: sorting
            # the unfiltered items fails with an unrelated TypeError when
            # the keys are of types that cannot be compared with each
            # other, which made skipkeys useless for such dictionaries.
            items = []
            for key, value in d.items():
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                items.append((key, value))
            if self._sort_keys:
                items.sort(key=lambda item: item[0])

            for key, value in items:
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write the sequence *array* as an <array> element."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
395
396
def _is_fmt_xml(header):
    """Return True if the bytes *header* look like the start of an XML plist.

    That is the case if *header* starts with b'<?xml' or b'<plist',
    optionally preceded by a UTF-8 or UTF-16 byte order mark and encoded
    accordingly.
    """
    prefixes = (b'<?xml', b'<plist')
    if header.startswith(prefixes):
        return True

    # Also check for alternative XML encodings, this is slightly
    # overkill because the Apple tools (and plistlib) will not
    # generate files with these encodings.
    # UTF-32 is left out because expat does not support it.
    bom_encodings = (
        (codecs.BOM_UTF8, "utf-8"),
        (codecs.BOM_UTF16_BE, "utf-16-be"),
        (codecs.BOM_UTF16_LE, "utf-16-le"),
    )
    for bom, encoding in bom_encodings:
        if header.startswith(bom):
            for start in prefixes:
                expected = bom + start.decode('ascii').encode(encoding)
                if header[:len(expected)] == expected:
                    return True

    return False
424
425#
426# Binary Plist
427#
428
429
class InvalidFileException (ValueError):
    """Raised when a file cannot be parsed as a property list."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)

# struct format characters of the unsigned integer types, keyed by their
# size in bytes.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Marker for objects that have not been read yet (None cannot be used for
# this, as it is a valid object value).
_undefined = object()

class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    'dict_type' is called without arguments to create the container for
    every dict object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, dict_type):
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the binary plist in the seekable binary file 'fp'."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        # ValueError also covers UnicodeDecodeError, and is raised for
        # out-of-range values found in the file (UID and date objects).
        except (OSError, IndexError, struct.error, OverflowError,
                ValueError):
            raise InvalidFileException()

    def _read_exact(self, size):
        """ read exactly 'size' bytes, or fail if the file is too short."""
        data = self._fp.read(size)
        if len(data) != size:
            raise InvalidFileException()
        return data

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The size does not fit in the token, it is stored as an
            # integer object following the token instead.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """ read 'n' unsigned big-endian integers of 'size' bytes each."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            # Use a repeat count: 'n' comes straight from the file and
            # must not control the length of the format string.
            return struct.unpack('>%d%s' % (n, _BINARY_FORMAT[size]), data)
        else:
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        result = self._objects[ref]
        if result is not _undefined:
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            result = int.from_bytes(self._read_exact(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22: # real
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23: # real
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            result = self._read_exact(s)

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result = self._read_exact(s).decode('ascii')

        elif tokenH == 0x60:  # unicode string
            s = self._get_size(tokenL)
            result = self._read_exact(s * 2).decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
            result = UID(int.from_bytes(self._read_exact(1 + tokenL), 'big'))

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the container before reading its content, so that
            # references back to it do not recurse forever.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            self._objects[ref] = result
            try:
                for k, o in zip(key_refs, obj_refs):
                    result[self._read_object(k)] = self._read_object(o)
            except TypeError:
                # The key reference points to an unhashable object.
                raise InvalidFileException()

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100587
def _count_to_size(count):
    """Return the number of bytes (1, 2, 4 or 8) needed to store *count*
    as an unsigned integer.
    """
    if count < 1 << 8:
        return 1

    elif count < 1 << 16:
        return 2

    # This used to read "count << 1 << 32", which is true for every
    # non-zero count, so that 8 was never returned.
    elif count < 1 << 32:
        return 4

    else:
        return 8
600
# Objects of these types are written once per distinct (type, value); all
# other objects are tracked by identity.
_scalars = (str, int, float, datetime.datetime, bytes)

class _BinaryPlistWriter (object):
    """Writer that serializes Python objects as a binary property list.

    'fp' is a binary file that supports write() and tell().  If
    'sort_keys' is true the items of dictionaries are written with their
    keys in sorted order.  Dictionary keys that are not strings raise
    TypeError, unless 'skipkeys' is true, in which case such items are
    left out.
    """
    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write 'value' as a complete binary plist."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _dict_items(self, d):
        """Return the list of (key, value) items of 'd' to serialize.

        Non-string keys are dropped or rejected *before* sorting: sorting
        the unfiltered items fails with an unrelated TypeError when the
        keys are of types that cannot be compared with each other, which
        made skipkeys useless for such dictionaries.
        """
        items = []
        for k, v in d.items():
            if not isinstance(k, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            items.append((k, v))
        if self._sort_keys:
            items.sort(key=lambda item: item[0])
        return items

    def _flatten(self, value):
        """Add 'value' and, recursively, its content to the object list."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            items = self._dict_items(value)
            keys = [k for k, v in items]
            values = [v for k, v in items]

            # All keys are added before the values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number assigned to 'value' by _flatten()."""
        if isinstance(value, _scalars):
            return self._objtable[(type(value), value)]
        else:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the type 'token' combined with the object size 'size'."""
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        # Larger sizes do not fit in the token: the low nibble is set to
        # 0xF and the size follows as an integer object.
        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write the object 'value' and record its file offset.

        Containers are written as lists of references to their content.
        Raises TypeError for unsupported types and OverflowError for
        integers that cannot be represented.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for a signed 8 byte integer, written as a
                # 16 byte integer instead.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 1/1/2001.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size is counted in 2 byte code units.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, UID):
            if value.data < 0:
                raise ValueError("UIDs must be positive")
            elif value.data < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x80, value))
            elif value.data < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x81, value))
            elif value.data < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x83, value))
            elif value.data < 1 << 64:
                self._fp.write(struct.pack('>BQ', 0x87, value))
            else:
                raise OverflowError(value)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            for k, v in self._dict_items(value):
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100813
814
def _is_fmt_binary(header):
    """Return True if *header* begins with the binary plist signature."""
    magic = b'bplist00'
    return header[:len(magic)] == magic
817
818
819#
820# Generic bits
821#
822
# For every supported format: the function that recognizes the format from
# the first bytes of a file, and the classes used to parse and to write it.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
835
836
def load(fp, *, fmt=None, dict_type=dict):
    """Read a .plist file. 'fp' should be a readable and binary file object.
    Return the unpacked root object (which usually is a dictionary).

    If 'fmt' is None the format is detected from the first 32 bytes of the
    file, and InvalidFileException is raised if no known format matches.
    Otherwise 'fmt' selects the parser to use.  'dict_type' is passed on to
    the parser.
    """
    if fmt is not None:
        parser_class = _FORMATS[fmt]['parser']

    else:
        header = fp.read(32)
        # Rewind, the parser has to see the file from its beginning.
        fp.seek(0)
        for info in _FORMATS.values():
            if info['detect'](header):
                parser_class = info['parser']
                break

        else:
            raise InvalidFileException()

    parser = parser_class(dict_type=dict_type)
    return parser.parse(fp)
857
858
def loads(value, *, fmt=None, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    'value' is wrapped in a BytesIO object and handed to load(), together
    with 'fmt' and 'dict_type'.
    """
    return load(BytesIO(value), fmt=fmt, dict_type=dict_type)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100865
866
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a writable,
    binary file object.

    'fmt' selects the output format; ValueError is raised if it is not a
    supported format.  'sort_keys' and 'skipkeys' are passed on to the
    writer for that format.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_class = _FORMATS[fmt]["writer"]
    writer_class(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
876
877
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    'value' is written to an in-memory buffer by dump(), which also
    receives 'fmt', 'skipkeys' and 'sort_keys'.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()