blob: 2eeebe4c9a4244bc30dd411fa6f428afd37cc5e7 [file] [log] [blame]
Benjamin Petersonef3e4c22009-04-11 19:48:14 +00001r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
Christian Heimes7e182542008-01-27 15:20:13 +00002
Ezio Melotti6e9b1df2009-09-16 00:49:03 +00003The property list (.plist) file format is a simple XML pickle supporting
Christian Heimes7e182542008-01-27 15:20:13 +00004basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
Ronald Oussorenc5cf7972013-11-21 15:46:49 +01007To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
Christian Heimes7e182542008-01-27 15:20:13 +000010
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010011To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
Christian Heimes7e182542008-01-27 15:20:13 +000013returns the top level object (again, usually a dictionary).
14
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010015To work with plist data in bytes objects, you can use loads()
16and dumps().
Christian Heimes7e182542008-01-27 15:20:13 +000017
Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), bytes, bytearray, or
datetime.datetime objects.
Christian Heimes7e182542008-01-27 15:20:13 +000021
22Generate Plist example:
23
24 pl = dict(
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000025 aString = "Doodah",
26 aList = ["A", "B", 12, 32.1, [1, 2, 3]],
Christian Heimes7e182542008-01-27 15:20:13 +000027 aFloat = 0.1,
28 anInt = 728,
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000029 aDict = dict(
30 anotherString = "<hello & hi there!>",
31 aUnicodeValue = "M\xe4ssig, Ma\xdf",
32 aTrueValue = True,
33 aFalseValue = False,
Christian Heimes7e182542008-01-27 15:20:13 +000034 ),
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010035 someData = b"<binary gunk>",
36 someMoreData = b"<lots of binary gunk>" * 10,
Christian Heimes7e182542008-01-27 15:20:13 +000037 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38 )
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010039 with open(fileName, 'wb') as fp:
40 dump(pl, fp)
Christian Heimes7e182542008-01-27 15:20:13 +000041
42Parse Plist example:
43
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010044 with open(fileName, 'rb') as fp:
45 pl = load(fp)
46 print(pl["aKey"])
Christian Heimes7e182542008-01-27 15:20:13 +000047"""
# Public names exported by a star-import of this module.
__all__ = [
    "InvalidFileException", "FMT_XML", "FMT_BINARY", "load", "dump", "loads", "dumps", "UID"
]
Christian Heimes7e182542008-01-27 15:20:13 +000051
52import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010053import codecs
Christian Heimes7e182542008-01-27 15:20:13 +000054import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import enum
Christian Heimes7e182542008-01-27 15:20:13 +000056from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010057import itertools
58import os
Christian Heimes7e182542008-01-27 15:20:13 +000059import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import struct
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010061from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000062
63
# The two supported plist formats.  The enum members are also copied into the
# module namespace so that they are available as the module-level names
# FMT_XML and FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
class UID:
    """Wrapper around an integer in the range 0 <= data < 2**64.

    Instances compare equal to other UID instances holding the same integer,
    hash like that integer and can be used wherever an index is expected.
    """

    def __init__(self, data):
        """Store *data* after validating its type and range.

        Raises TypeError when *data* is not an int and ValueError when it is
        negative or does not fit in 64 bits.
        """
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        too_large = data >= 1 << 64
        if too_large:
            raise ValueError("UIDs cannot be >= 2**64")
        negative = data < 0
        if negative:
            raise ValueError("UIDs must be positive")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, repr(self.data))

    def __reduce__(self):
        # Pickle as "call the class again with the wrapped integer".
        return self.__class__, (self.data,)

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
94
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010095#
96# XML support
97#
98
99
# XML 'header': the XML declaration followed by Apple's plist DOCTYPE, as
# bytes.  _PlistWriter writes it to the output file when its writeHeader
# argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
105
106
# Regex to find any control chars, except for \t \n and \r
# (i.e. \x00-\x08, \x0b, \x0c and \x0e-\x1f).  _escape() uses it to reject
# strings that contain such characters.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
111
def _encode_base64(s, maxlinelength=76):
    """Return *s* base64-encoded as newline-terminated lines.

    Each output line encodes at most (maxlinelength // 4) * 3 input bytes, so
    no line is longer than *maxlinelength* characters (excluding the newline).
    """
    # copied from base64.encodebytes(), with added maxlinelength argument
    # Every 3 input bytes turn into 4 output characters.
    step = (maxlinelength // 4) * 3
    return b''.join(
        binascii.b2a_base64(s[start:start + step])
        for start in range(0, len(s), step)
    )
120
def _decode_base64(s):
    """Decode base64 data given either as bytes or as a str.

    A str argument is first encoded to UTF-8 bytes.
    """
    raw = s.encode("utf-8") if isinstance(s, str) else s
    return binascii.a2b_base64(raw)
127
# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units
# may be omitted with a loss of precision)
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)


def _date_from_string(s):
    """Convert a plist date string into a datetime.datetime.

    The string is matched against _dateParser from its start.  The matched
    components, from the year down to the first omitted unit, are passed
    positionally to datetime.datetime().

    Raises ValueError when *s* does not match the expected date syntax
    (instead of failing with an AttributeError on the missing match object),
    or when the components do not form a valid date.
    """
    match = _dateParser.match(s)
    if match is None:
        raise ValueError("invalid date string: %r" % (s,))

    components = []
    for key in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        val = match.group(key)
        if val is None:
            # Units can only be omitted from the right, so stop at the
            # first missing one.
            break
        components.append(int(val))
    return datetime.datetime(*components)
144
145
def _date_to_string(d):
    """Format *d* as 'YYYY-MM-DDTHH:MM:SSZ', using its fields down to seconds."""
    date_part = '%04d-%02d-%02d' % (d.year, d.month, d.day)
    time_part = '%02d:%02d:%02d' % (d.hour, d.minute, d.second)
    return date_part + 'T' + time_part + 'Z'
151
def _escape(text):
    """Return *text* prepared for inclusion as XML character data.

    Line endings are normalized to '\\n' and the characters '&', '<' and '>'
    are replaced by their entity references.  Raises ValueError when *text*
    contains a control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # The order matters: "\r\n" must be collapsed before lone "\r", and '&'
    # must be escaped before the replacements that introduce new '&'s.
    replacements = (
        ("\r\n", "\n"),     # convert DOS line endings
        ("\r", "\n"),       # convert Mac line endings
        ("&", "&amp;"),     # escape '&'
        ("<", "&lt;"),      # escape '<'
        (">", "&gt;"),      # escape '>'
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text
163
class _PlistParser:
    """Event-driven parser for XML plists, built on expat.

    Each XML element <name> is dispatched to the optional methods
    begin_<name>(attrs) and end_<name>().  Containers under construction are
    kept on self.stack; the finished top-level object ends up in self.root.
    """

    def __init__(self, dict_type):
        self.stack = []
        # Key of the dict entry whose value has not been seen yet, or None.
        # An empty string is a valid pending key, so always compare with None.
        self.current_key = None
        self.root = None
        self._dict_type = dict_type

    def parse(self, fileobj):
        """Parse the XML plist in *fileobj* and return the root object."""
        self.parser = ParserCreate()
        self.parser.StartElementHandler = self.handle_begin_element
        self.parser.EndElementHandler = self.handle_end_element
        self.parser.CharacterDataHandler = self.handle_data
        self.parser.EntityDeclHandler = self.handle_entity_decl
        self.parser.ParseFile(fileobj)
        return self.root

    def handle_entity_decl(self, entity_name, is_parameter_entity, value, base, system_id, public_id, notation_name):
        # Reject plist files with entity declarations to avoid XML vulnerabilities in expat.
        # Regular plist files don't contain those declarations, and Apple's plutil tool does not
        # accept them either.
        raise InvalidFileException("XML entity declarations are not supported in plist files")

    def handle_begin_element(self, element, attrs):
        # Character data collected so far belongs to an enclosing element
        # and is discarded.
        self.data = []
        handler = getattr(self, "begin_" + element, None)
        if handler is not None:
            handler(attrs)

    def handle_end_element(self, element):
        handler = getattr(self, "end_" + element, None)
        if handler is not None:
            handler()

    def handle_data(self, data):
        self.data.append(data)

    def add_object(self, value):
        """Attach *value* to the container being built, or make it the root.

        Raises ValueError when *value* appears where the enclosing container
        cannot accept it.
        """
        if self.current_key is not None:
            if not isinstance(self.stack[-1], dict):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1][self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            if not isinstance(self.stack[-1], list):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1].append(value)

    def get_data(self):
        """Return the character data collected so far and reset the buffer."""
        data = ''.join(self.data)
        self.data = []
        return data

    # element handlers

    def begin_dict(self, attrs):
        d = self._dict_type()
        self.add_object(d)
        self.stack.append(d)

    def end_dict(self):
        # Compare with None rather than by truthiness: an empty key ("") that
        # never received a value is just as much an error as any other key.
        if self.current_key is not None:
            raise ValueError("missing value for key '%s' at line %d" %
                             (self.current_key, self.parser.CurrentLineNumber))
        self.stack.pop()

    def end_key(self):
        # A key is only valid directly inside a dict, and only when the
        # previous key (possibly the empty string) already got its value.
        if (self.current_key is not None
                or not self.stack
                or not isinstance(self.stack[-1], dict)):
            raise ValueError("unexpected key at line %d" %
                             self.parser.CurrentLineNumber)
        self.current_key = self.get_data()

    def begin_array(self, attrs):
        a = []
        self.add_object(a)
        self.stack.append(a)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.add_object(True)

    def end_false(self):
        self.add_object(False)

    def end_integer(self):
        raw = self.get_data()
        # Values written with a 0x/0X prefix are parsed as hexadecimal.
        if raw.startswith('0x') or raw.startswith('0X'):
            self.add_object(int(raw, 16))
        else:
            self.add_object(int(raw))

    def end_real(self):
        self.add_object(float(self.get_data()))

    def end_string(self):
        self.add_object(self.get_data())

    def end_data(self):
        self.add_object(_decode_base64(self.get_data()))

    def end_date(self):
        self.add_object(_date_from_string(self.get_data()))
272
273
class _DumbXMLWriter:
    """Minimal indenting XML writer that emits UTF-8 bytes to a binary file."""

    def __init__(self, file, indent_level=0, indent="\t"):
        """Create a writer for the binary file object *file*.

        *indent* is the unit of indentation written once per nesting level.
        It may be given as bytes or as str; a str is encoded to UTF-8 because
        all output is written to *file* as bytes.
        """
        self.file = file
        self.stack = []
        self._indent_level = indent_level
        if isinstance(indent, str):
            # A str indent could never be written to the binary file.
            indent = indent.encode('utf-8')
        self.indent = indent

    def begin_element(self, element):
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self._indent_level += 1

    def end_element(self, element):
        assert self._indent_level > 0
        # Pop outside of the assert so that the bookkeeping still happens
        # when assertions are disabled (python -O).
        opened = self.stack.pop()
        assert opened == element
        self._indent_level -= 1
        self.writeln("</%s>" % element)

    def simple_element(self, element, value=None):
        """Write <element>value</element>, or <element/> when value is None."""
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))

        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write *line* at the current indentation followed by a newline.

        An empty *line* produces just the newline.
        """
        if line:
            # plist has fixed encoding of utf-8

            # XXX: is this test needed?
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self._indent_level * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
309 self.file.write(b'\n')
310
311
class _PlistWriter(_DumbXMLWriter):
    """Serialize Python values as an XML plist."""

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):
        """Create a writer for the binary file object *file*.

        When *writeHeader* is true PLISTHEADER is written immediately.
        *sort_keys* selects whether dictionaries are written in sorted key
        order; *skipkeys* selects whether non-string dictionary keys are
        silently skipped instead of raising TypeError.
        """
        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write a single value, dispatching on its type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside the range -2**63 <= value < 2**64.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is tested by identity before the int branch, because bool is a
        # subclass of int.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_bytes(self, data):
        """Write *data* as a base64 encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the indentation of the <data> tag
        # itself, not one level deeper.
        self._indent_level -= 1
        # Keep lines within 76 columns, counting a tab as 8 columns, but
        # never go below 16 characters of payload per line.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write *d* as a <dict> element (<dict/> when it is empty).

        Raises TypeError for a non-string key unless skipkeys was requested,
        in which case the entry is omitted.
        """
        if d:
            self.begin_element("dict")

            # Filter the keys *before* sorting: sorting first would compare
            # the non-string keys that skipkeys is supposed to ignore and
            # could fail on keys that are not mutually comparable.
            items = []
            for key, value in d.items():
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                items.append((key, value))
            if self._sort_keys:
                items.sort(key=lambda item: item[0])

            for key, value in items:
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write *array* as an <array> element (<array/> when it is empty)."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
403 self.simple_element("array")
404
405
def _is_fmt_xml(header):
    """Return True when the bytes *header* look like the start of an XML plist.

    The header must start with '<?xml' or '<plist', either directly or after
    a UTF-8 / UTF-16 byte order mark with the text encoded accordingly.
    """
    prefixes = (b'<?xml', b'<plist')
    if header.startswith(prefixes):
        return True

    # Also check for alternative XML encodings, this is slightly
    # overkill because the Apple tools (and plistlib) will not
    # generate files with these encodings.
    bom_encodings = (
        (codecs.BOM_UTF8, "utf-8"),
        (codecs.BOM_UTF16_BE, "utf-16-be"),
        (codecs.BOM_UTF16_LE, "utf-16-le"),
        # expat does not support utf-32
        #(codecs.BOM_UTF32_BE, "utf-32-be"),
        #(codecs.BOM_UTF32_LE, "utf-32-le"),
    )
    for bom, encoding in bom_encodings:
        if header.startswith(bom):
            for start in prefixes:
                encoded = bom + start.decode('ascii').encode(encoding)
                if header.startswith(encoded):
                    return True

    return False
433
434#
435# Binary Plist
436#
437
438
class InvalidFileException (ValueError):
    """ValueError subclass whose message defaults to "Invalid file"."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
442
# Maps an integer width in bytes to the struct format character of the
# unsigned integer with that width.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Sentinel used by _BinaryPlistParser for objects that were not read yet.
_undefined = object()
446
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, dict_type):
        # Callable used to create the mapping for every plist dict.
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the binary plist in the seekable file object *fp*.

        Returns the top-level object.  OSError, IndexError, struct.error,
        OverflowError and ValueError raised while reading are reported as
        InvalidFileException.
        """
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            # The trailer occupies the last 32 bytes of the file.
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            # 6 bytes are skipped, followed by two 1-byte sizes and three
            # 8-byte big-endian integers.
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Cache of parsed objects, indexed by object reference.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
                ValueError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        # A low nibble of 0xF means the size does not fit in the token: it is
        # stored in a following integer whose width is 2**m bytes, where m is
        # the low two bits of the next byte.
        if tokenL == 0xF:
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each.

        Returns them as a tuple.  Raises InvalidFileException when *size* is
        not a struct-supported width and is either zero or the data is short.
        """
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            # struct.unpack raises struct.error when data has the wrong length.
            return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
        else:
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        """Read *n* object references of the width given in the trailer."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        # Every object is parsed at most once; later references reuse it.
        result = self._objects[ref]
        if result is not _undefined:
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        # The high nibble selects the object type, the low nibble carries
        # type-specific information (usually a size).
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # 2**tokenL bytes; integers of 8 or more bytes are signed.
            result = int.from_bytes(self._fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22:  # real
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23:  # real
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            result = self._fp.read(s)
            if len(result) != s:
                raise InvalidFileException()

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            data = self._fp.read(s)
            if len(data) != s:
                raise InvalidFileException()
            result = data.decode('ascii')

        elif tokenH == 0x60:  # unicode string
            # The stored size counts 2-byte units, not bytes.
            s = self._get_size(tokenL) * 2
            data = self._fp.read(s)
            if len(data) != s:
                raise InvalidFileException()
            result = data.decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
            result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big'))

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the list before reading its items, so that a reference
            # back to this array finds the list that is being filled.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            # All key references are stored first, then all value references.
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            # Registered before reading the items, as for arrays.
            self._objects[ref] = result
            try:
                for k, o in zip(key_refs, obj_refs):
                    result[self._read_object(k)] = self._read_object(o)
            except TypeError:
                # A TypeError here (e.g. from an unhashable key) is reported
                # as an invalid file.
                raise InvalidFileException()
        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100606
def _count_to_size(count):
    """Return the smallest of 1, 2 or 4 bytes that can hold *count*, else 8."""
    for size in (1, 2, 4):
        if count < 1 << (8 * size):
            return size
    return 8
619
# Types that _BinaryPlistWriter looks up by (type, value); objects of any
# other type are looked up by id().
_scalars = (str, int, float, datetime.datetime, bytes)
621
class _BinaryPlistWriter (object):
    """Serialize Python values as a binary plist ('bplist00')."""

    def __init__(self, fp, sort_keys, skipkeys):
        """Create a writer for the binary file object *fp*.

        *sort_keys* selects whether dictionaries are written in sorted key
        order; *skipkeys* selects whether non-string dictionary keys are
        silently skipped instead of raising TypeError.
        """
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* as a complete binary plist: header, objects,
        offset table and trailer."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _dict_items(self, d):
        """Return the (key, value) pairs of *d* to serialize, as a list.

        Non-string keys raise TypeError, or are dropped when skipkeys was
        requested.  The keys are filtered *before* sorting: sorting first
        would compare the keys that skipkeys is supposed to ignore and could
        fail on keys that are not mutually comparable.
        """
        items = []
        for k, v in d.items():
            if not isinstance(k, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            items.append((k, v))
        if self._sort_keys:
            items.sort(key=lambda item: item[0])
        return items

    def _flatten(self, value):
        """Assign a reference number to *value* and, recursively, to
        everything it contains."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            items = self._dict_items(value)
            keys = [k for k, _ in items]
            values = [v for _, v in items]

            # All keys are numbered before any of the values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number _flatten() assigned to *value*."""
        if isinstance(value, _scalars):
            return self._objtable[(type(value), value)]
        else:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the type *token* combined with the object *size*.

        Sizes below 15 are stored in the low nibble of the token byte.
        Larger sizes set the low nibble to 0xF and follow as an integer
        object of 1, 2, 4 or 8 bytes.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write one object and record its file offset.

        Containers are written as lists of references to their items.
        Raises TypeError for unsupported types and OverflowError for
        integers that cannot be represented.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative integers are always written as signed 8 bytes.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Does not fit in a signed 8-byte integer, use 16 bytes.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 1/1/2001.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size of a unicode string counts 2-byte units.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, UID):
            # The low nibble of the token is the byte count minus one.
            if value.data < 0:
                raise ValueError("UIDs must be positive")
            elif value.data < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x80, value))
            elif value.data < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x81, value))
            elif value.data < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x83, value))
            elif value.data < 1 << 64:
                self._fp.write(struct.pack('>BQ', 0x87, value))
            else:
                raise OverflowError(value)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            items = self._dict_items(value)
            keyRefs = [self._getrefnum(k) for k, _ in items]
            valRefs = [self._getrefnum(v) for _, v in items]

            s = len(keyRefs)
            self._write_size(0xD0, s)
            # All key references are written first, then all value references.
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100832
833
def _is_fmt_binary(header):
    """Return True when *header* starts with the binary plist magic."""
    magic = b'bplist00'
    return header[:len(magic)] == magic
836
837
838#
839# Generic bits
840#
841
# Per-format implementation table: for every supported format the function
# that recognizes it from the first bytes of a file, the parser class and
# the writer class.
_FORMATS={
    FMT_XML: dict(
        detect=_is_fmt_xml,
        parser=_PlistParser,
        writer=_PlistWriter,
    ),
    FMT_BINARY: dict(
        detect=_is_fmt_binary,
        parser=_BinaryPlistParser,
        writer=_BinaryPlistWriter,
    )
}
853}
854
855
def load(fp, *, fmt=None, dict_type=dict):
    """Read a .plist file. 'fp' should be a readable and binary file object.
    Return the unpacked root object (which usually is a dictionary).

    When 'fmt' is None the format is detected from the first 32 bytes of the
    file; InvalidFileException is raised when no known format matches.
    """
    if fmt is not None:
        parser_class = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        # Use the parser of the first format whose detector accepts the header.
        parser_class = next(
            (info['parser'] for info in _FORMATS.values()
             if info['detect'](header)),
            None)
        if parser_class is None:
            raise InvalidFileException()

    return parser_class(dict_type=dict_type).parse(fp)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100875 return p.parse(fp)
876
877
def loads(value, *, fmt=None, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).
    """
    return load(BytesIO(value), fmt=fmt, dict_type=dict_type)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100884
885
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a writable,
    binary file object.

    Raise ValueError when 'fmt' is not a supported format.
    """
    format_info = _FORMATS.get(fmt)
    if format_info is None:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_class = format_info["writer"]
    writer_class(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
895
896
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    serialized = buffer.getvalue()
    return serialized