blob: 5772efdfe6710a56feb9213745119ceceaa1fdd2 [file] [log] [blame]
Benjamin Petersonef3e4c22009-04-11 19:48:14 +00001r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
Christian Heimes7e182542008-01-27 15:20:13 +00002
Ezio Melotti6e9b1df2009-09-16 00:49:03 +00003The property list (.plist) file format is a simple XML pickle supporting
Christian Heimes7e182542008-01-27 15:20:13 +00004basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
Ronald Oussorenc5cf7972013-11-21 15:46:49 +01007To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
Christian Heimes7e182542008-01-27 15:20:13 +000010
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010011To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
Christian Heimes7e182542008-01-27 15:20:13 +000013returns the top level object (again, usually a dictionary).
14
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010015To work with plist data in bytes objects, you can use loads()
16and dumps().
Christian Heimes7e182542008-01-27 15:20:13 +000017
18Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), bytes, bytearray, or
20datetime.datetime objects.
Christian Heimes7e182542008-01-27 15:20:13 +000021
22Generate Plist example:
23
24 pl = dict(
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000025 aString = "Doodah",
26 aList = ["A", "B", 12, 32.1, [1, 2, 3]],
Christian Heimes7e182542008-01-27 15:20:13 +000027 aFloat = 0.1,
28 anInt = 728,
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000029 aDict = dict(
30 anotherString = "<hello & hi there!>",
31 aUnicodeValue = "M\xe4ssig, Ma\xdf",
32 aTrueValue = True,
33 aFalseValue = False,
Christian Heimes7e182542008-01-27 15:20:13 +000034 ),
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010035 someData = b"<binary gunk>",
36 someMoreData = b"<lots of binary gunk>" * 10,
Christian Heimes7e182542008-01-27 15:20:13 +000037 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38 )
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010039 with open(fileName, 'wb') as fp:
40 dump(pl, fp)
Christian Heimes7e182542008-01-27 15:20:13 +000041
42Parse Plist example:
43
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010044 with open(fileName, 'rb') as fp:
45 pl = load(fp)
46 print(pl["aKey"])
Christian Heimes7e182542008-01-27 15:20:13 +000047"""
Christian Heimes7e182542008-01-27 15:20:13 +000048__all__ = [
Jon Janzence81a922019-09-05 03:11:35 -050049 "InvalidFileException", "FMT_XML", "FMT_BINARY", "load", "dump", "loads", "dumps", "UID"
Christian Heimes7e182542008-01-27 15:20:13 +000050]
Christian Heimes7e182542008-01-27 15:20:13 +000051
52import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010053import codecs
Christian Heimes7e182542008-01-27 15:20:13 +000054import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import enum
Christian Heimes7e182542008-01-27 15:20:13 +000056from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010057import itertools
58import os
Christian Heimes7e182542008-01-27 15:20:13 +000059import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import struct
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010061from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000062
63
Ethan Furmanb7751062021-03-30 21:17:26 -070064PlistFormat = enum.global_enum(enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__))
Christian Heimes7e182542008-01-27 15:20:13 +000065
66
class UID:
    """Wrapper around an integer UID value.

    The wrapped integer is stored in ``data``; it must be an ``int`` in the
    range ``0 <= data < 2**64``.
    """

    def __init__(self, data):
        # The type check comes first so the range comparisons are meaningful.
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        # Lets a UID be used wherever an integer index is expected
        # (e.g. as an argument to struct.pack).
        return self.data

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.data)

    def __reduce__(self):
        # Pickle as "call the class again with the wrapped integer".
        return self.__class__, (self.data,)

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        # Consistent with __eq__: equal UIDs wrap equal integers.
        return hash(self.data)
93
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010094#
95# XML support
96#
97
98
99# XML 'header'
100PLISTHEADER = b"""\
101<?xml version="1.0" encoding="UTF-8"?>
102<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
103"""
104
105
106# Regex to find any control chars, except for \t \n and \r
107_controlCharPat = re.compile(
108 r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
109 r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
110
111def _encode_base64(s, maxlinelength=76):
112 # copied from base64.encodebytes(), with added maxlinelength argument
113 maxbinsize = (maxlinelength//4)*3
114 pieces = []
115 for i in range(0, len(s), maxbinsize):
116 chunk = s[i : i + maxbinsize]
117 pieces.append(binascii.b2a_base64(chunk))
118 return b''.join(pieces)
119
120def _decode_base64(s):
121 if isinstance(s, str):
122 return binascii.a2b_base64(s.encode("utf-8"))
123
124 else:
125 return binascii.a2b_base64(s)
126
127# Contents should conform to a subset of ISO 8601
128# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
129# may be omitted with # a loss of precision)
130_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
131
132
133def _date_from_string(s):
134 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
135 gd = _dateParser.match(s).groupdict()
136 lst = []
137 for key in order:
138 val = gd[key]
139 if val is None:
140 break
141 lst.append(int(val))
142 return datetime.datetime(*lst)
143
144
145def _date_to_string(d):
146 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
147 d.year, d.month, d.day,
148 d.hour, d.minute, d.second
149 )
150
def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalised to ``\\n`` and the characters ``&``, ``<``
    and ``>`` are replaced by their entity references.

    Raises ValueError if *text* contains a control character matched by
    ``_controlCharPat``.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # DOS (\r\n) endings first, then any remaining bare Mac (\r) endings.
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # '&' has to be handled first so the entities added for '<' and '>'
    # are not escaped a second time.
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        text = text.replace(char, entity)
    return text
162
163class _PlistParser:
Jon Janzence81a922019-09-05 03:11:35 -0500164 def __init__(self, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000165 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100166 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000167 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100168 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000169
170 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700171 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100172 self.parser.StartElementHandler = self.handle_begin_element
173 self.parser.EndElementHandler = self.handle_end_element
174 self.parser.CharacterDataHandler = self.handle_data
Ronald Oussoren05ee7902020-10-19 20:13:49 +0200175 self.parser.EntityDeclHandler = self.handle_entity_decl
Ned Deilyb8e59f72011-05-28 02:19:19 -0700176 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000177 return self.root
178
Ronald Oussoren05ee7902020-10-19 20:13:49 +0200179 def handle_entity_decl(self, entity_name, is_parameter_entity, value, base, system_id, public_id, notation_name):
180 # Reject plist files with entity declarations to avoid XML vulnerabilies in expat.
181 # Regular plist files don't contain those declerations, and Apple's plutil tool does not
182 # accept them either.
183 raise InvalidFileException("XML entity declarations are not supported in plist files")
184
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100185 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000186 self.data = []
187 handler = getattr(self, "begin_" + element, None)
188 if handler is not None:
189 handler(attrs)
190
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100191 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000192 handler = getattr(self, "end_" + element, None)
193 if handler is not None:
194 handler()
195
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100196 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000197 self.data.append(data)
198
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100199 def add_object(self, value):
200 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700201 if not isinstance(self.stack[-1], type({})):
202 raise ValueError("unexpected element at line %d" %
203 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100204 self.stack[-1][self.current_key] = value
205 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000206 elif not self.stack:
207 # this is the root object
208 self.root = value
209 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700210 if not isinstance(self.stack[-1], type([])):
211 raise ValueError("unexpected element at line %d" %
212 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000213 self.stack[-1].append(value)
214
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100215 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000216 data = ''.join(self.data)
217 self.data = []
218 return data
219
220 # element handlers
221
222 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100223 d = self._dict_type()
224 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000225 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100226
Christian Heimes7e182542008-01-27 15:20:13 +0000227 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100228 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700229 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100230 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000231 self.stack.pop()
232
233 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100234 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700235 raise ValueError("unexpected key at line %d" %
236 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100237 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000238
239 def begin_array(self, attrs):
240 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100241 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000242 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100243
Christian Heimes7e182542008-01-27 15:20:13 +0000244 def end_array(self):
245 self.stack.pop()
246
247 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100248 self.add_object(True)
249
Christian Heimes7e182542008-01-27 15:20:13 +0000250 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100251 self.add_object(False)
252
Christian Heimes7e182542008-01-27 15:20:13 +0000253 def end_integer(self):
Ronald Oussoren31852672020-10-20 09:26:33 +0200254 raw = self.get_data()
255 if raw.startswith('0x') or raw.startswith('0X'):
256 self.add_object(int(raw, 16))
257 else:
258 self.add_object(int(raw))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100259
Christian Heimes7e182542008-01-27 15:20:13 +0000260 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100261 self.add_object(float(self.get_data()))
262
Christian Heimes7e182542008-01-27 15:20:13 +0000263 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100264 self.add_object(self.get_data())
265
Christian Heimes7e182542008-01-27 15:20:13 +0000266 def end_data(self):
Jon Janzence81a922019-09-05 03:11:35 -0500267 self.add_object(_decode_base64(self.get_data()))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100268
Christian Heimes7e182542008-01-27 15:20:13 +0000269 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100270 self.add_object(_date_from_string(self.get_data()))
271
272
273class _DumbXMLWriter:
274 def __init__(self, file, indent_level=0, indent="\t"):
275 self.file = file
276 self.stack = []
277 self._indent_level = indent_level
278 self.indent = indent
279
280 def begin_element(self, element):
281 self.stack.append(element)
282 self.writeln("<%s>" % element)
283 self._indent_level += 1
284
285 def end_element(self, element):
286 assert self._indent_level > 0
287 assert self.stack.pop() == element
288 self._indent_level -= 1
289 self.writeln("</%s>" % element)
290
291 def simple_element(self, element, value=None):
292 if value is not None:
293 value = _escape(value)
294 self.writeln("<%s>%s</%s>" % (element, value, element))
295
296 else:
297 self.writeln("<%s/>" % element)
298
299 def writeln(self, line):
300 if line:
301 # plist has fixed encoding of utf-8
302
303 # XXX: is this test needed?
304 if isinstance(line, str):
305 line = line.encode('utf-8')
306 self.file.write(self._indent_level * self.indent)
307 self.file.write(line)
308 self.file.write(b'\n')
309
310
class _PlistWriter(_DumbXMLWriter):
    """Serialize Python objects as an XML plist onto a binary file.

    *sort_keys* makes dictionaries come out in sorted key order; with
    *skipkeys* true, dictionary entries whose key is not a ``str`` are
    silently dropped instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        super().__init__(file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, dispatching on its Python type.

        Raises TypeError for unsupported types and OverflowError for
        integers outside ``-2**63 <= value < 2**64``.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is a subclass of int, so the singletons are tested first.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_bytes(self, data):
        """Write *data* as a <data> element holding base64 lines."""
        self.begin_element("data")
        # The base64 lines are written at the same level as the <data> tag.
        self._indent_level -= 1
        # Aim for 76 columns in total (a tab counted as 8), but never use
        # fewer than 16 characters of payload per line.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def _dict_items(self, d):
        """Return the (key, value) pairs of *d* that are to be written.

        Non-string keys are dropped (skipkeys) or rejected with TypeError
        *before* any sorting, so a dict mixing str and non-str keys no
        longer fails inside sorted() when skipkeys is true.
        """
        items = []
        for key, value in d.items():
            if not isinstance(key, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            items.append((key, value))
        if self._sort_keys:
            # Keys are unique, so ordering by key alone matches ordering
            # the pairs, without ever comparing values.
            items.sort(key=lambda item: item[0])
        return items

    def write_dict(self, d):
        """Write *d* as a <dict> element (``<dict/>`` when *d* is empty)."""
        if d:
            self.begin_element("dict")
            for key, value in self._dict_items(d):
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write *array* as an <array> element (``<array/>`` when empty)."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
403
404
405def _is_fmt_xml(header):
406 prefixes = (b'<?xml', b'<plist')
407
408 for pfx in prefixes:
409 if header.startswith(pfx):
410 return True
411
412 # Also check for alternative XML encodings, this is slightly
413 # overkill because the Apple tools (and plistlib) will not
414 # generate files with these encodings.
415 for bom, encoding in (
416 (codecs.BOM_UTF8, "utf-8"),
417 (codecs.BOM_UTF16_BE, "utf-16-be"),
418 (codecs.BOM_UTF16_LE, "utf-16-le"),
419 # expat does not support utf-32
420 #(codecs.BOM_UTF32_BE, "utf-32-be"),
421 #(codecs.BOM_UTF32_LE, "utf-32-le"),
422 ):
423 if not header.startswith(bom):
424 continue
425
426 for start in prefixes:
427 prefix = bom + start.decode('ascii').encode(encoding)
428 if header[:len(prefix)] == prefix:
429 return True
430
431 return False
432
433#
434# Binary Plist
435#
436
437
class InvalidFileException(ValueError):
    """Raised when data cannot be recognised or parsed as a plist."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
441
442_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
443
Serhiy Storchakaa897aee2017-11-30 23:26:11 +0200444_undefined = object()
445
class _BinaryPlistParser:
    """
    Parser for binary plist files, following the description of the binary
    format. Raises InvalidFileException in case of error, otherwise returns
    the root object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """

    def __init__(self, dict_type):
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file *fp* and return the root object."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            fp.seek(-32, os.SEEK_END)
            trailer = fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            # Trailer: 6 unused bytes, two one-byte sizes, three 64-bit ints.
            fields = struct.unpack('>6xBBQQQ', trailer)
            offset_size, self._ref_size = fields[0], fields[1]
            num_objects, top_object, offset_table_offset = fields[2:]
            fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Per-reference cache of decoded objects, filled on demand.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
                ValueError):
            # Any low-level failure means the file is malformed.
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """Return the size of the next object.

        A low nibble of 0xF means the real size follows as an integer whose
        width is given by the low two bits of the next byte.
        """
        if tokenL != 0xF:
            return tokenL

        exponent = self._fp.read(1)[0] & 0x3
        width = 1 << exponent
        fmt = '>' + _BINARY_FORMAT[width]
        return struct.unpack(fmt, self._fp.read(width))[0]

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            # struct raises struct.error itself when data is too short.
            return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)

        # Width without a struct code: validate, then decode by hand.
        if not size or len(data) != size * n:
            raise InvalidFileException()
        return tuple(int.from_bytes(data[pos: pos + size], 'big')
                     for pos in range(0, size * n, size))

    def _read_refs(self, n):
        """Read *n* object references from the current file position."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        Return the object with reference number *ref*, decoding it on
        first use.

        May recursively read sub-objects (content of an array/dict/set)
        """
        cached = self._objects[ref]
        if cached is not _undefined:
            return cached

        fp = self._fp
        fp.seek(self._object_offsets[ref])
        token = fp.read(1)[0]
        # High nibble: object kind.  Low nibble: inline size or sub-type.
        tokenH = token & 0xF0
        tokenL = token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # Integers of 8 or more bytes (tokenL >= 3) are signed.
            result = int.from_bytes(fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22:  # real (4 bytes)
            (result,) = struct.unpack('>f', fp.read(4))

        elif token == 0x23:  # real (8 bytes)
            (result,) = struct.unpack('>d', fp.read(8))

        elif token == 0x33:  # date
            (seconds,) = struct.unpack('>d', fp.read(8))
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=seconds))

        elif tokenH == 0x40:  # data
            size = self._get_size(tokenL)
            result = fp.read(size)
            if len(result) != size:
                raise InvalidFileException()

        elif tokenH == 0x50:  # ascii string
            size = self._get_size(tokenL)
            raw = fp.read(size)
            if len(raw) != size:
                raise InvalidFileException()
            result = raw.decode('ascii')

        elif tokenH == 0x60:  # unicode string
            # The stored size counts 2-byte UTF-16 code units.
            size = self._get_size(tokenL) * 2
            raw = fp.read(size)
            if len(raw) != size:
                raise InvalidFileException()
            result = raw.decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
            result = UID(int.from_bytes(fp.read(1 + tokenL), 'big'))

        elif tokenH == 0xA0:  # array
            count = self._get_size(tokenL)
            obj_refs = self._read_refs(count)
            result = []
            # Register the list before descending so a reference back to
            # this array finds it instead of recursing again.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            count = self._get_size(tokenL)
            key_refs = self._read_refs(count)
            obj_refs = self._read_refs(count)
            result = self._dict_type()
            self._objects[ref] = result
            try:
                for k, o in zip(key_refs, obj_refs):
                    result[self._read_object(k)] = self._read_object(o)
            except TypeError:
                # e.g. an unhashable object used as a key
                raise InvalidFileException()

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100605
606def _count_to_size(count):
607 if count < 1 << 8:
608 return 1
609
610 elif count < 1 << 16:
611 return 2
612
Serhiy Storchaka212d32f2020-11-03 16:15:56 +0200613 elif count < 1 << 32:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100614 return 4
615
616 else:
617 return 8
618
# Types whose instances are de-duplicated by value (and type) when writing;
# everything else is tracked by identity.
_scalars = (str, int, float, datetime.datetime, bytes)


class _BinaryPlistWriter (object):
    """Serialize Python objects as a binary plist onto a binary file.

    *fp* must support write() and tell().  *sort_keys* and *skipkeys* have
    the same meaning as for dump().
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* as a complete binary plist to the file."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _dict_items(self, d):
        """Return the (key, value) pairs of *d* that are to be written.

        Used by both _flatten() and _write_object() so the two always agree
        on which entries exist and in which order.  Non-string keys are
        dropped (skipkeys) or rejected with TypeError *before* any sorting,
        so a dict mixing str and non-str keys no longer fails inside
        sorted() when skipkeys is true.
        """
        items = []
        for key, value in d.items():
            if not isinstance(key, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            items.append((key, value))
        if self._sort_keys:
            # Keys are unique, so ordering by key alone matches ordering
            # the pairs, without ever comparing values.
            items.sort(key=lambda item: item[0])
        return items

    def _flatten(self, value):
        """Assign reference numbers to *value* and everything it contains."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            items = self._dict_items(value)
            # All keys are numbered before any of the values.
            keys = [k for k, _ in items]
            values = [v for _, v in items]
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number _flatten() assigned to *value*."""
        if isinstance(value, _scalars):
            return self._objtable[(type(value), value)]
        else:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the marker byte *token* combined with *size*.

        Sizes below 15 are stored in the low nibble of the marker itself;
        larger ones set the nibble to 0xF and follow as an int object.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write the encoded form of *value* and record its file offset.

        Raises TypeError for unsupported types and OverflowError for
        integers that do not fit the format.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        # bool is a subclass of int, so the singletons are tested first.
        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative numbers are always stored as signed 8 bytes.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for a signed 8-byte int: use the 16-byte form.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 2001-01-01.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size counts 2-byte UTF-16 code units.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, UID):
            # struct.pack accepts the UID itself through UID.__index__.
            if value.data < 0:
                raise ValueError("UIDs must be positive")
            elif value.data < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x80, value))
            elif value.data < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x81, value))
            elif value.data < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x83, value))
            elif value.data < 1 << 64:
                self._fp.write(struct.pack('>BQ', 0x87, value))
            else:
                raise OverflowError(value)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            items = self._dict_items(value)
            keyRefs = [self._getrefnum(k) for k, _ in items]
            valRefs = [self._getrefnum(v) for _, v in items]

            s = len(keyRefs)
            self._write_size(0xD0, s)
            # All key references first, then all value references.
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100831
832
833def _is_fmt_binary(header):
834 return header[:8] == b'bplist00'
835
836
837#
838# Generic bits
839#
840
# For each supported format: the function that recognises it from the
# first bytes of a file, plus its parser and writer classes.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
853
854
def load(fp, *, fmt=None, dict_type=dict):
    """Read a .plist file. 'fp' should be a readable and binary file object.
    Return the unpacked root object (which usually is a dictionary).

    When 'fmt' is None the format is detected from the first 32 bytes of
    the file, and InvalidFileException is raised if no known format
    matches.  'dict_type' is the type used for dictionaries in the result.
    """
    if fmt is not None:
        parser_class = _FORMATS[fmt]['parser']
    else:
        # Peek at the start of the file, then rewind for the real parse.
        header = fp.read(32)
        fp.seek(0)
        for info in _FORMATS.values():
            if info['detect'](header):
                parser_class = info['parser']
                break
        else:
            raise InvalidFileException()

    parser = parser_class(dict_type=dict_type)
    return parser.parse(fp)
875
876
def loads(value, *, fmt=None, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    'fmt' and 'dict_type' are passed on to load() unchanged.
    """
    return load(BytesIO(value), fmt=fmt, dict_type=dict_type)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100883
884
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a writable,
    binary file object.

    Raises ValueError if 'fmt' is not one of the supported formats.
    """
    if fmt not in _FORMATS:
        raise ValueError(f"Unsupported format: {fmt!r}")

    writer_class = _FORMATS[fmt]["writer"]
    writer_class(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
894
895
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    Takes the same keyword arguments as dump().
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()