blob: b9946fd313af0d67fddb74c1d0022f803f518ad6 [file] [log] [blame]
Benjamin Petersonef3e4c22009-04-11 19:48:14 +00001r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
Christian Heimes7e182542008-01-27 15:20:13 +00002
Ezio Melotti6e9b1df2009-09-16 00:49:03 +00003The property list (.plist) file format is a simple XML pickle supporting
Christian Heimes7e182542008-01-27 15:20:13 +00004basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
Ronald Oussorenc5cf7972013-11-21 15:46:49 +01007To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
Christian Heimes7e182542008-01-27 15:20:13 +000010
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010011To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
Christian Heimes7e182542008-01-27 15:20:13 +000013returns the top level object (again, usually a dictionary).
14
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010015To work with plist data in bytes objects, you can use loads()
16and dumps().
Christian Heimes7e182542008-01-27 15:20:13 +000017
18Values can be strings, integers, floats, booleans, tuples, lists,
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010019dictionaries (but only with string keys), Data, bytes, bytearray, or
20datetime.datetime objects.
Christian Heimes7e182542008-01-27 15:20:13 +000021
22Generate Plist example:
23
24 pl = dict(
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000025 aString = "Doodah",
26 aList = ["A", "B", 12, 32.1, [1, 2, 3]],
Christian Heimes7e182542008-01-27 15:20:13 +000027 aFloat = 0.1,
28 anInt = 728,
Ezio Melotti6e9b1df2009-09-16 00:49:03 +000029 aDict = dict(
30 anotherString = "<hello & hi there!>",
31 aUnicodeValue = "M\xe4ssig, Ma\xdf",
32 aTrueValue = True,
33 aFalseValue = False,
Christian Heimes7e182542008-01-27 15:20:13 +000034 ),
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010035 someData = b"<binary gunk>",
36 someMoreData = b"<lots of binary gunk>" * 10,
Christian Heimes7e182542008-01-27 15:20:13 +000037 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38 )
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010039 with open(fileName, 'wb') as fp:
40 dump(pl, fp)
Christian Heimes7e182542008-01-27 15:20:13 +000041
42Parse Plist example:
43
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010044 with open(fileName, 'rb') as fp:
45 pl = load(fp)
46 print(pl["aKey"])
Christian Heimes7e182542008-01-27 15:20:13 +000047"""
# Names exported by "from plistlib import *".
__all__ = [
    # Deprecated functions
    "readPlist",
    "writePlist",
    "readPlistFromBytes",
    "writePlistToBytes",
    # Deprecated classes, followed by the format constants
    "Plist",
    "Data",
    "Dict",
    "FMT_XML",
    "FMT_BINARY",
    # Current API
    "load",
    "dump",
    "loads",
    "dumps",
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# Enumeration of the supported plist encodings.  Its members are also
# copied into the module namespace so they can be used as plain names
# (FMT_XML, FMT_BINARY).
PlistFormat = enum.Enum(
    'PlistFormat',
    'FMT_XML FMT_BINARY',
    module=__name__,
)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010072#
73#
74# Deprecated functionality
75#
76#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
79class _InternalDict(dict):
80
81 # This class is needed while Dict is scheduled for deprecation:
82 # we only need to warn when a *user* instantiates Dict or when
83 # the "attribute notation for dict keys" is used.
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010084 __slots__ = ()
Christian Heimes7e182542008-01-27 15:20:13 +000085
86 def __getattr__(self, attr):
87 try:
88 value = self[attr]
89 except KeyError:
90 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +000091 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020092 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000093 return value
94
95 def __setattr__(self, attr, value):
Christian Heimes7e182542008-01-27 15:20:13 +000096 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020097 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000098 self[attr] = value
99
100 def __delattr__(self, attr):
101 try:
102 del self[attr]
103 except KeyError:
104 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +0000105 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +0200106 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +0000107
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100108
class Dict(_InternalDict):
    """Deprecated public dict type; instantiating it emits a warning."""

    def __init__(self, **kwargs):
        message = (
            "The plistlib.Dict class is deprecated, use builtin dict instead"
        )
        # stacklevel 2 attributes the warning to the code creating the Dict.
        warn(message, DeprecationWarning, 2)
        super().__init__(**kwargs)
115
116
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100117@contextlib.contextmanager
118def _maybe_open(pathOrFile, mode):
119 if isinstance(pathOrFile, str):
120 with open(pathOrFile, mode) as fp:
121 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +0000122
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100123 else:
124 yield pathOrFile
125
126
class Plist(_InternalDict):
    """This class has been deprecated. Use dump() and load()
    functions instead, together with regular dict objects.
    """

    def __init__(self, **kwargs):
        warn(
            "The Plist class is deprecated, use the load() and "
            "dump() functions instead",
            DeprecationWarning,
            2,
        )
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the load() function instead."""
        with _maybe_open(pathOrFile, 'rb') as source:
            loaded = load(source)
        # Copy the parsed mapping into a fresh instance of this class.
        instance = cls()
        instance.update(loaded)
        return instance

    def write(self, pathOrFile):
        """Deprecated. Use the dump() function instead."""
        with _maybe_open(pathOrFile, 'wb') as target:
            dump(self, target)
Christian Heimes7e182542008-01-27 15:20:13 +0000150
151
def readPlist(pathOrFile):
    """
    Read a .plist from a path or file. pathOrFile should either
    be a file name, or a readable binary file object.

    The data is loaded with use_builtin_types=False and with
    _InternalDict as the dictionary type.

    This function is deprecated, use load instead.
    """
    warn(
        "The readPlist function is deprecated, use load() instead",
        DeprecationWarning,
        2,
    )

    with _maybe_open(pathOrFile, 'rb') as source:
        root = load(
            source,
            fmt=None,
            use_builtin_types=False,
            dict_type=_InternalDict,
        )
    return root
165
def writePlist(value, pathOrFile):
    """
    Write 'value' to a .plist file. 'pathOrFile' may either be a
    file name or a (writable) file object.

    The output is always XML, written with sorted keys.

    This function is deprecated, use dump instead.
    """
    warn(
        "The writePlist function is deprecated, use dump() instead",
        DeprecationWarning,
        2,
    )
    with _maybe_open(pathOrFile, 'wb') as target:
        dump(
            value,
            target,
            fmt=FMT_XML,
            sort_keys=True,
            skipkeys=False,
        )
177
178
def readPlistFromBytes(data):
    """
    Read a plist data from a bytes object. Return the root object.

    The data is loaded with use_builtin_types=False and with
    _InternalDict as the dictionary type.

    This function is deprecated, use loads instead.
    """
    warn(
        "The readPlistFromBytes function is deprecated, use loads() instead",
        DeprecationWarning,
        2,
    )
    stream = BytesIO(data)
    return load(
        stream,
        fmt=None,
        use_builtin_types=False,
        dict_type=_InternalDict,
    )
189
190
def writePlistToBytes(value):
    """
    Return 'value' as a plist-formatted bytes object.

    The output is always XML, written with sorted keys.

    This function is deprecated, use dumps instead.
    """
    warn(
        "The writePlistToBytes function is deprecated, use dumps() instead",
        DeprecationWarning,
        2,
    )
    # Serialise into an in-memory buffer and hand back its contents.
    buffer = BytesIO()
    dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return buffer.getvalue()
202
Christian Heimes7e182542008-01-27 15:20:13 +0000203
class Data:
    """
    Wrapper for binary data.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        """Wrap 'data'; raise TypeError unless it is a bytes object."""
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Return a new instance built from base64-encoded 'data'."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(_decode_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded, wrapped at 'maxlinelength'."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        """Compare with another Data instance or with a bytes object."""
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            # The payload is always bytes (enforced by __init__), so
            # bytes is the only raw type it can meaningfully equal.
            return self.data == other
        else:
            # Let Python try the reflected comparison and then fall back
            # to identity, instead of hard-coding an identity test here.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
235
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100236#
237#
238# End of deprecated functionality
239#
240#
Christian Heimes7e182542008-01-27 15:20:13 +0000241
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100242
243#
244# XML support
245#
246
247
# XML 'header': the XML declaration followed by the plist DOCTYPE,
# each terminated by a newline.
PLISTHEADER = (
    b'<?xml version="1.0" encoding="UTF-8"?>\n'
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
    b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
)
253
254
255# Regex to find any control chars, except for \t \n and \r
256_controlCharPat = re.compile(
257 r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
258 r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
259
260def _encode_base64(s, maxlinelength=76):
261 # copied from base64.encodebytes(), with added maxlinelength argument
262 maxbinsize = (maxlinelength//4)*3
263 pieces = []
264 for i in range(0, len(s), maxbinsize):
265 chunk = s[i : i + maxbinsize]
266 pieces.append(binascii.b2a_base64(chunk))
267 return b''.join(pieces)
268
269def _decode_base64(s):
270 if isinstance(s, str):
271 return binascii.a2b_base64(s.encode("utf-8"))
272
273 else:
274 return binascii.a2b_base64(s)
275
276# Contents should conform to a subset of ISO 8601
277# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
278# may be omitted with # a loss of precision)
279_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
280
281
282def _date_from_string(s):
283 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
284 gd = _dateParser.match(s).groupdict()
285 lst = []
286 for key in order:
287 val = gd[key]
288 if val is None:
289 break
290 lst.append(int(val))
291 return datetime.datetime(*lst)
292
293
294def _date_to_string(d):
295 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
296 d.year, d.month, d.day,
297 d.hour, d.minute, d.second
298 )
299
def _escape(text):
    """Return 'text' prepared for use as XML character data.

    DOS and Mac line endings are converted to "\\n" and the characters
    '&', '<' and '>' are replaced by entity references.

    Raises ValueError if 'text' contains a character matched by
    _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # Order matters: "\r\n" has to be handled before the lone "\r", and
    # '&' before the replacements that themselves introduce a '&'.
    substitutions = (
        ("\r\n", "\n"),     # convert DOS line endings
        ("\r", "\n"),       # convert Mac line endings
        ("&", "&amp;"),     # escape '&'
        ("<", "&lt;"),      # escape '<'
        (">", "&gt;"),      # escape '>'
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
311
312class _PlistParser:
313 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000314 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100315 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000316 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100317 self._use_builtin_types = use_builtin_types
318 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000319
320 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700321 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100322 self.parser.StartElementHandler = self.handle_begin_element
323 self.parser.EndElementHandler = self.handle_end_element
324 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700325 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000326 return self.root
327
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100328 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000329 self.data = []
330 handler = getattr(self, "begin_" + element, None)
331 if handler is not None:
332 handler(attrs)
333
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100334 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000335 handler = getattr(self, "end_" + element, None)
336 if handler is not None:
337 handler()
338
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100339 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000340 self.data.append(data)
341
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100342 def add_object(self, value):
343 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700344 if not isinstance(self.stack[-1], type({})):
345 raise ValueError("unexpected element at line %d" %
346 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100347 self.stack[-1][self.current_key] = value
348 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000349 elif not self.stack:
350 # this is the root object
351 self.root = value
352 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700353 if not isinstance(self.stack[-1], type([])):
354 raise ValueError("unexpected element at line %d" %
355 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000356 self.stack[-1].append(value)
357
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100358 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000359 data = ''.join(self.data)
360 self.data = []
361 return data
362
363 # element handlers
364
365 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100366 d = self._dict_type()
367 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000368 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100369
Christian Heimes7e182542008-01-27 15:20:13 +0000370 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100371 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700372 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100373 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000374 self.stack.pop()
375
376 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100377 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700378 raise ValueError("unexpected key at line %d" %
379 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100380 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000381
382 def begin_array(self, attrs):
383 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100384 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000385 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100386
Christian Heimes7e182542008-01-27 15:20:13 +0000387 def end_array(self):
388 self.stack.pop()
389
390 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100391 self.add_object(True)
392
Christian Heimes7e182542008-01-27 15:20:13 +0000393 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100394 self.add_object(False)
395
Christian Heimes7e182542008-01-27 15:20:13 +0000396 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100397 self.add_object(int(self.get_data()))
398
Christian Heimes7e182542008-01-27 15:20:13 +0000399 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100400 self.add_object(float(self.get_data()))
401
Christian Heimes7e182542008-01-27 15:20:13 +0000402 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100403 self.add_object(self.get_data())
404
Christian Heimes7e182542008-01-27 15:20:13 +0000405 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100406 if self._use_builtin_types:
407 self.add_object(_decode_base64(self.get_data()))
408
409 else:
410 self.add_object(Data.fromBase64(self.get_data()))
411
Christian Heimes7e182542008-01-27 15:20:13 +0000412 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100413 self.add_object(_date_from_string(self.get_data()))
414
415
416class _DumbXMLWriter:
417 def __init__(self, file, indent_level=0, indent="\t"):
418 self.file = file
419 self.stack = []
420 self._indent_level = indent_level
421 self.indent = indent
422
423 def begin_element(self, element):
424 self.stack.append(element)
425 self.writeln("<%s>" % element)
426 self._indent_level += 1
427
428 def end_element(self, element):
429 assert self._indent_level > 0
430 assert self.stack.pop() == element
431 self._indent_level -= 1
432 self.writeln("</%s>" % element)
433
434 def simple_element(self, element, value=None):
435 if value is not None:
436 value = _escape(value)
437 self.writeln("<%s>%s</%s>" % (element, value, element))
438
439 else:
440 self.writeln("<%s/>" % element)
441
442 def writeln(self, line):
443 if line:
444 # plist has fixed encoding of utf-8
445
446 # XXX: is this test needed?
447 if isinstance(line, str):
448 line = line.encode('utf-8')
449 self.file.write(self._indent_level * self.indent)
450 self.file.write(line)
451 self.file.write(b'\n')
452
453
class _PlistWriter(_DumbXMLWriter):
    """Writer that serialises Python objects as an XML property list.

    'sort_keys' makes dictionaries come out in sorted item order;
    'skipkeys' silently drops non-string dictionary keys instead of
    raising TypeError for them.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        # The XML header goes out before anything else is set up.
        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write 'value' wrapped in the top-level <plist> element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, choosing the element from its Python type.

        Raises OverflowError for integers outside [-2**63, 2**64) and
        TypeError for unsupported types.
        """
        if isinstance(value, str):
            self.simple_element("string", value)
            return

        # The booleans must be tested before the generic int branch
        # below, because bool is a subclass of int.
        if value is True:
            self.simple_element("true")
            return

        if value is False:
            self.simple_element("false")
            return

        if isinstance(value, int):
            if not (-1 << 63 <= value < 1 << 64):
                raise OverflowError(value)
            self.simple_element("integer", "%d" % value)
            return

        if isinstance(value, float):
            self.simple_element("real", repr(value))
            return

        if isinstance(value, dict):
            self.write_dict(value)
            return

        if isinstance(value, Data):
            self.write_data(value)
            return

        if isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)
            return

        if isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))
            return

        if isinstance(value, (tuple, list)):
            self.write_array(value)
            return

        raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        """Write the payload of a Data wrapper as a <data> element."""
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write 'data' as a base64 encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the level of the tag itself.
        self._indent_level -= 1

        # Shorten the lines by the width of the indentation (a tab
        # counting as 8 columns), but never below 16 characters.
        expanded_indent = self.indent.replace(b"\t", b" " * 8)
        indent_width = len(expanded_indent * self._indent_level)
        maxlinelength = max(16, 76 - indent_width)

        encoded = _encode_base64(data, maxlinelength)
        for line in encoded.split(b"\n"):
            if not line:
                continue
            self.writeln(line)

        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write 'd' as a <dict> element; an empty dict becomes <dict/>.

        Raises TypeError for a non-string key unless skipkeys is set.
        """
        if not d:
            self.simple_element("dict")
            return

        self.begin_element("dict")
        items = sorted(d.items()) if self._sort_keys else d.items()

        for key, value in items:
            if not isinstance(key, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            self.simple_element("key", key)
            self.write_value(value)
        self.end_element("dict")

    def write_array(self, array):
        """Write 'array' as an <array> element; empty becomes <array/>."""
        if not array:
            self.simple_element("array")
            return

        self.begin_element("array")
        for item in array:
            self.write_value(item)
        self.end_element("array")
552
553
554def _is_fmt_xml(header):
555 prefixes = (b'<?xml', b'<plist')
556
557 for pfx in prefixes:
558 if header.startswith(pfx):
559 return True
560
561 # Also check for alternative XML encodings, this is slightly
562 # overkill because the Apple tools (and plistlib) will not
563 # generate files with these encodings.
564 for bom, encoding in (
565 (codecs.BOM_UTF8, "utf-8"),
566 (codecs.BOM_UTF16_BE, "utf-16-be"),
567 (codecs.BOM_UTF16_LE, "utf-16-le"),
568 # expat does not support utf-32
569 #(codecs.BOM_UTF32_BE, "utf-32-be"),
570 #(codecs.BOM_UTF32_LE, "utf-32-le"),
571 ):
572 if not header.startswith(bom):
573 continue
574
575 for start in prefixes:
576 prefix = bom + start.decode('ascii').encode(encoding)
577 if header[:len(prefix)] == prefix:
578 return True
579
580 return False
581
582#
583# Binary Plist
584#
585
586
class InvalidFileException (ValueError):
    """ValueError subclass whose message defaults to "Invalid file"."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
590
591_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
592
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return
    the root object.

    'use_builtin_types' selects whether data objects are returned as bytes
    (true) or wrapped in Data (false).  'dict_type' is called without
    arguments to create the object for every dictionary.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the binary plist in the seekable file 'fp' and return
        its top-level object.

        Raises InvalidFileException when the file cannot be decoded.
        """
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            return self._read_object(self._object_offsets[top_object])

        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            # OverflowError: a date outside the range of datetime;
            # UnicodeDecodeError: string bytes that are not valid
            # ASCII / UTF-16.  Both mean the file is corrupt.
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The size does not fit in the token: it follows as an
            # integer whose width (1 << m bytes) is taken from the low
            # two bits of the next byte.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read 'n' big-endian unsigned integers of 'size' bytes each and
        return them as a tuple."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            # Sizes without a struct format code are decoded by hand.
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        """Read 'n' object references of self._ref_size bytes each."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, offset):
        """
        read the object at offset.

        May recursively read sub-objects (content of an array/dict/set)
        """
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        # The high nibble selects the object type; the low nibble holds
        # a size or other type-specific information.
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            return None

        elif token == 0x08:
            return False

        elif token == 0x09:
            return True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            return b''

        elif tokenH == 0x10:  # int
            # 1 << tokenL bytes; values of 8 bytes or more are signed.
            return int.from_bytes(self._fp.read(1 << tokenL),
                                  'big', signed=tokenL >= 3)

        elif token == 0x22:  # real
            return struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23:  # real
            return struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            # Pure datetime arithmetic is used instead of
            # utcfromtimestamp(), which is limited to the range of the
            # platform's C library and is deprecated.
            return (datetime.datetime(2001, 1, 1) +
                    datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                return self._fp.read(s)
            else:
                return Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result = self._fp.read(s).decode('ascii')
            return result

        elif tokenH == 0x60:  # unicode string
            # The size counts 2-byte units, not bytes.
            s = self._get_size(tokenL)
            return self._fp.read(s * 2).decode('utf-16be')

        # tokenH == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            return [self._read_object(self._object_offsets[x])
                    for x in obj_refs]

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            # All key references come first, followed by all value
            # references.
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(self._object_offsets[k])
                       ] = self._read_object(self._object_offsets[o])
            return result

        raise InvalidFileException()
732
733def _count_to_size(count):
734 if count < 1 << 8:
735 return 1
736
737 elif count < 1 << 16:
738 return 2
739
740 elif count << 1 << 32:
741 return 4
742
743 else:
744 return 8
745
746class _BinaryPlistWriter (object):
747 def __init__(self, fp, sort_keys, skipkeys):
748 self._fp = fp
749 self._sort_keys = sort_keys
750 self._skipkeys = skipkeys
751
752 def write(self, value):
753
754 # Flattened object list:
755 self._objlist = []
756
757 # Mappings from object->objectid
758 # First dict has (type(object), object) as the key,
759 # second dict is used when object is not hashable and
760 # has id(object) as the key.
761 self._objtable = {}
762 self._objidtable = {}
763
764 # Create list of all objects in the plist
765 self._flatten(value)
766
767 # Size of object references in serialized containers
768 # depends on the number of objects in the plist.
769 num_objects = len(self._objlist)
770 self._object_offsets = [0]*num_objects
771 self._ref_size = _count_to_size(num_objects)
772
773 self._ref_format = _BINARY_FORMAT[self._ref_size]
774
775 # Write file header
776 self._fp.write(b'bplist00')
777
778 # Write object list
779 for obj in self._objlist:
780 self._write_object(obj)
781
782 # Write refnum->object offset table
783 top_object = self._getrefnum(value)
784 offset_table_offset = self._fp.tell()
785 offset_size = _count_to_size(offset_table_offset)
786 offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
787 self._fp.write(struct.pack(offset_format, *self._object_offsets))
788
789 # Write trailer
790 sort_version = 0
791 trailer = (
792 sort_version, offset_size, self._ref_size, num_objects,
793 top_object, offset_table_offset
794 )
795 self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
796
797 def _flatten(self, value):
798 # First check if the object is in the object table, not used for
799 # containers to ensure that two subcontainers with the same contents
800 # will be serialized as distinct values.
801 if isinstance(value, (
802 str, int, float, datetime.datetime, bytes, bytearray)):
803 if (type(value), value) in self._objtable:
804 return
805
806 elif isinstance(value, Data):
807 if (type(value.data), value.data) in self._objtable:
808 return
809
810 # Add to objectreference map
811 refnum = len(self._objlist)
812 self._objlist.append(value)
813 try:
814 if isinstance(value, Data):
815 self._objtable[(type(value.data), value.data)] = refnum
816 else:
817 self._objtable[(type(value), value)] = refnum
818 except TypeError:
819 self._objidtable[id(value)] = refnum
820
821 # And finally recurse into containers
822 if isinstance(value, dict):
823 keys = []
824 values = []
825 items = value.items()
826 if self._sort_keys:
827 items = sorted(items)
828
829 for k, v in items:
830 if not isinstance(k, str):
831 if self._skipkeys:
832 continue
833 raise TypeError("keys must be strings")
834 keys.append(k)
835 values.append(v)
836
837 for o in itertools.chain(keys, values):
838 self._flatten(o)
839
840 elif isinstance(value, (list, tuple)):
841 for o in value:
842 self._flatten(o)
843
844 def _getrefnum(self, value):
845 try:
846 if isinstance(value, Data):
847 return self._objtable[(type(value.data), value.data)]
848 else:
849 return self._objtable[(type(value), value)]
850 except TypeError:
851 return self._objidtable[id(value)]
852
853 def _write_size(self, token, size):
854 if size < 15:
855 self._fp.write(struct.pack('>B', token | size))
856
857 elif size < 1 << 8:
858 self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
859
860 elif size < 1 << 16:
861 self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
862
863 elif size < 1 << 32:
864 self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
865
866 else:
867 self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
868
869 def _write_object(self, value):
870 ref = self._getrefnum(value)
871 self._object_offsets[ref] = self._fp.tell()
872 if value is None:
873 self._fp.write(b'\x00')
874
875 elif value is False:
876 self._fp.write(b'\x08')
877
878 elif value is True:
879 self._fp.write(b'\x09')
880
881 elif isinstance(value, int):
Ronald Oussoren6db66532014-01-15 11:32:35 +0100882 if value < 0:
883 try:
884 self._fp.write(struct.pack('>Bq', 0x13, value))
885 except struct.error:
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100886 raise OverflowError(value) from None
Ronald Oussoren6db66532014-01-15 11:32:35 +0100887 elif value < 1 << 8:
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100888 self._fp.write(struct.pack('>BB', 0x10, value))
889 elif value < 1 << 16:
890 self._fp.write(struct.pack('>BH', 0x11, value))
891 elif value < 1 << 32:
892 self._fp.write(struct.pack('>BL', 0x12, value))
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100893 elif value < 1 << 63:
894 self._fp.write(struct.pack('>BQ', 0x13, value))
895 elif value < 1 << 64:
896 self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100897 else:
Ronald Oussoren94e44a92014-02-06 11:19:18 +0100898 raise OverflowError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100899
900 elif isinstance(value, float):
901 self._fp.write(struct.pack('>Bd', 0x23, value))
902
903 elif isinstance(value, datetime.datetime):
904 f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
905 self._fp.write(struct.pack('>Bd', 0x33, f))
906
907 elif isinstance(value, Data):
908 self._write_size(0x40, len(value.data))
909 self._fp.write(value.data)
910
911 elif isinstance(value, (bytes, bytearray)):
912 self._write_size(0x40, len(value))
913 self._fp.write(value)
914
915 elif isinstance(value, str):
916 try:
917 t = value.encode('ascii')
918 self._write_size(0x50, len(value))
919 except UnicodeEncodeError:
920 t = value.encode('utf-16be')
921 self._write_size(0x60, len(value))
922
923 self._fp.write(t)
924
925 elif isinstance(value, (list, tuple)):
926 refs = [self._getrefnum(o) for o in value]
927 s = len(refs)
928 self._write_size(0xA0, s)
929 self._fp.write(struct.pack('>' + self._ref_format * s, *refs))
930
931 elif isinstance(value, dict):
932 keyRefs, valRefs = [], []
933
934 if self._sort_keys:
935 rootItems = sorted(value.items())
936 else:
937 rootItems = value.items()
938
939 for k, v in rootItems:
940 if not isinstance(k, str):
941 if self._skipkeys:
942 continue
943 raise TypeError("keys must be strings")
944 keyRefs.append(self._getrefnum(k))
945 valRefs.append(self._getrefnum(v))
946
947 s = len(keyRefs)
948 self._write_size(0xD0, s)
949 self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
950 self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))
951
952 else:
Ronald Oussoren6db66532014-01-15 11:32:35 +0100953 raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100954
955
956def _is_fmt_binary(header):
957 return header[:8] == b'bplist00'
958
959
960#
961# Generic bits
962#
963
# Registry of the supported plist formats.  Each entry maps a format
# constant to its header-detection function, its parser class and its
# writer class.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
976
977
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file and return the unpacked root object.

    'fp' should be a (readable) file object.  When 'fmt' is None the
    format is detected from the first 32 bytes of the file, after which
    the file is rewound to the start; InvalidFileException is raised if
    no registered format recognizes the header.  The root object is
    usually a dictionary.
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)

        # Use the first registered format whose detector accepts the header.
        parser_cls = None
        for info in _FORMATS.values():
            if info['detect'](header):
                parser_cls = info['parser']
                break

        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(use_builtin_types=use_builtin_types,
                        dict_type=dict_type)
    return parser.parse(fp)
998
999
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.

    The bytes are wrapped in an in-memory file and handed to load()
    together with the given options.  Return the unpacked root object
    (which usually is a dictionary).
    """
    options = dict(
        fmt=fmt,
        use_builtin_types=use_builtin_types,
        dict_type=dict_type,
    )
    return load(BytesIO(value), **options)
1007
1008
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file.

    'fp' should be a (writable) file object.  Raises ValueError when
    'fmt' is not one of the registered formats.
    """
    format_info = _FORMATS.get(fmt)
    if format_info is None:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_cls = format_info["writer"]
    writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
1018
1019
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The value is serialized by dump() into an in-memory buffer whose
    contents are returned.
    """
    with BytesIO() as buffer:
        dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
        return buffer.getvalue()