blob: dcb0f9ce964419b021b48b6e14abde344e606627 [file] [log] [blame]
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data, bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Names exported by ``from plistlib import *``: the deprecated helpers
# (readPlist, writePlist, readPlistFromBytes, writePlistToBytes, Plist, Data,
# Dict), the format constants, and the load/dump/loads/dumps functions.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Plist", "Data", "Dict", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# Enumeration of the supported plist formats.  The members are also copied
# into the module namespace so callers can write ``fmt=FMT_XML`` or
# ``fmt=FMT_BINARY`` without going through the enum class.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
#
#
# Deprecated functionality
#
#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
79class _InternalDict(dict):
80
81 # This class is needed while Dict is scheduled for deprecation:
82 # we only need to warn when a *user* instantiates Dict or when
83 # the "attribute notation for dict keys" is used.
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010084 __slots__ = ()
Christian Heimes7e182542008-01-27 15:20:13 +000085
86 def __getattr__(self, attr):
87 try:
88 value = self[attr]
89 except KeyError:
90 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +000091 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020092 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000093 return value
94
95 def __setattr__(self, attr, value):
Christian Heimes7e182542008-01-27 15:20:13 +000096 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020097 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000098 self[attr] = value
99
100 def __delattr__(self, attr):
101 try:
102 del self[attr]
103 except KeyError:
104 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +0000105 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +0200106 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +0000107
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100108
class Dict(_InternalDict):
    """Deprecated public dict type; use the builtin dict instead.

    Instantiating it emits a DeprecationWarning attributed to the caller.
    """

    def __init__(self, **kwargs):
        warn("The plistlib.Dict class is deprecated, use builtin dict instead",
             DeprecationWarning, 2)
        super().__init__(**kwargs)
115
116
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100117@contextlib.contextmanager
118def _maybe_open(pathOrFile, mode):
119 if isinstance(pathOrFile, str):
120 with open(pathOrFile, mode) as fp:
121 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +0000122
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100123 else:
124 yield pathOrFile
125
126
class Plist(_InternalDict):
    """This class has been deprecated. Use dump() and load()
    functions instead, together with regular dict objects.
    """

    def __init__(self, **kwargs):
        warn("The Plist class is deprecated, use the load() and "
             "dump() functions instead", DeprecationWarning, 2)
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the load() function instead.

        pathOrFile is a file name or a readable binary file object.  The
        loaded top-level value is copied into a new instance of cls with
        update().
        """
        with _maybe_open(pathOrFile, 'rb') as fp:
            value = load(fp)
        plist = cls()
        plist.update(value)
        return plist

    def write(self, pathOrFile):
        """Deprecated. Use the dump() function instead.

        pathOrFile is a file name or a writable binary file object.
        """
        with _maybe_open(pathOrFile, 'wb') as fp:
            dump(self, fp)
Christian Heimes7e182542008-01-27 15:20:13 +0000150
151
def readPlist(pathOrFile):
    """
    Read a .plist from a path or file. pathOrFile should either
    be a file name, or a readable binary file object.

    The format is auto-detected.  Binary data is returned wrapped in Data
    objects and dictionaries as _InternalDict instances.

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
         DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'rb') as fp:
        return load(fp, fmt=None, use_builtin_types=False,
                    dict_type=_InternalDict)
165
def writePlist(value, pathOrFile):
    """
    Write 'value' to a .plist file. 'pathOrFile' may either be a
    file name or a (writable) file object.

    The output is always XML with sorted dictionary keys; non-string keys
    are not skipped.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
         DeprecationWarning, 2)
    with _maybe_open(pathOrFile, 'wb') as fp:
        dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)
177
178
def readPlistFromBytes(data):
    """
    Read a plist data from a bytes object. Return the root object.

    The format is auto-detected.  Binary data is returned wrapped in Data
    objects and dictionaries as _InternalDict instances.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         DeprecationWarning, 2)
    return load(BytesIO(data), fmt=None, use_builtin_types=False,
                dict_type=_InternalDict)
189
190
def writePlistToBytes(value):
    """
    Return 'value' as a plist-formatted bytes object.

    The output is always XML with sorted dictionary keys.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         DeprecationWarning, 2)
    f = BytesIO()
    dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return f.getvalue()
202
Christian Heimes7e182542008-01-27 15:20:13 +0000203
class Data:
    """
    Wrapper for binary data.

    The payload is kept in the ``data`` attribute and must be a bytes
    object.  Instances compare equal to other Data instances and to plain
    bytes objects holding the same payload.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Build an instance from base64 text (str or bytes)."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(_decode_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded, wrapped at maxlinelength."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            # The payload is always bytes (enforced by __init__), so raw
            # bytes are the only plain type it can meaningfully equal.
            return self.data == other
        else:
            # Let Python try the reflected comparison, then fall back to
            # identity, instead of deciding on the other type's behalf.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
235
#
#
# End of deprecated functionality
#
#


#
# XML support
#
246
247
# XML 'header': the XML declaration plus the plist DOCTYPE, as bytes.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""


# Regex to find any control chars, except for \t \n and \r
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
259
260def _encode_base64(s, maxlinelength=76):
261 # copied from base64.encodebytes(), with added maxlinelength argument
262 maxbinsize = (maxlinelength//4)*3
263 pieces = []
264 for i in range(0, len(s), maxbinsize):
265 chunk = s[i : i + maxbinsize]
266 pieces.append(binascii.b2a_base64(chunk))
267 return b''.join(pieces)
268
269def _decode_base64(s):
270 if isinstance(s, str):
271 return binascii.a2b_base64(s.encode("utf-8"))
272
273 else:
274 return binascii.a2b_base64(s)
275
276# Contents should conform to a subset of ISO 8601
277# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
278# may be omitted with # a loss of precision)
279_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
280
281
def _date_from_string(s):
    """Parse a plist date string into a naive datetime.datetime.

    Fields are consumed from year downwards and parsing stops at the first
    field missing from the string; the remaining fields take the defaults
    of the datetime constructor.
    """
    order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    gd = _dateParser.match(s).groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            break
        lst.append(int(val))
    return datetime.datetime(*lst)
292
293
294def _date_to_string(d):
295 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
296 d.year, d.month, d.day,
297 d.hour, d.minute, d.second
298 )
299
def _escape(text):
    """Return text made safe for use as XML character data.

    Line endings are normalised to '\\n' and '&', '<' and '>' are replaced
    by entities.  Raises ValueError if text contains a control character
    other than tab, newline or carriage return.
    """
    m = _controlCharPat.search(text)
    if m is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    text = text.replace("\r\n", "\n")       # convert DOS line endings
    text = text.replace("\r", "\n")         # convert Mac line endings
    text = text.replace("&", "&amp;")       # escape '&' (must come first)
    text = text.replace("<", "&lt;")        # escape '<'
    text = text.replace(">", "&gt;")        # escape '>'
    return text
311
312class _PlistParser:
313 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000314 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100315 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000316 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100317 self._use_builtin_types = use_builtin_types
318 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000319
320 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700321 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100322 self.parser.StartElementHandler = self.handle_begin_element
323 self.parser.EndElementHandler = self.handle_end_element
324 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700325 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000326 return self.root
327
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100328 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000329 self.data = []
330 handler = getattr(self, "begin_" + element, None)
331 if handler is not None:
332 handler(attrs)
333
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100334 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000335 handler = getattr(self, "end_" + element, None)
336 if handler is not None:
337 handler()
338
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100339 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000340 self.data.append(data)
341
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100342 def add_object(self, value):
343 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700344 if not isinstance(self.stack[-1], type({})):
345 raise ValueError("unexpected element at line %d" %
346 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100347 self.stack[-1][self.current_key] = value
348 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000349 elif not self.stack:
350 # this is the root object
351 self.root = value
352 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700353 if not isinstance(self.stack[-1], type([])):
354 raise ValueError("unexpected element at line %d" %
355 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000356 self.stack[-1].append(value)
357
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100358 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000359 data = ''.join(self.data)
360 self.data = []
361 return data
362
363 # element handlers
364
365 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100366 d = self._dict_type()
367 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000368 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100369
Christian Heimes7e182542008-01-27 15:20:13 +0000370 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100371 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700372 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100373 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000374 self.stack.pop()
375
376 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100377 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700378 raise ValueError("unexpected key at line %d" %
379 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100380 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000381
382 def begin_array(self, attrs):
383 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100384 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000385 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100386
Christian Heimes7e182542008-01-27 15:20:13 +0000387 def end_array(self):
388 self.stack.pop()
389
390 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100391 self.add_object(True)
392
Christian Heimes7e182542008-01-27 15:20:13 +0000393 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100394 self.add_object(False)
395
Christian Heimes7e182542008-01-27 15:20:13 +0000396 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100397 self.add_object(int(self.get_data()))
398
Christian Heimes7e182542008-01-27 15:20:13 +0000399 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100400 self.add_object(float(self.get_data()))
401
Christian Heimes7e182542008-01-27 15:20:13 +0000402 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100403 self.add_object(self.get_data())
404
Christian Heimes7e182542008-01-27 15:20:13 +0000405 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100406 if self._use_builtin_types:
407 self.add_object(_decode_base64(self.get_data()))
408
409 else:
410 self.add_object(Data.fromBase64(self.get_data()))
411
Christian Heimes7e182542008-01-27 15:20:13 +0000412 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100413 self.add_object(_date_from_string(self.get_data()))
414
415
416class _DumbXMLWriter:
417 def __init__(self, file, indent_level=0, indent="\t"):
418 self.file = file
419 self.stack = []
420 self._indent_level = indent_level
421 self.indent = indent
422
423 def begin_element(self, element):
424 self.stack.append(element)
425 self.writeln("<%s>" % element)
426 self._indent_level += 1
427
428 def end_element(self, element):
429 assert self._indent_level > 0
430 assert self.stack.pop() == element
431 self._indent_level -= 1
432 self.writeln("</%s>" % element)
433
434 def simple_element(self, element, value=None):
435 if value is not None:
436 value = _escape(value)
437 self.writeln("<%s>%s</%s>" % (element, value, element))
438
439 else:
440 self.writeln("<%s/>" % element)
441
442 def writeln(self, line):
443 if line:
444 # plist has fixed encoding of utf-8
445
446 # XXX: is this test needed?
447 if isinstance(line, str):
448 line = line.encode('utf-8')
449 self.file.write(self._indent_level * self.indent)
450 self.file.write(line)
451 self.file.write(b'\n')
452
453
454class _PlistWriter(_DumbXMLWriter):
455 def __init__(
456 self, file, indent_level=0, indent=b"\t", writeHeader=1,
457 sort_keys=True, skipkeys=False):
458
459 if writeHeader:
460 file.write(PLISTHEADER)
461 _DumbXMLWriter.__init__(self, file, indent_level, indent)
462 self._sort_keys = sort_keys
463 self._skipkeys = skipkeys
464
465 def write(self, value):
466 self.writeln("<plist version=\"1.0\">")
467 self.write_value(value)
468 self.writeln("</plist>")
469
470 def write_value(self, value):
471 if isinstance(value, str):
472 self.simple_element("string", value)
473
474 elif value is True:
475 self.simple_element("true")
476
477 elif value is False:
478 self.simple_element("false")
479
480 elif isinstance(value, int):
Ronald Oussoren6db66532014-01-15 11:32:35 +0100481 if -1 << 63 <= value < 1 << 64:
482 self.simple_element("integer", "%d" % value)
483 else:
484 raise OverflowError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100485
486 elif isinstance(value, float):
487 self.simple_element("real", repr(value))
488
489 elif isinstance(value, dict):
490 self.write_dict(value)
491
492 elif isinstance(value, Data):
493 self.write_data(value)
494
495 elif isinstance(value, (bytes, bytearray)):
496 self.write_bytes(value)
497
498 elif isinstance(value, datetime.datetime):
499 self.simple_element("date", _date_to_string(value))
500
501 elif isinstance(value, (tuple, list)):
502 self.write_array(value)
503
504 else:
505 raise TypeError("unsupported type: %s" % type(value))
506
507 def write_data(self, data):
508 self.write_bytes(data.data)
509
510 def write_bytes(self, data):
511 self.begin_element("data")
512 self._indent_level -= 1
513 maxlinelength = max(
514 16,
515 76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))
516
517 for line in _encode_base64(data, maxlinelength).split(b"\n"):
518 if line:
519 self.writeln(line)
520 self._indent_level += 1
521 self.end_element("data")
522
523 def write_dict(self, d):
524 if d:
525 self.begin_element("dict")
526 if self._sort_keys:
527 items = sorted(d.items())
528 else:
529 items = d.items()
530
531 for key, value in items:
532 if not isinstance(key, str):
533 if self._skipkeys:
534 continue
535 raise TypeError("keys must be strings")
536 self.simple_element("key", key)
537 self.write_value(value)
538 self.end_element("dict")
539
540 else:
541 self.simple_element("dict")
542
543 def write_array(self, array):
544 if array:
545 self.begin_element("array")
546 for value in array:
547 self.write_value(value)
548 self.end_element("array")
549
550 else:
551 self.simple_element("array")
552
553
554def _is_fmt_xml(header):
555 prefixes = (b'<?xml', b'<plist')
556
557 for pfx in prefixes:
558 if header.startswith(pfx):
559 return True
560
561 # Also check for alternative XML encodings, this is slightly
562 # overkill because the Apple tools (and plistlib) will not
563 # generate files with these encodings.
564 for bom, encoding in (
565 (codecs.BOM_UTF8, "utf-8"),
566 (codecs.BOM_UTF16_BE, "utf-16-be"),
567 (codecs.BOM_UTF16_LE, "utf-16-le"),
568 # expat does not support utf-32
569 #(codecs.BOM_UTF32_BE, "utf-32-be"),
570 #(codecs.BOM_UTF32_LE, "utf-32-le"),
571 ):
572 if not header.startswith(bom):
573 continue
574
575 for start in prefixes:
576 prefix = bom + start.decode('ascii').encode(encoding)
577 if header[:len(prefix)] == prefix:
578 return True
579
580 return False
581
#
# Binary Plist
#
585
586
class InvalidFileException (ValueError):
    """Raised when input cannot be recognised or parsed as a plist.

    It is a ValueError subclass; the message defaults to "Invalid file".
    """

    def __init__(self, message="Invalid file"):
        ValueError.__init__(self, message)
590
591_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
592
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    use_builtin_types selects bytes (true) or Data (false) for data objects;
    dict_type is the class instantiated for every dictionary.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file object fp and return the root object."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
            self._ref_format = _BINARY_FORMAT[self._ref_size]
            self._object_offsets = struct.unpack(
                offset_format, self._fp.read(offset_size * num_objects))
            return self._read_object(self._object_offsets[top_object])

        except (OSError, IndexError, KeyError, struct.error, OverflowError,
                UnicodeDecodeError):
            # KeyError: the trailer names an offset/reference width that is
            # not 1, 2, 4 or 8.  OverflowError / UnicodeDecodeError: sizes,
            # dates or string payloads that cannot be decoded.
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The size did not fit in the low nibble; it follows as an
            # integer whose width is 2**m bytes.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_refs(self, n):
        """Read n object references and return them as a tuple of ints."""
        return struct.unpack(
            '>' + self._ref_format * n, self._fp.read(n * self._ref_size))

    def _read_object(self, offset):
        """
        read the object at offset.

        May recursively read sub-objects (content of an array/dict/set)
        """
        self._fp.seek(offset)
        token = self._fp.read(1)[0]
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            return None

        elif token == 0x08:
            return False

        elif token == 0x09:
            return True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            return b''

        elif tokenH == 0x10:  # int
            # Integers of 8 bytes or more are signed, shorter ones unsigned.
            return int.from_bytes(self._fp.read(1 << tokenL),
                                  'big', signed=tokenL >= 3)

        elif token == 0x22:  # real
            return struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23:  # real
            return struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.  Plain datetime
            # arithmetic avoids the platform limits of utcfromtimestamp().
            return (datetime.datetime(2001, 1, 1) +
                    datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                return self._fp.read(s)
            else:
                return Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result = self._fp.read(s).decode('ascii')
            return result

        elif tokenH == 0x60:  # unicode string
            # The size counts 16-bit code units, hence two bytes each.
            s = self._get_size(tokenL)
            return self._fp.read(s * 2).decode('utf-16be')

        # tokenH == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            return [self._read_object(self._object_offsets[x])
                    for x in obj_refs]

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(self._object_offsets[k])
                       ] = self._read_object(self._object_offsets[o])
            return result

        raise InvalidFileException()
727 raise InvalidFileException()
728
729def _count_to_size(count):
730 if count < 1 << 8:
731 return 1
732
733 elif count < 1 << 16:
734 return 2
735
736 elif count << 1 << 32:
737 return 4
738
739 else:
740 return 8
741
class _BinaryPlistWriter (object):
    """Serialise Python values as a binary property list ('bplist00').

    fp is a writable binary file object that supports tell().  sort_keys
    makes dictionaries come out in sorted key order; skipkeys silently
    drops non-string dictionary keys instead of raising TypeError.
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write value as a complete binary plist: header, objects,
        offset table and trailer."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _flatten(self, value):
        """Assign a reference number to value and, recursively, to its
        contents, appending each new object to self._objlist."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        # bytearray is deliberately not listed: it is unhashable, so the
        # membership test would raise TypeError.  It is tracked by id() below.
        if isinstance(value, (
                str, int, float, datetime.datetime, bytes)):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            if (type(value.data), value.data) in self._objtable:
                return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        try:
            if isinstance(value, Data):
                self._objtable[(type(value.data), value.data)] = refnum
            else:
                self._objtable[(type(value), value)] = refnum
        except TypeError:
            # Unhashable object: fall back to identity.
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number _flatten() assigned to value."""
        try:
            if isinstance(value, Data):
                return self._objtable[(type(value.data), value.data)]
            else:
                return self._objtable[(type(value), value)]
        except TypeError:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write an object marker whose low nibble carries size, followed by
        an explicit integer when size does not fit in the nibble."""
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write one flattened object and record its file offset.

        Raises TypeError for unsupported types and OverflowError for
        integers that do not fit the format.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for a signed 8-byte integer: use 16 bytes.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 1/1/2001.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, Data):
            self._write_size(0x40, len(value.data))
            self._fp.write(value.data)

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size counts 16-bit code units, not code points:
                # characters outside the BMP take two units each.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            if self._sort_keys:
                rootItems = sorted(value.items())
            else:
                rootItems = value.items()

            for k, v in rootItems:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
Ronald Oussoren6db66532014-01-15 11:32:35 +0100949 raise TypeError(value)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100950
951
952def _is_fmt_binary(header):
953 return header[:8] == b'bplist00'
954
955
#
# Generic bits
#
959
# Per-format registry: how to recognise the format from the first bytes of a
# file, and which classes parse and write it.
_FORMATS={
    FMT_XML: dict(
        detect=_is_fmt_xml,
        parser=_PlistParser,
        writer=_PlistWriter,
    ),
    FMT_BINARY: dict(
        detect=_is_fmt_binary,
        parser=_BinaryPlistParser,
        writer=_BinaryPlistWriter,
    )
}
972
973
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be (readable) file object.
    Return the unpacked root object (which usually is a dictionary).

    fmt is FMT_XML, FMT_BINARY or None; with None the format is detected
    from the first 32 bytes, which requires fp to be seekable.
    use_builtin_types selects bytes (true) or Data (false) for binary data.
    dict_type is the class used for dictionaries.

    Raises InvalidFileException if the format cannot be detected.
    """
    if fmt is None:
        header = fp.read(32)
        fp.seek(0)
        for info in _FORMATS.values():
            if info['detect'](header):
                parser_class = info['parser']
                break

        else:
            raise InvalidFileException()

    else:
        parser_class = _FORMATS[fmt]['parser']

    # Both parsers take the same two arguments; pass dict_type for an
    # explicit fmt as well (it is a required constructor argument).
    p = parser_class(
        use_builtin_types=use_builtin_types,
        dict_type=dict_type,
    )
    return p.parse(fp)
995 return p.parse(fp)
996
997
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    The keyword arguments are passed through to load().
    """
    fp = BytesIO(value)
    return load(
        fp, fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
1005
1006
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a (writable)
    file object.

    fmt is FMT_XML or FMT_BINARY; any other value raises ValueError.
    sort_keys writes dictionaries in sorted key order; skipkeys drops
    non-string dictionary keys instead of raising TypeError.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
    writer.write(value)
1015 writer.write(value)
1016
1017
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The keyword arguments are passed through to dump().
    """
    fp = BytesIO()
    dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return fp.getvalue()
1023 return fp.getvalue()