blob: 277ce622d001e480a2c55f033f91444e116d9a9f [file] [log] [blame]
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the dump(value, file)
function. 'value' is the top level object, 'file' is
a (writable) file object.

To parse a plist from a file, use the load(file) function,
with a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use loads()
and dumps().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data, bytes, bytearray, or
datetime.datetime objects.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = b"<binary gunk>",
        someMoreData = b"<lots of binary gunk>" * 10,
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    with open(fileName, 'wb') as fp:
        dump(pl, fp)

Parse Plist example:

    with open(fileName, 'rb') as fp:
        pl = load(fp)
    print(pl["aKey"])
"""
# Names exported by "from plistlib import *".  The first two rows are the
# deprecated API and the format constants; the last row is the current API.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Plist", "Data", "Dict", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
Christian Heimes7e182542008-01-27 15:20:13 +000053
54import binascii
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010055import codecs
56import contextlib
Christian Heimes7e182542008-01-27 15:20:13 +000057import datetime
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010058import enum
Christian Heimes7e182542008-01-27 15:20:13 +000059from io import BytesIO
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010060import itertools
61import os
Christian Heimes7e182542008-01-27 15:20:13 +000062import re
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010063import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
Christian Heimes7e182542008-01-27 15:20:13 +000066
67
# Enumeration of the supported serialization formats.  Its members are
# copied into the module namespace so they are reachable as the bare
# names FMT_XML and FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
Christian Heimes7e182542008-01-27 15:20:13 +000070
71
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010072#
73#
74# Deprecated functionality
75#
76#
Christian Heimes7e182542008-01-27 15:20:13 +000077
78
79class _InternalDict(dict):
80
81 # This class is needed while Dict is scheduled for deprecation:
82 # we only need to warn when a *user* instantiates Dict or when
83 # the "attribute notation for dict keys" is used.
Ronald Oussorenc5cf7972013-11-21 15:46:49 +010084 __slots__ = ()
Christian Heimes7e182542008-01-27 15:20:13 +000085
86 def __getattr__(self, attr):
87 try:
88 value = self[attr]
89 except KeyError:
90 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +000091 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020092 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000093 return value
94
95 def __setattr__(self, attr, value):
Christian Heimes7e182542008-01-27 15:20:13 +000096 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +020097 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +000098 self[attr] = value
99
100 def __delattr__(self, attr):
101 try:
102 del self[attr]
103 except KeyError:
104 raise AttributeError(attr)
Christian Heimes7e182542008-01-27 15:20:13 +0000105 warn("Attribute access from plist dicts is deprecated, use d[key] "
Victor Stinnerb5752892011-07-04 14:28:45 +0200106 "notation instead", DeprecationWarning, 2)
Christian Heimes7e182542008-01-27 15:20:13 +0000107
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100108
class Dict(_InternalDict):
    """Deprecated dict subclass with attribute-style key access."""

    def __init__(self, **kwargs):
        # Instantiating this class directly is what is deprecated, so the
        # warning points at the caller (stacklevel 2).
        message = ("The plistlib.Dict class is deprecated, "
                   "use builtin dict instead")
        warn(message, DeprecationWarning, 2)
        super().__init__(**kwargs)
115
116
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100117@contextlib.contextmanager
118def _maybe_open(pathOrFile, mode):
119 if isinstance(pathOrFile, str):
120 with open(pathOrFile, mode) as fp:
121 yield fp
Christian Heimes7e182542008-01-27 15:20:13 +0000122
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100123 else:
124 yield pathOrFile
125
126
class Plist(_InternalDict):
    """Deprecated top-level plist object.

    Use the load() and dump() functions together with regular dict
    objects instead.
    """

    def __init__(self, **kwargs):
        message = ("The Plist class is deprecated, use the load() and "
                   "dump() functions instead")
        warn(message, DeprecationWarning, 2)
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the load() function instead."""
        with _maybe_open(pathOrFile, 'rb') as stream:
            contents = load(stream)
        # Build the instance only after the file has been parsed, then
        # copy the parsed top-level mapping into it.
        instance = cls()
        instance.update(contents)
        return instance

    def write(self, pathOrFile):
        """Deprecated. Use the dump() function instead."""
        with _maybe_open(pathOrFile, 'wb') as stream:
            dump(self, stream)
Christian Heimes7e182542008-01-27 15:20:13 +0000150
151
def readPlist(pathOrFile):
    """
    Read a .plist from a path or file. pathOrFile should either
    be a file name, or a readable binary file object.

    The format is auto-detected; binary data is wrapped in Data objects
    and dictionaries are returned as attribute-accessible dicts.

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
         DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'rb') as stream:
        root = load(stream, fmt=None, use_builtin_types=False,
                    dict_type=_InternalDict)
    return root
165
def writePlist(value, pathOrFile):
    """
    Write 'value' to a .plist file. 'pathOrFile' may either be a
    file name or a (writable) file object.

    The output is always XML with sorted keys.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
         DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'wb') as stream:
        dump(value, stream, fmt=FMT_XML, sort_keys=True, skipkeys=False)
177
178
def readPlistFromBytes(data):
    """
    Read a plist data from a bytes object. Return the root object.

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
         DeprecationWarning, 2)

    # Wrap the bytes in a file-like object so load() can sniff the format.
    stream = BytesIO(data)
    return load(stream, fmt=None, use_builtin_types=False,
                dict_type=_InternalDict)
189
190
def writePlistToBytes(value):
    """
    Return 'value' as a plist-formatted bytes object.

    The output is always XML with sorted keys.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
         DeprecationWarning, 2)

    buffer = BytesIO()
    dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    return buffer.getvalue()
202
Christian Heimes7e182542008-01-27 15:20:13 +0000203
class Data:
    """
    Wrapper for binary data.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        # Only immutable bytes are accepted as the payload.
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Create a Data instance from base64-encoded text or bytes."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(_decode_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded, split into lines of at most
        'maxlinelength' characters."""
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        # Equal to another Data with the same payload, or to a bytes
        # object with the same contents.  The payload is always bytes, so
        # comparing it against str (as was done previously) could never
        # succeed.
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            return self.data == other
        else:
            # Let Python try the reflected comparison and then fall back
            # to identity; the result for unrelated types is still False.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
235
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100236#
237#
238# End of deprecated functionality
239#
240#
Christian Heimes7e182542008-01-27 15:20:13 +0000241
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100242
243#
244# XML support
245#
246
247
# XML 'header': the XML declaration and DOCTYPE that _PlistWriter emits at
# the start of an XML plist when asked to write a header.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
253
254
# Regex to find any control chars, except for \t \n and \r
# (used by _escape() to reject strings that cannot be stored as XML text).
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
259
260def _encode_base64(s, maxlinelength=76):
261 # copied from base64.encodebytes(), with added maxlinelength argument
262 maxbinsize = (maxlinelength//4)*3
263 pieces = []
264 for i in range(0, len(s), maxbinsize):
265 chunk = s[i : i + maxbinsize]
266 pieces.append(binascii.b2a_base64(chunk))
267 return b''.join(pieces)
268
269def _decode_base64(s):
270 if isinstance(s, str):
271 return binascii.a2b_base64(s.encode("utf-8"))
272
273 else:
274 return binascii.a2b_base64(s)
275
276# Contents should conform to a subset of ISO 8601
277# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
278# may be omitted with # a loss of precision)
279_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
280
281
282def _date_from_string(s):
283 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
284 gd = _dateParser.match(s).groupdict()
285 lst = []
286 for key in order:
287 val = gd[key]
288 if val is None:
289 break
290 lst.append(int(val))
291 return datetime.datetime(*lst)
292
293
294def _date_to_string(d):
295 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
296 d.year, d.month, d.day,
297 d.hour, d.minute, d.second
298 )
299
def _escape(text):
    """Return 'text' made safe for use as XML character data.

    Line endings are normalised to '\\n' and the characters '&', '<' and
    '>' are replaced by entities.  Raises ValueError if the text contains
    a control character other than tab, newline or carriage return.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")

    # Order matters: "\r\n" must be handled before the lone "\r", and '&'
    # before the entities that themselves contain '&'.
    substitutions = (
        ("\r\n", "\n"),     # convert DOS line endings
        ("\r", "\n"),       # convert Mac line endings
        ("&", "&amp;"),     # escape '&'
        ("<", "&lt;"),      # escape '<'
        (">", "&gt;"),      # escape '>'
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
311
312class _PlistParser:
313 def __init__(self, use_builtin_types, dict_type):
Christian Heimes7e182542008-01-27 15:20:13 +0000314 self.stack = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100315 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000316 self.root = None
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100317 self._use_builtin_types = use_builtin_types
318 self._dict_type = dict_type
Christian Heimes7e182542008-01-27 15:20:13 +0000319
320 def parse(self, fileobj):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700321 self.parser = ParserCreate()
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100322 self.parser.StartElementHandler = self.handle_begin_element
323 self.parser.EndElementHandler = self.handle_end_element
324 self.parser.CharacterDataHandler = self.handle_data
Ned Deilyb8e59f72011-05-28 02:19:19 -0700325 self.parser.ParseFile(fileobj)
Christian Heimes7e182542008-01-27 15:20:13 +0000326 return self.root
327
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100328 def handle_begin_element(self, element, attrs):
Christian Heimes7e182542008-01-27 15:20:13 +0000329 self.data = []
330 handler = getattr(self, "begin_" + element, None)
331 if handler is not None:
332 handler(attrs)
333
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100334 def handle_end_element(self, element):
Christian Heimes7e182542008-01-27 15:20:13 +0000335 handler = getattr(self, "end_" + element, None)
336 if handler is not None:
337 handler()
338
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100339 def handle_data(self, data):
Christian Heimes7e182542008-01-27 15:20:13 +0000340 self.data.append(data)
341
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100342 def add_object(self, value):
343 if self.current_key is not None:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700344 if not isinstance(self.stack[-1], type({})):
345 raise ValueError("unexpected element at line %d" %
346 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100347 self.stack[-1][self.current_key] = value
348 self.current_key = None
Christian Heimes7e182542008-01-27 15:20:13 +0000349 elif not self.stack:
350 # this is the root object
351 self.root = value
352 else:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700353 if not isinstance(self.stack[-1], type([])):
354 raise ValueError("unexpected element at line %d" %
355 self.parser.CurrentLineNumber)
Christian Heimes7e182542008-01-27 15:20:13 +0000356 self.stack[-1].append(value)
357
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100358 def get_data(self):
Christian Heimes7e182542008-01-27 15:20:13 +0000359 data = ''.join(self.data)
360 self.data = []
361 return data
362
363 # element handlers
364
365 def begin_dict(self, attrs):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100366 d = self._dict_type()
367 self.add_object(d)
Christian Heimes7e182542008-01-27 15:20:13 +0000368 self.stack.append(d)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100369
Christian Heimes7e182542008-01-27 15:20:13 +0000370 def end_dict(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100371 if self.current_key:
Ned Deilyb8e59f72011-05-28 02:19:19 -0700372 raise ValueError("missing value for key '%s' at line %d" %
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100373 (self.current_key,self.parser.CurrentLineNumber))
Christian Heimes7e182542008-01-27 15:20:13 +0000374 self.stack.pop()
375
376 def end_key(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100377 if self.current_key or not isinstance(self.stack[-1], type({})):
Ned Deilyb8e59f72011-05-28 02:19:19 -0700378 raise ValueError("unexpected key at line %d" %
379 self.parser.CurrentLineNumber)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100380 self.current_key = self.get_data()
Christian Heimes7e182542008-01-27 15:20:13 +0000381
382 def begin_array(self, attrs):
383 a = []
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100384 self.add_object(a)
Christian Heimes7e182542008-01-27 15:20:13 +0000385 self.stack.append(a)
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100386
Christian Heimes7e182542008-01-27 15:20:13 +0000387 def end_array(self):
388 self.stack.pop()
389
390 def end_true(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100391 self.add_object(True)
392
Christian Heimes7e182542008-01-27 15:20:13 +0000393 def end_false(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100394 self.add_object(False)
395
Christian Heimes7e182542008-01-27 15:20:13 +0000396 def end_integer(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100397 self.add_object(int(self.get_data()))
398
Christian Heimes7e182542008-01-27 15:20:13 +0000399 def end_real(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100400 self.add_object(float(self.get_data()))
401
Christian Heimes7e182542008-01-27 15:20:13 +0000402 def end_string(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100403 self.add_object(self.get_data())
404
Christian Heimes7e182542008-01-27 15:20:13 +0000405 def end_data(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100406 if self._use_builtin_types:
407 self.add_object(_decode_base64(self.get_data()))
408
409 else:
410 self.add_object(Data.fromBase64(self.get_data()))
411
Christian Heimes7e182542008-01-27 15:20:13 +0000412 def end_date(self):
Ronald Oussorenc5cf7972013-11-21 15:46:49 +0100413 self.add_object(_date_from_string(self.get_data()))
414
415
416class _DumbXMLWriter:
417 def __init__(self, file, indent_level=0, indent="\t"):
418 self.file = file
419 self.stack = []
420 self._indent_level = indent_level
421 self.indent = indent
422
423 def begin_element(self, element):
424 self.stack.append(element)
425 self.writeln("<%s>" % element)
426 self._indent_level += 1
427
428 def end_element(self, element):
429 assert self._indent_level > 0
430 assert self.stack.pop() == element
431 self._indent_level -= 1
432 self.writeln("</%s>" % element)
433
434 def simple_element(self, element, value=None):
435 if value is not None:
436 value = _escape(value)
437 self.writeln("<%s>%s</%s>" % (element, value, element))
438
439 else:
440 self.writeln("<%s/>" % element)
441
442 def writeln(self, line):
443 if line:
444 # plist has fixed encoding of utf-8
445
446 # XXX: is this test needed?
447 if isinstance(line, str):
448 line = line.encode('utf-8')
449 self.file.write(self._indent_level * self.indent)
450 self.file.write(line)
451 self.file.write(b'\n')
452
453
class _PlistWriter(_DumbXMLWriter):
    """Serialise Python values as an XML property list.

    'sort_keys' makes dictionaries come out in sorted key order;
    'skipkeys' makes non-string dictionary keys be dropped silently
    instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        # The XML declaration/DOCTYPE goes out before anything else.
        if writeHeader:
            file.write(PLISTHEADER)
        super().__init__(file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write 'value' wrapped in the <plist> root element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write one value, choosing the element from its Python type."""
        if isinstance(value, str):
            self.simple_element("string", value)
            return

        # The booleans must be recognised before the int test below,
        # because bool is a subclass of int.
        if value is True:
            self.simple_element("true")
            return

        if value is False:
            self.simple_element("false")
            return

        if isinstance(value, int):
            self.simple_element("integer", "%d" % value)
            return

        if isinstance(value, float):
            self.simple_element("real", repr(value))
            return

        if isinstance(value, dict):
            self.write_dict(value)
            return

        if isinstance(value, Data):
            self.write_data(value)
            return

        if isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)
            return

        if isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))
            return

        if isinstance(value, (tuple, list)):
            self.write_array(value)
            return

        raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        """Write the payload of a (deprecated) Data wrapper."""
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write binary data as a base64-encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the indentation of the <data>
        # tag itself rather than one level deeper.
        self._indent_level -= 1

        # Shorten the lines by the width of the indentation (a tab counts
        # as 8 columns), but never below 16 characters.
        indentation = self.indent.replace(b"\t", b" " * 8) * self._indent_level
        maxlinelength = max(16, 76 - len(indentation))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write a mapping as a <dict> element (or <dict/> when empty)."""
        if not d:
            self.simple_element("dict")
            return

        self.begin_element("dict")
        items = sorted(d.items()) if self._sort_keys else d.items()

        for key, value in items:
            if isinstance(key, str):
                self.simple_element("key", key)
                self.write_value(value)
            elif not self._skipkeys:
                raise TypeError("keys must be strings")
        self.end_element("dict")

    def write_array(self, array):
        """Write a sequence as an <array> element (or <array/> when empty)."""
        if not array:
            self.simple_element("array")
            return

        self.begin_element("array")
        for item in array:
            self.write_value(item)
        self.end_element("array")
549
550
551def _is_fmt_xml(header):
552 prefixes = (b'<?xml', b'<plist')
553
554 for pfx in prefixes:
555 if header.startswith(pfx):
556 return True
557
558 # Also check for alternative XML encodings, this is slightly
559 # overkill because the Apple tools (and plistlib) will not
560 # generate files with these encodings.
561 for bom, encoding in (
562 (codecs.BOM_UTF8, "utf-8"),
563 (codecs.BOM_UTF16_BE, "utf-16-be"),
564 (codecs.BOM_UTF16_LE, "utf-16-le"),
565 # expat does not support utf-32
566 #(codecs.BOM_UTF32_BE, "utf-32-be"),
567 #(codecs.BOM_UTF32_LE, "utf-32-le"),
568 ):
569 if not header.startswith(bom):
570 continue
571
572 for start in prefixes:
573 prefix = bom + start.decode('ascii').encode(encoding)
574 if header[:len(prefix)] == prefix:
575 return True
576
577 return False
578
579#
580# Binary Plist
581#
582
583
class InvalidFileException (ValueError):
    """Raised when data cannot be interpreted as a property list."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
587
# Maps a field width in bytes to the struct format character of the
# unsigned integer of that width; used for offsets and object references.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
589
590class _BinaryPlistParser:
591 """
592 Read or write a binary plist file, following the description of the binary
593 format. Raise InvalidFileException in case of error, otherwise return the
594 root object.
595
596 see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
597 """
598 def __init__(self, use_builtin_types, dict_type):
599 self._use_builtin_types = use_builtin_types
600 self._dict_type = dict_type
601
602 def parse(self, fp):
603 try:
604 # The basic file format:
605 # HEADER
606 # object...
607 # refid->offset...
608 # TRAILER
609 self._fp = fp
610 self._fp.seek(-32, os.SEEK_END)
611 trailer = self._fp.read(32)
612 if len(trailer) != 32:
613 raise InvalidFileException()
614 (
615 offset_size, self._ref_size, num_objects, top_object,
616 offset_table_offset
617 ) = struct.unpack('>6xBBQQQ', trailer)
618 self._fp.seek(offset_table_offset)
619 offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
620 self._ref_format = _BINARY_FORMAT[self._ref_size]
621 self._object_offsets = struct.unpack(
622 offset_format, self._fp.read(offset_size * num_objects))
623 return self._read_object(self._object_offsets[top_object])
624
625 except (OSError, IndexError, struct.error):
626 raise InvalidFileException()
627
628 def _get_size(self, tokenL):
629 """ return the size of the next object."""
630 if tokenL == 0xF:
631 m = self._fp.read(1)[0] & 0x3
632 s = 1 << m
633 f = '>' + _BINARY_FORMAT[s]
634 return struct.unpack(f, self._fp.read(s))[0]
635
636 return tokenL
637
638 def _read_refs(self, n):
639 return struct.unpack(
640 '>' + self._ref_format * n, self._fp.read(n * self._ref_size))
641
642 def _read_object(self, offset):
643 """
644 read the object at offset.
645
646 May recursively read sub-objects (content of an array/dict/set)
647 """
648 self._fp.seek(offset)
649 token = self._fp.read(1)[0]
650 tokenH, tokenL = token & 0xF0, token & 0x0F
651
652 if token == 0x00:
653 return None
654
655 elif token == 0x08:
656 return False
657
658 elif token == 0x09:
659 return True
660
661 # The referenced source code also mentions URL (0x0c, 0x0d) and
662 # UUID (0x0e), but neither can be generated using the Cocoa libraries.
663
664 elif token == 0x0f:
665 return b''
666
667 elif tokenH == 0x10: # int
668 return int.from_bytes(self._fp.read(1 << tokenL), 'big')
669
670 elif token == 0x22: # real
671 return struct.unpack('>f', self._fp.read(4))[0]
672
673 elif token == 0x23: # real
674 return struct.unpack('>d', self._fp.read(8))[0]
675
676 elif token == 0x33: # date
677 f = struct.unpack('>d', self._fp.read(8))[0]
678 # timestamp 0 of binary plists corresponds to 1/1/2001
679 # (year of Mac OS X 10.0), instead of 1/1/1970.
680 return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)
681
682 elif tokenH == 0x40: # data
683 s = self._get_size(tokenL)
684 if self._use_builtin_types:
685 return self._fp.read(s)
686 else:
687 return Data(self._fp.read(s))
688
689 elif tokenH == 0x50: # ascii string
690 s = self._get_size(tokenL)
691 result = self._fp.read(s).decode('ascii')
692 return result
693
694 elif tokenH == 0x60: # unicode string
695 s = self._get_size(tokenL)
696 return self._fp.read(s * 2).decode('utf-16be')
697
698 # tokenH == 0x80 is documented as 'UID' and appears to be used for
699 # keyed-archiving, not in plists.
700
701 elif tokenH == 0xA0: # array
702 s = self._get_size(tokenL)
703 obj_refs = self._read_refs(s)
704 return [self._read_object(self._object_offsets[x])
705 for x in obj_refs]
706
707 # tokenH == 0xB0 is documented as 'ordset', but is not actually
708 # implemented in the Apple reference code.
709
710 # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
711 # plists.
712
713 elif tokenH == 0xD0: # dict
714 s = self._get_size(tokenL)
715 key_refs = self._read_refs(s)
716 obj_refs = self._read_refs(s)
717 result = self._dict_type()
718 for k, o in zip(key_refs, obj_refs):
719 result[self._read_object(self._object_offsets[k])
720 ] = self._read_object(self._object_offsets[o])
721 return result
722
723 raise InvalidFileException()
724
725def _count_to_size(count):
726 if count < 1 << 8:
727 return 1
728
729 elif count < 1 << 16:
730 return 2
731
732 elif count << 1 << 32:
733 return 4
734
735 else:
736 return 8
737
class _BinaryPlistWriter (object):
    """Serialise Python values as a binary property list.

    'fp' is a writable binary file object that supports tell().
    'sort_keys' makes dictionaries come out in sorted key order;
    'skipkeys' makes non-string dictionary keys be dropped silently
    instead of raising TypeError.
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write 'value' as a complete binary plist."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _flatten(self, value):
        """Assign a reference number to 'value' and, recursively, to
        everything it contains."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        # bytearray is deliberately absent from this tuple: it is not
        # hashable, so it is tracked by identity like the containers.
        if isinstance(value, (
                str, int, float, datetime.datetime, bytes)):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            if (type(value.data), value.data) in self._objtable:
                return

        elif id(value) in self._objidtable:
            # The very same unhashable object was already flattened.
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        try:
            if isinstance(value, Data):
                self._objtable[(type(value.data), value.data)] = refnum
            else:
                self._objtable[(type(value), value)] = refnum
        except TypeError:
            # Unhashable (dict, list, bytearray, ...): key on identity.
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number _flatten() assigned to 'value'."""
        try:
            if isinstance(value, Data):
                return self._objtable[(type(value.data), value.data)]
            else:
                return self._objtable[(type(value), value)]
        except TypeError:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the marker byte 'token' combined with 'size'.

        Sizes below 15 are stored in the low nibble of the marker; larger
        ones are written as an integer object following the marker.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write one object and record its file offset."""
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative numbers are always stored as signed 8 bytes.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # 8-byte integers are signed, so values that need the top
                # bit have to use the 16-byte form.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 1/1/2001.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, Data):
            self._write_size(0x40, len(value.data))
            self._fp.write(value.data)

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size of a unicode string is counted in 16-bit units;
                # this differs from len(value) when the text contains
                # characters outside the Basic Multilingual Plane.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            if self._sort_keys:
                rootItems = sorted(value.items())
            else:
                rootItems = value.items()

            for k, v in rootItems:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise InvalidFileException()
937
938
939def _is_fmt_binary(header):
940 return header[:8] == b'bplist00'
941
942
943#
944# Generic bits
945#
946
# For every supported format: the function that recognises it from the
# first bytes of a file, and the classes used to parse and to write it.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
959
960
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be (readable) file object.
    Return the unpacked root object (which usually is a dictionary).

    'fmt' names the format of the file (FMT_XML or FMT_BINARY); when it is
    None the format is detected from the first bytes of the file, which
    requires 'fp' to be seekable.  If 'use_builtin_types' is true binary
    data is returned as bytes, otherwise as Data instances.  'dict_type'
    is the type used for dictionaries read from the file.

    Raises InvalidFileException when the format cannot be detected.
    """
    if fmt is None:
        header = fp.read(32)
        fp.seek(0)
        for info in _FORMATS.values():
            if info['detect'](header):
                parser_class = info['parser']
                break

        else:
            raise InvalidFileException()

    else:
        parser_class = _FORMATS[fmt]['parser']

    # Both branches build the parser the same way.  The explicit-format
    # branch used to leave out dict_type, which made the call fail
    # because the parsers require that argument.
    p = parser_class(
        use_builtin_types=use_builtin_types,
        dict_type=dict_type,
    )
    return p.parse(fp)
983
984
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    The keyword arguments are passed on to load() unchanged.
    """
    options = dict(
        fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
    return load(BytesIO(value), **options)
992
993
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a (writable)
    file object.

    'fmt' selects the output format; ValueError is raised for a format
    that is not supported.  'sort_keys' and 'skipkeys' are handed to the
    writer of that format.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_class = _FORMATS[fmt]["writer"]
    writer_class(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
1003
1004
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The keyword arguments are passed on to dump() unchanged.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()