| Benjamin Peterson | ef3e4c2 | 2009-04-11 19:48:14 +0000 | [diff] [blame] | 1 | r"""plistlib.py -- a tool to generate and parse MacOSX .plist files. | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 2 |  | 
| Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 3 | The property list (.plist) file format is a simple XML pickle supporting | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 4 | basic object types, like dictionaries, lists, numbers and strings. | 
|  | 5 | Usually the top level object is a dictionary. | 
|  | 6 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 7 | To write out a plist file, use the dump(value, file) | 
|  | 8 | function. 'value' is the top level object, 'file' is | 
|  | 9 | a (writable) file object. | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 10 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 11 | To parse a plist from a file, use the load(file) function, | 
|  | 12 | with a (readable) file object as the only argument. It | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 13 | returns the top level object (again, usually a dictionary). | 
|  | 14 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 15 | To work with plist data in bytes objects, you can use loads() | 
|  | 16 | and dumps(). | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 17 |  | 
|  | 18 | Values can be strings, integers, floats, booleans, tuples, lists, | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 19 | dictionaries (but only with string keys), Data, bytes, bytearray, or | 
|  | 20 | datetime.datetime objects. | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 21 |  | 
|  | 22 | Generate Plist example: | 
|  | 23 |  | 
|  | 24 | pl = dict( | 
| Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 25 | aString = "Doodah", | 
|  | 26 | aList = ["A", "B", 12, 32.1, [1, 2, 3]], | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 27 | aFloat = 0.1, | 
|  | 28 | anInt = 728, | 
| Ezio Melotti | 6e9b1df | 2009-09-16 00:49:03 +0000 | [diff] [blame] | 29 | aDict = dict( | 
|  | 30 | anotherString = "<hello & hi there!>", | 
|  | 31 | aUnicodeValue = "M\xe4ssig, Ma\xdf", | 
|  | 32 | aTrueValue = True, | 
|  | 33 | aFalseValue = False, | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 34 | ), | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 35 | someData = b"<binary gunk>", | 
|  | 36 | someMoreData = b"<lots of binary gunk>" * 10, | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 37 | aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())), | 
|  | 38 | ) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 39 | with open(fileName, 'wb') as fp: | 
|  | 40 | dump(pl, fp) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 41 |  | 
|  | 42 | Parse Plist example: | 
|  | 43 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 44 | with open(fileName, 'rb') as fp: | 
|  | 45 | pl = load(fp) | 
|  | 46 | print(pl["aKey"]) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 47 | """ | 
# Public names exported by a star-import of this module.
__all__ = [
    "InvalidFileException",
    "FMT_XML",
    "FMT_BINARY",
    "load",
    "dump",
    "loads",
    "dumps",
    "UID",
]
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 51 |  | 
|  | 52 | import binascii | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 53 | import codecs | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 54 | import datetime | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 55 | import enum | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 56 | from io import BytesIO | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 57 | import itertools | 
|  | 58 | import os | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 59 | import re | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 60 | import struct | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 61 | from xml.parsers.expat import ParserCreate | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 62 |  | 
|  | 63 |  | 
# Enumeration of the plist format constants.  Its members are additionally
# published as module-level names, so ``FMT_XML`` and ``FMT_BINARY`` can be
# used without going through ``PlistFormat``.
PlistFormat = enum.Enum(
    'PlistFormat', ['FMT_XML', 'FMT_BINARY'], module=__name__)
globals().update(dict(PlistFormat.__members__))
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 66 |  | 
|  | 67 |  | 
class UID:
    """Wrapper around a non-negative integer smaller than 2**64.

    Two UIDs are equal (and hash alike) when their ``data`` values are equal.
    A UID can be used where an integer index is expected and supports
    ``__reduce__`` so that it can be copied and pickled.
    """

    def __init__(self, data):
        """Validate and store *data*.

        Raises TypeError if *data* is not an int, and ValueError if it is
        negative or not smaller than 2**64.
        """
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data!r})"

    def __reduce__(self):
        return (self.__class__, (self.data,))

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
|  | 94 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 95 | # | 
|  | 96 | # XML support | 
|  | 97 | # | 
|  | 98 |  | 
|  | 99 |  | 
|  | 100 | # XML 'header' | 
|  | 101 | PLISTHEADER = b"""\ | 
|  | 102 | <?xml version="1.0" encoding="UTF-8"?> | 
|  | 103 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | 
|  | 104 | """ | 
|  | 105 |  | 
|  | 106 |  | 
|  | 107 | # Regex to find any control chars, except for \t \n and \r | 
|  | 108 | _controlCharPat = re.compile( | 
|  | 109 | r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" | 
|  | 110 | r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") | 
|  | 111 |  | 
|  | 112 | def _encode_base64(s, maxlinelength=76): | 
|  | 113 | # copied from base64.encodebytes(), with added maxlinelength argument | 
|  | 114 | maxbinsize = (maxlinelength//4)*3 | 
|  | 115 | pieces = [] | 
|  | 116 | for i in range(0, len(s), maxbinsize): | 
|  | 117 | chunk = s[i : i + maxbinsize] | 
|  | 118 | pieces.append(binascii.b2a_base64(chunk)) | 
|  | 119 | return b''.join(pieces) | 
|  | 120 |  | 
|  | 121 | def _decode_base64(s): | 
|  | 122 | if isinstance(s, str): | 
|  | 123 | return binascii.a2b_base64(s.encode("utf-8")) | 
|  | 124 |  | 
|  | 125 | else: | 
|  | 126 | return binascii.a2b_base64(s) | 
|  | 127 |  | 
|  | 128 | # Contents should conform to a subset of ISO 8601 | 
|  | 129 | # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units | 
|  | 130 | # may be omitted with #  a loss of precision) | 
|  | 131 | _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII) | 
|  | 132 |  | 
|  | 133 |  | 
|  | 134 | def _date_from_string(s): | 
|  | 135 | order = ('year', 'month', 'day', 'hour', 'minute', 'second') | 
|  | 136 | gd = _dateParser.match(s).groupdict() | 
|  | 137 | lst = [] | 
|  | 138 | for key in order: | 
|  | 139 | val = gd[key] | 
|  | 140 | if val is None: | 
|  | 141 | break | 
|  | 142 | lst.append(int(val)) | 
|  | 143 | return datetime.datetime(*lst) | 
|  | 144 |  | 
|  | 145 |  | 
|  | 146 | def _date_to_string(d): | 
|  | 147 | return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( | 
|  | 148 | d.year, d.month, d.day, | 
|  | 149 | d.hour, d.minute, d.second | 
|  | 150 | ) | 
|  | 151 |  | 
def _escape(text):
    """Return *text* prepared for use as XML character data.

    Line endings are normalised to ``\\n`` and the markup characters ``&``,
    ``<`` and ``>`` are replaced by their predefined XML entities.

    Raises ValueError if *text* contains a control character other than
    tab, newline or carriage return.
    """
    m = _controlCharPat.search(text)
    if m is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    text = text.replace("\r\n", "\n")       # convert DOS line endings
    text = text.replace("\r", "\n")         # convert Mac line endings
    # '&' must be handled first, otherwise the ampersands introduced by the
    # following two replacements would be escaped a second time.
    text = text.replace("&", "&amp;")       # escape '&'
    text = text.replace("<", "&lt;")        # escape '<'
    text = text.replace(">", "&gt;")        # escape '>'
    return text
|  | 163 |  | 
|  | 164 | class _PlistParser: | 
| Jon Janzen | ce81a92 | 2019-09-05 03:11:35 -0500 | [diff] [blame] | 165 | def __init__(self, dict_type): | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 166 | self.stack = [] | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 167 | self.current_key = None | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 168 | self.root = None | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 169 | self._dict_type = dict_type | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 170 |  | 
|  | 171 | def parse(self, fileobj): | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 172 | self.parser = ParserCreate() | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 173 | self.parser.StartElementHandler = self.handle_begin_element | 
|  | 174 | self.parser.EndElementHandler = self.handle_end_element | 
|  | 175 | self.parser.CharacterDataHandler = self.handle_data | 
| Ronald Oussoren | 05ee790 | 2020-10-19 20:13:49 +0200 | [diff] [blame] | 176 | self.parser.EntityDeclHandler = self.handle_entity_decl | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 177 | self.parser.ParseFile(fileobj) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 178 | return self.root | 
|  | 179 |  | 
| Ronald Oussoren | 05ee790 | 2020-10-19 20:13:49 +0200 | [diff] [blame] | 180 | def handle_entity_decl(self, entity_name, is_parameter_entity, value, base, system_id, public_id, notation_name): | 
|  | 181 | # Reject plist files with entity declarations to avoid XML vulnerabilies in expat. | 
|  | 182 | # Regular plist files don't contain those declerations, and Apple's plutil tool does not | 
|  | 183 | # accept them either. | 
|  | 184 | raise InvalidFileException("XML entity declarations are not supported in plist files") | 
|  | 185 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 186 | def handle_begin_element(self, element, attrs): | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 187 | self.data = [] | 
|  | 188 | handler = getattr(self, "begin_" + element, None) | 
|  | 189 | if handler is not None: | 
|  | 190 | handler(attrs) | 
|  | 191 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 192 | def handle_end_element(self, element): | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 193 | handler = getattr(self, "end_" + element, None) | 
|  | 194 | if handler is not None: | 
|  | 195 | handler() | 
|  | 196 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 197 | def handle_data(self, data): | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 198 | self.data.append(data) | 
|  | 199 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 200 | def add_object(self, value): | 
|  | 201 | if self.current_key is not None: | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 202 | if not isinstance(self.stack[-1], type({})): | 
|  | 203 | raise ValueError("unexpected element at line %d" % | 
|  | 204 | self.parser.CurrentLineNumber) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 205 | self.stack[-1][self.current_key] = value | 
|  | 206 | self.current_key = None | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 207 | elif not self.stack: | 
|  | 208 | # this is the root object | 
|  | 209 | self.root = value | 
|  | 210 | else: | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 211 | if not isinstance(self.stack[-1], type([])): | 
|  | 212 | raise ValueError("unexpected element at line %d" % | 
|  | 213 | self.parser.CurrentLineNumber) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 214 | self.stack[-1].append(value) | 
|  | 215 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 216 | def get_data(self): | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 217 | data = ''.join(self.data) | 
|  | 218 | self.data = [] | 
|  | 219 | return data | 
|  | 220 |  | 
|  | 221 | # element handlers | 
|  | 222 |  | 
|  | 223 | def begin_dict(self, attrs): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 224 | d = self._dict_type() | 
|  | 225 | self.add_object(d) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 226 | self.stack.append(d) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 227 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 228 | def end_dict(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 229 | if self.current_key: | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 230 | raise ValueError("missing value for key '%s' at line %d" % | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 231 | (self.current_key,self.parser.CurrentLineNumber)) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 232 | self.stack.pop() | 
|  | 233 |  | 
|  | 234 | def end_key(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 235 | if self.current_key or not isinstance(self.stack[-1], type({})): | 
| Ned Deily | b8e59f7 | 2011-05-28 02:19:19 -0700 | [diff] [blame] | 236 | raise ValueError("unexpected key at line %d" % | 
|  | 237 | self.parser.CurrentLineNumber) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 238 | self.current_key = self.get_data() | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 239 |  | 
|  | 240 | def begin_array(self, attrs): | 
|  | 241 | a = [] | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 242 | self.add_object(a) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 243 | self.stack.append(a) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 244 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 245 | def end_array(self): | 
|  | 246 | self.stack.pop() | 
|  | 247 |  | 
|  | 248 | def end_true(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 249 | self.add_object(True) | 
|  | 250 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 251 | def end_false(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 252 | self.add_object(False) | 
|  | 253 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 254 | def end_integer(self): | 
| Ronald Oussoren | 3185267 | 2020-10-20 09:26:33 +0200 | [diff] [blame] | 255 | raw = self.get_data() | 
|  | 256 | if raw.startswith('0x') or raw.startswith('0X'): | 
|  | 257 | self.add_object(int(raw, 16)) | 
|  | 258 | else: | 
|  | 259 | self.add_object(int(raw)) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 260 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 261 | def end_real(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 262 | self.add_object(float(self.get_data())) | 
|  | 263 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 264 | def end_string(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 265 | self.add_object(self.get_data()) | 
|  | 266 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 267 | def end_data(self): | 
| Jon Janzen | ce81a92 | 2019-09-05 03:11:35 -0500 | [diff] [blame] | 268 | self.add_object(_decode_base64(self.get_data())) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 269 |  | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 270 | def end_date(self): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 271 | self.add_object(_date_from_string(self.get_data())) | 
|  | 272 |  | 
|  | 273 |  | 
|  | 274 | class _DumbXMLWriter: | 
|  | 275 | def __init__(self, file, indent_level=0, indent="\t"): | 
|  | 276 | self.file = file | 
|  | 277 | self.stack = [] | 
|  | 278 | self._indent_level = indent_level | 
|  | 279 | self.indent = indent | 
|  | 280 |  | 
|  | 281 | def begin_element(self, element): | 
|  | 282 | self.stack.append(element) | 
|  | 283 | self.writeln("<%s>" % element) | 
|  | 284 | self._indent_level += 1 | 
|  | 285 |  | 
|  | 286 | def end_element(self, element): | 
|  | 287 | assert self._indent_level > 0 | 
|  | 288 | assert self.stack.pop() == element | 
|  | 289 | self._indent_level -= 1 | 
|  | 290 | self.writeln("</%s>" % element) | 
|  | 291 |  | 
|  | 292 | def simple_element(self, element, value=None): | 
|  | 293 | if value is not None: | 
|  | 294 | value = _escape(value) | 
|  | 295 | self.writeln("<%s>%s</%s>" % (element, value, element)) | 
|  | 296 |  | 
|  | 297 | else: | 
|  | 298 | self.writeln("<%s/>" % element) | 
|  | 299 |  | 
|  | 300 | def writeln(self, line): | 
|  | 301 | if line: | 
|  | 302 | # plist has fixed encoding of utf-8 | 
|  | 303 |  | 
|  | 304 | # XXX: is this test needed? | 
|  | 305 | if isinstance(line, str): | 
|  | 306 | line = line.encode('utf-8') | 
|  | 307 | self.file.write(self._indent_level * self.indent) | 
|  | 308 | self.file.write(line) | 
|  | 309 | self.file.write(b'\n') | 
|  | 310 |  | 
|  | 311 |  | 
class _PlistWriter(_DumbXMLWriter):
    """Serialise Python values as an XML property list.

    ``sort_keys`` makes dictionaries come out ordered by key; ``skipkeys``
    makes non-string dictionary keys be dropped instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        _DumbXMLWriter.__init__(self, file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level ``<plist>`` element."""
        self.writeln("<plist version=\"1.0\">")
        self.write_value(value)
        self.writeln("</plist>")

    def write_value(self, value):
        """Write a single value, dispatching on its type.

        Raises OverflowError for an int outside -2**63 <= value < 2**64 and
        TypeError for a value of an unsupported type.
        """
        if isinstance(value, str):
            self.simple_element("string", value)

        # bool is a subclass of int, so the two singletons are tested before
        # the generic int branch.
        elif value is True:
            self.simple_element("true")

        elif value is False:
            self.simple_element("false")

        elif isinstance(value, int):
            if -1 << 63 <= value < 1 << 64:
                self.simple_element("integer", "%d" % value)
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self.simple_element("real", repr(value))

        elif isinstance(value, dict):
            self.write_dict(value)

        elif isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)

        elif isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))

        elif isinstance(value, (tuple, list)):
            self.write_array(value)

        else:
            raise TypeError("unsupported type: %s" % type(value))

    def write_bytes(self, data):
        """Write *data* as a base64 encoded ``<data>`` element."""
        self.begin_element("data")
        # The base64 lines are written at the same indentation as the
        # enclosing <data> tag, so undo the extra level for the moment.
        self._indent_level -= 1
        # Shorten the lines by the width of the indentation (a tab counts
        # as 8 columns), but never below 16 characters.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))

        for line in _encode_base64(data, maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write *d* as a ``<dict>`` element, or ``<dict/>`` when it is empty.

        Raises TypeError for a non-string key unless ``skipkeys`` is set.
        """
        if d:
            self.begin_element("dict")

            # Validate and filter the keys before sorting: sorting the raw
            # items first would fail with an unrelated TypeError on mixed key
            # types, even when skipkeys asks for such keys to be dropped.
            items = []
            for key, value in d.items():
                if not isinstance(key, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                items.append((key, value))
            if self._sort_keys:
                # Keys are unique, so ordering by key alone gives the same
                # result as ordering the (key, value) pairs.
                items.sort(key=lambda item: item[0])

            for key, value in items:
                self.simple_element("key", key)
                self.write_value(value)
            self.end_element("dict")

        else:
            self.simple_element("dict")

    def write_array(self, array):
        """Write *array* as an ``<array>`` element, or ``<array/>`` when it
        is empty."""
        if array:
            self.begin_element("array")
            for value in array:
                self.write_value(value)
            self.end_element("array")

        else:
            self.simple_element("array")
|  | 404 |  | 
|  | 405 |  | 
|  | 406 | def _is_fmt_xml(header): | 
|  | 407 | prefixes = (b'<?xml', b'<plist') | 
|  | 408 |  | 
|  | 409 | for pfx in prefixes: | 
|  | 410 | if header.startswith(pfx): | 
|  | 411 | return True | 
|  | 412 |  | 
|  | 413 | # Also check for alternative XML encodings, this is slightly | 
|  | 414 | # overkill because the Apple tools (and plistlib) will not | 
|  | 415 | # generate files with these encodings. | 
|  | 416 | for bom, encoding in ( | 
|  | 417 | (codecs.BOM_UTF8, "utf-8"), | 
|  | 418 | (codecs.BOM_UTF16_BE, "utf-16-be"), | 
|  | 419 | (codecs.BOM_UTF16_LE, "utf-16-le"), | 
|  | 420 | # expat does not support utf-32 | 
|  | 421 | #(codecs.BOM_UTF32_BE, "utf-32-be"), | 
|  | 422 | #(codecs.BOM_UTF32_LE, "utf-32-le"), | 
|  | 423 | ): | 
|  | 424 | if not header.startswith(bom): | 
|  | 425 | continue | 
|  | 426 |  | 
|  | 427 | for start in prefixes: | 
|  | 428 | prefix = bom + start.decode('ascii').encode(encoding) | 
|  | 429 | if header[:len(prefix)] == prefix: | 
|  | 430 | return True | 
|  | 431 |  | 
|  | 432 | return False | 
|  | 433 |  | 
|  | 434 | # | 
|  | 435 | # Binary Plist | 
|  | 436 | # | 
|  | 437 |  | 
|  | 438 |  | 
class InvalidFileException(ValueError):
    """ValueError subclass whose message defaults to "Invalid file"."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
|  | 442 |  | 
|  | 443 | _BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'} | 
|  | 444 |  | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 445 | _undefined = object() | 
|  | 446 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 447 | class _BinaryPlistParser: | 
|  | 448 | """ | 
|  | 449 | Read or write a binary plist file, following the description of the binary | 
|  | 450 | format.  Raise InvalidFileException in case of error, otherwise return the | 
|  | 451 | root object. | 
|  | 452 |  | 
|  | 453 | see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c | 
|  | 454 | """ | 
| Jon Janzen | ce81a92 | 2019-09-05 03:11:35 -0500 | [diff] [blame] | 455 | def __init__(self, dict_type): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 456 | self._dict_type = dict_type | 
|  | 457 |  | 
def parse(self, fp):
    """Parse the binary plist in the seekable file *fp*; return the root object.

    Raises InvalidFileException if the trailer is incomplete or if reading
    fails with OSError, IndexError, struct.error, OverflowError or
    ValueError.
    """
    try:
        # The basic file format:
        # HEADER
        # object...
        # refid->offset...
        # TRAILER
        self._fp = fp

        # The trailer is the last 32 bytes of the file.
        fp.seek(-32, os.SEEK_END)
        trailer = fp.read(32)
        if len(trailer) != 32:
            raise InvalidFileException()
        fields = struct.unpack('>6xBBQQQ', trailer)
        offset_size, ref_size, num_objects, top_object, table_offset = fields
        self._ref_size = ref_size

        # Load the offset table, then resolve the top-level object; slots in
        # self._objects stay at the placeholder until an object is read.
        fp.seek(table_offset)
        self._object_offsets = self._read_ints(num_objects, offset_size)
        self._objects = [_undefined] * num_objects
        return self._read_object(top_object)

    except (OSError, IndexError, struct.error, OverflowError, ValueError):
        raise InvalidFileException()
|  | 482 |  | 
|  | 483 | def _get_size(self, tokenL): | 
|  | 484 | """ return the size of the next object.""" | 
|  | 485 | if tokenL == 0xF: | 
|  | 486 | m = self._fp.read(1)[0] & 0x3 | 
|  | 487 | s = 1 << m | 
|  | 488 | f = '>' + _BINARY_FORMAT[s] | 
|  | 489 | return struct.unpack(f, self._fp.read(s))[0] | 
|  | 490 |  | 
|  | 491 | return tokenL | 
|  | 492 |  | 
def _read_ints(self, n, size):
    """Read *n* unsigned big-endian integers of *size* bytes each.

    Returns them as a tuple.  Raises InvalidFileException when *size* is
    zero or fewer bytes than requested were available; for the widths that
    struct handles directly, short data surfaces as struct.error instead.
    """
    data = self._fp.read(size * n)

    fmt_char = _BINARY_FORMAT.get(size)
    if fmt_char is not None:
        return struct.unpack(f'>{n}{fmt_char}', data)

    # Width without a struct format character: decode the integers by hand.
    total = size * n
    if not size or len(data) != total:
        raise InvalidFileException()
    return tuple(int.from_bytes(data[start:start + size], 'big')
                 for start in range(0, total, size))
|  | 502 |  | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 503 | def _read_refs(self, n): | 
| Serhiy Storchaka | 0652664 | 2014-05-23 16:13:33 +0300 | [diff] [blame] | 504 | return self._read_ints(n, self._ref_size) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 505 |  | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 506 | def _read_object(self, ref): | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 507 | """ | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 508 | read the object by reference. | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 509 |  | 
|  | 510 | May recursively read sub-objects (content of an array/dict/set) | 
|  | 511 | """ | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 512 | result = self._objects[ref] | 
|  | 513 | if result is not _undefined: | 
|  | 514 | return result | 
|  | 515 |  | 
|  | 516 | offset = self._object_offsets[ref] | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 517 | self._fp.seek(offset) | 
|  | 518 | token = self._fp.read(1)[0] | 
|  | 519 | tokenH, tokenL = token & 0xF0, token & 0x0F | 
|  | 520 |  | 
|  | 521 | if token == 0x00: | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 522 | result = None | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 523 |  | 
|  | 524 | elif token == 0x08: | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 525 | result = False | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 526 |  | 
|  | 527 | elif token == 0x09: | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 528 | result = True | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 529 |  | 
|  | 530 | # The referenced source code also mentions URL (0x0c, 0x0d) and | 
|  | 531 | # UUID (0x0e), but neither can be generated using the Cocoa libraries. | 
|  | 532 |  | 
|  | 533 | elif token == 0x0f: | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 534 | result = b'' | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 535 |  | 
|  | 536 | elif tokenH == 0x10:  # int | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 537 | result = int.from_bytes(self._fp.read(1 << tokenL), | 
|  | 538 | 'big', signed=tokenL >= 3) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 539 |  | 
|  | 540 | elif token == 0x22: # real | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 541 | result = struct.unpack('>f', self._fp.read(4))[0] | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 542 |  | 
|  | 543 | elif token == 0x23: # real | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 544 | result = struct.unpack('>d', self._fp.read(8))[0] | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 545 |  | 
|  | 546 | elif token == 0x33:  # date | 
|  | 547 | f = struct.unpack('>d', self._fp.read(8))[0] | 
|  | 548 | # timestamp 0 of binary plists corresponds to 1/1/2001 | 
|  | 549 | # (year of Mac OS X 10.0), instead of 1/1/1970. | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 550 | result = (datetime.datetime(2001, 1, 1) + | 
|  | 551 | datetime.timedelta(seconds=f)) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 552 |  | 
|  | 553 | elif tokenH == 0x40:  # data | 
|  | 554 | s = self._get_size(tokenL) | 
| Jon Janzen | ce81a92 | 2019-09-05 03:11:35 -0500 | [diff] [blame] | 555 | result = self._fp.read(s) | 
| Serhiy Storchaka | 34637a0 | 2020-11-02 23:01:40 +0200 | [diff] [blame] | 556 | if len(result) != s: | 
|  | 557 | raise InvalidFileException() | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 558 |  | 
|  | 559 | elif tokenH == 0x50:  # ascii string | 
|  | 560 | s = self._get_size(tokenL) | 
| Serhiy Storchaka | 34637a0 | 2020-11-02 23:01:40 +0200 | [diff] [blame] | 561 | data = self._fp.read(s) | 
|  | 562 | if len(data) != s: | 
|  | 563 | raise InvalidFileException() | 
|  | 564 | result = data.decode('ascii') | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 565 |  | 
|  | 566 | elif tokenH == 0x60:  # unicode string | 
| Serhiy Storchaka | 34637a0 | 2020-11-02 23:01:40 +0200 | [diff] [blame] | 567 | s = self._get_size(tokenL) * 2 | 
|  | 568 | data = self._fp.read(s) | 
|  | 569 | if len(data) != s: | 
|  | 570 | raise InvalidFileException() | 
|  | 571 | result = data.decode('utf-16be') | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 572 |  | 
| Jon Janzen | c981ad1 | 2019-05-15 22:14:38 +0200 | [diff] [blame] | 573 | elif tokenH == 0x80:  # UID | 
|  | 574 | # used by Key-Archiver plist files | 
|  | 575 | result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big')) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 576 |  | 
|  | 577 | elif tokenH == 0xA0:  # array | 
|  | 578 | s = self._get_size(tokenL) | 
|  | 579 | obj_refs = self._read_refs(s) | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 580 | result = [] | 
|  | 581 | self._objects[ref] = result | 
|  | 582 | result.extend(self._read_object(x) for x in obj_refs) | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 583 |  | 
|  | 584 | # tokenH == 0xB0 is documented as 'ordset', but is not actually | 
|  | 585 | # implemented in the Apple reference code. | 
|  | 586 |  | 
|  | 587 | # tokenH == 0xC0 is documented as 'set', but sets cannot be used in | 
|  | 588 | # plists. | 
|  | 589 |  | 
|  | 590 | elif tokenH == 0xD0:  # dict | 
|  | 591 | s = self._get_size(tokenL) | 
|  | 592 | key_refs = self._read_refs(s) | 
|  | 593 | obj_refs = self._read_refs(s) | 
|  | 594 | result = self._dict_type() | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 595 | self._objects[ref] = result | 
| Serhiy Storchaka | 34637a0 | 2020-11-02 23:01:40 +0200 | [diff] [blame] | 596 | try: | 
|  | 597 | for k, o in zip(key_refs, obj_refs): | 
|  | 598 | result[self._read_object(k)] = self._read_object(o) | 
|  | 599 | except TypeError: | 
|  | 600 | raise InvalidFileException() | 
| Serhiy Storchaka | a897aee | 2017-11-30 23:26:11 +0200 | [diff] [blame] | 601 | else: | 
|  | 602 | raise InvalidFileException() | 
|  | 603 |  | 
|  | 604 | self._objects[ref] = result | 
|  | 605 | return result | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 606 |  | 
|  | 607 | def _count_to_size(count): | 
|  | 608 | if count < 1 << 8: | 
|  | 609 | return 1 | 
|  | 610 |  | 
|  | 611 | elif count < 1 << 16: | 
|  | 612 | return 2 | 
|  | 613 |  | 
| Serhiy Storchaka | 212d32f | 2020-11-03 16:15:56 +0200 | [diff] [blame] | 614 | elif count < 1 << 32: | 
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 615 | return 4 | 
|  | 616 |  | 
|  | 617 | else: | 
|  | 618 | return 8 | 
|  | 619 |  | 
# Types that the binary writer de-duplicates by (type, value) rather than by
# object identity; see _BinaryPlistWriter._flatten and _getrefnum.
_scalars = (str, int, float, datetime.datetime, bytes)
|  | 621 |  | 
class _BinaryPlistWriter(object):
    """Serialize a Python value as a binary property list ('bplist00').

    The value is first flattened into a list of unique objects.  Each
    object is then written exactly once, and containers refer to their
    members by index ("refnum") into that list.  The output ends with a
    table of object offsets followed by a 32-byte trailer.

    fp        -- writable binary file object; tell() is used to record
                 object offsets.
    sort_keys -- when true, dictionary entries are emitted sorted by key.
    skipkeys  -- when true, dictionary entries whose key is not a str are
                 dropped; when false such a key raises TypeError.
    """

    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* (the top-level object) to the file object.

        Raises TypeError for unsupported objects or non-string dictionary
        keys (unless skipkeys is set), and OverflowError for integers
        that do not fit the format.
        """
        # NOTE(review): offsets are taken from fp.tell(), so this presumably
        # expects fp to be positioned at the start of the output -- confirm.

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0] * num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table.  The width of each entry is
        # chosen so that the largest offset (the table's own start) fits.
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer: 5 pad bytes, three 1-byte fields, then three
        # big-endian 64-bit fields.
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _dict_items(self, value):
        """Return the list of (key, value) pairs of dict *value* to serialize.

        Non-string keys are dropped when skipkeys is set and raise
        TypeError otherwise.  The filtering happens *before* sorting so
        that a skipped key of another type can never take part in a
        comparison; sorting looks at the keys only.

        Both _flatten() and _write_object() use this helper, which keeps
        the set and order of entries identical in the two passes.
        """
        pairs = []
        for k, v in value.items():
            if not isinstance(k, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            pairs.append((k, v))

        if self._sort_keys:
            pairs.sort(key=lambda pair: pair[0])
        return pairs

    def _flatten(self, value):
        """Register *value* and, recursively, everything it contains.

        Each new object is appended to self._objlist and its index is
        recorded in one of the two lookup tables.
        """
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            pairs = self._dict_items(value)

            # All keys are registered before any value, which fixes the
            # order of the objects in the output.
            for k, _ in pairs:
                self._flatten(k)
            for _, v in pairs:
                self._flatten(v)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the index assigned to *value* by _flatten().

        Raises KeyError if the object was never registered.
        """
        if isinstance(value, _scalars):
            return self._objtable[(type(value), value)]
        else:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the marker byte for *token* together with *size*.

        Sizes below 15 are stored in the low nibble of the marker.  Larger
        sizes set the low nibble to 0xF and are followed by an integer
        object (marker 0x10..0x13) holding the size.
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Write one object and record its file offset in the offset table.

        Containers are written as a list of refnums; their members are
        written by separate calls.  Raises TypeError for unsupported
        types, OverflowError for integers or UIDs outside the encodable
        range and ValueError for negative UIDs.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()

        # The singletons are tested by identity and before the int branch,
        # because bool is a subclass of int.
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative numbers are always written as signed 64-bit.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for a signed 64-bit field: use the 16-byte form.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 2001-01-01.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                # The size is counted in 16-bit units, not in bytes.
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, UID):
            # NOTE(review): the UID object itself is handed to struct.pack,
            # which presumably relies on UID being usable as an integer
            # (UID is defined outside this block) -- confirm.
            if value.data < 0:
                raise ValueError("UIDs must be positive")
            elif value.data < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x80, value))
            elif value.data < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x81, value))
            elif value.data < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x83, value))
            elif value.data < 1 << 64:
                self._fp.write(struct.pack('>BQ', 0x87, value))
            else:
                raise OverflowError(value)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            key_refs, val_refs = [], []

            for k, v in self._dict_items(value):
                key_refs.append(self._getrefnum(k))
                val_refs.append(self._getrefnum(v))

            # All key refnums are written first, followed by all value
            # refnums.
            s = len(key_refs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *key_refs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *val_refs))

        else:
            raise TypeError(value)
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 832 |  | 
|  | 833 |  | 
|  | 834 | def _is_fmt_binary(header): | 
|  | 835 | return header[:8] == b'bplist00' | 
|  | 836 |  | 
|  | 837 |  | 
|  | 838 | # | 
|  | 839 | # Generic bits | 
|  | 840 | # | 
|  | 841 |  | 
# Per-format registry: how to recognise the format from a file header and
# which classes parse and write it.  load() probes the entries in insertion
# order when no format is given.
_FORMATS = {
    FMT_XML: {
        'detect': _is_fmt_xml,
        'parser': _PlistParser,
        'writer': _PlistWriter,
    },
    FMT_BINARY: {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
        'writer': _BinaryPlistWriter,
    },
}
|  | 854 |  | 
|  | 855 |  | 
def load(fp, *, fmt=None, dict_type=dict):
    """Parse a plist from the readable binary file object 'fp'.

    When 'fmt' is None the format is detected from the first 32 bytes of
    the file, after which the file is rewound with seek(0); otherwise
    'fmt' selects the parser directly.  'dict_type' is handed to the
    parser.

    Return the unpacked root object (which usually is a dictionary).
    Raise InvalidFileException when no known format matches the header.
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        # Lazily probe the formats in registry order; the first match wins.
        candidates = (
            info['parser']
            for info in _FORMATS.values()
            if info['detect'](header)
        )
        parser_cls = next(candidates, None)
        if parser_cls is None:
            raise InvalidFileException()

    return parser_cls(dict_type=dict_type).parse(fp)
|  | 876 |  | 
|  | 877 |  | 
def loads(value, *, fmt=None, dict_type=dict):
    """Parse a plist held in the bytes object 'value'.

    The data is wrapped in an in-memory file and passed to load() together
    with 'fmt' and 'dict_type'.
    Return the unpacked root object (which usually is a dictionary).
    """
    return load(BytesIO(value), fmt=fmt, dict_type=dict_type)
| Ronald Oussoren | c5cf797 | 2013-11-21 15:46:49 +0100 | [diff] [blame] | 884 |  | 
|  | 885 |  | 
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Serialize 'value' as a plist into the writable binary file object 'fp'.

    'fmt' selects the output format; 'sort_keys' and 'skipkeys' are passed
    on to the writer.  Raise ValueError when 'fmt' is not a known format.
    """
    handlers = _FORMATS.get(fmt)
    if handlers is None:
        raise ValueError("Unsupported format: %r"%(fmt,))

    make_writer = handlers["writer"]
    make_writer(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
|  | 895 |  | 
|  | 896 |  | 
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Serialize 'value' as a plist and return the result as a bytes object.

    All keyword arguments are passed through to dump(), which writes into
    an in-memory buffer.
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    return buffer.getvalue()