| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 1 | """plistlib.py -- a tool to generate and parse MacOSX .plist files. | 
 | 2 |  | 
 | 3 | The PropertyList (.plist) file format is a simple XML pickle supporting | 
 | 4 | basic object types, like dictionaries, lists, numbers and strings. | 
 | 5 | Usually the top level object is a dictionary. | 
 | 6 |  | 
 | 7 | To write out a plist file, use the writePlist(rootObject, pathOrFile) | 
 | 8 | function. 'rootObject' is the top level object, 'pathOrFile' is a | 
 | 9 | filename or a (writable) file object. | 
 | 10 |  | 
 | 11 | To parse a plist from a file, use the readPlist(pathOrFile) function, | 
 | 12 | with a file name or a (readable) file object as the only argument. It | 
 | 13 | returns the top level object (again, usually a dictionary). | 
 | 14 |  | 
 | 15 | To work with plist data in bytes objects, you can use readPlistFromBytes() | 
 | 16 | and writePlistToBytes(). | 
 | 17 |  | 
 | 18 | Values can be strings, integers, floats, booleans, tuples, lists, | 
 | 19 | dictionaries, Data or datetime.datetime objects. String values (including | 
 | 20 | dictionary keys) may be unicode strings -- they will be written out as | 
 | 21 | UTF-8. | 
 | 22 |  | 
 | 23 | The <data> plist type is supported through the Data class. This is a | 
 | 24 | thin wrapper around a Python bytes object. | 
 | 25 |  | 
 | 26 | Generate Plist example: | 
 | 27 |  | 
 | 28 |     pl = dict( | 
 | 29 |         aString="Doodah", | 
 | 30 |         aList=["A", "B", 12, 32.1, [1, 2, 3]], | 
 | 31 |         aFloat = 0.1, | 
 | 32 |         anInt = 728, | 
 | 33 |         aDict=dict( | 
 | 34 |             anotherString="<hello & hi there!>", | 
 | 35 |             aUnicodeValue=u'M\xe4ssig, Ma\xdf', | 
 | 36 |             aTrueValue=True, | 
 | 37 |             aFalseValue=False, | 
 | 38 |         ), | 
 | 39 |         someData = Data(b"<binary gunk>"), | 
 | 40 |         someMoreData = Data(b"<lots of binary gunk>" * 10), | 
 | 41 |         aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())), | 
 | 42 |     ) | 
 | 43 |     # unicode keys are possible, but a little awkward to use: | 
 | 44 |     pl[u'\xc5benraa'] = "That was a unicode key." | 
 | 45 |     writePlist(pl, fileName) | 
 | 46 |  | 
 | 47 | Parse Plist example: | 
 | 48 |  | 
 | 49 |     pl = readPlist(pathOrFile) | 
 | 50 |     print(pl["aKey"]) | 
 | 51 | """ | 
 | 52 |  | 
 | 53 |  | 
# Names exported by a star-import of this module.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Plist", "Data", "Dict"
]
# Note: the Plist and Dict classes have been deprecated.
 | 59 |  | 
 | 60 | import binascii | 
 | 61 | import datetime | 
 | 62 | from io import BytesIO | 
 | 63 | import re | 
 | 64 |  | 
 | 65 |  | 
 | 66 | def readPlist(pathOrFile): | 
 | 67 |     """Read a .plist file. 'pathOrFile' may either be a file name or a | 
 | 68 |     (readable) file object. Return the unpacked root object (which | 
 | 69 |     usually is a dictionary). | 
 | 70 |     """ | 
 | 71 |     didOpen = False | 
 | 72 |     if isinstance(pathOrFile, str): | 
 | 73 |         pathOrFile = open(pathOrFile, 'rb') | 
 | 74 |         didOpen = True | 
 | 75 |     p = PlistParser() | 
 | 76 |     rootObject = p.parse(pathOrFile) | 
 | 77 |     if didOpen: | 
 | 78 |         pathOrFile.close() | 
 | 79 |     return rootObject | 
 | 80 |  | 
 | 81 |  | 
 | 82 | def writePlist(rootObject, pathOrFile): | 
 | 83 |     """Write 'rootObject' to a .plist file. 'pathOrFile' may either be a | 
 | 84 |     file name or a (writable) file object. | 
 | 85 |     """ | 
 | 86 |     didOpen = False | 
 | 87 |     if isinstance(pathOrFile, str): | 
 | 88 |         pathOrFile = open(pathOrFile, 'wb') | 
 | 89 |         didOpen = True | 
 | 90 |     writer = PlistWriter(pathOrFile) | 
 | 91 |     writer.writeln("<plist version=\"1.0\">") | 
 | 92 |     writer.writeValue(rootObject) | 
 | 93 |     writer.writeln("</plist>") | 
 | 94 |     if didOpen: | 
 | 95 |         pathOrFile.close() | 
 | 96 |  | 
 | 97 |  | 
 | 98 | def readPlistFromBytes(data): | 
 | 99 |     """Read a plist data from a bytes object. Return the root object. | 
 | 100 |     """ | 
 | 101 |     return readPlist(BytesIO(data)) | 
 | 102 |  | 
 | 103 |  | 
 | 104 | def writePlistToBytes(rootObject): | 
 | 105 |     """Return 'rootObject' as a plist-formatted bytes object. | 
 | 106 |     """ | 
 | 107 |     f = BytesIO() | 
 | 108 |     writePlist(rootObject, f) | 
 | 109 |     return f.getvalue() | 
 | 110 |  | 
 | 111 |  | 
class DumbXMLWriter:
    """Minimal indenting XML writer that emits UTF-8 encoded bytes.

    'file' must accept bytes in its write() method.  'indentLevel' is the
    starting nesting depth and 'indent' the unit written once per level;
    it may be given as str or bytes.
    """

    def __init__(self, file, indentLevel=0, indent="\t"):
        # All output is bytes, so normalise a str indent (including the
        # default) once here instead of failing on the first write.
        if isinstance(indent, str):
            indent = indent.encode('utf-8')
        self.file = file
        self.stack = []
        self.indentLevel = indentLevel
        self.indent = indent

    def beginElement(self, element):
        """Write the opening tag for 'element' and indent what follows."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self.indentLevel += 1

    def endElement(self, element):
        """Write the closing tag for 'element', which must be the most
        recently opened element.
        """
        assert self.indentLevel > 0
        # Pop outside the assert: assert statements are stripped under -O,
        # which would otherwise leave the element on the stack.
        opened = self.stack.pop()
        assert opened == element
        self.indentLevel -= 1
        self.writeln("</%s>" % element)

    def simpleElement(self, element, value=None):
        """Write '<element>value</element>' with 'value' escaped, or the
        empty tag '<element/>' when 'value' is None.
        """
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))
        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write 'line' (str or bytes) at the current indentation followed
        by a newline.  An empty 'line' produces just the newline.
        """
        if line:
            # plist has fixed encoding of utf-8
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self.indentLevel * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
 | 145 |  | 
 | 146 |  | 
 | 147 | # Contents should conform to a subset of ISO 8601 | 
 | 148 | # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units may be omitted with | 
 | 149 | #  a loss of precision) | 
| Antoine Pitrou | fd03645 | 2008-08-19 17:56:33 +0000 | [diff] [blame] | 150 | _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII) | 
| Christian Heimes | 7e18254 | 2008-01-27 15:20:13 +0000 | [diff] [blame] | 151 |  | 
 | 152 | def _dateFromString(s): | 
 | 153 |     order = ('year', 'month', 'day', 'hour', 'minute', 'second') | 
 | 154 |     gd = _dateParser.match(s).groupdict() | 
 | 155 |     lst = [] | 
 | 156 |     for key in order: | 
 | 157 |         val = gd[key] | 
 | 158 |         if val is None: | 
 | 159 |             break | 
 | 160 |         lst.append(int(val)) | 
 | 161 |     return datetime.datetime(*lst) | 
 | 162 |  | 
 | 163 | def _dateToString(d): | 
 | 164 |     return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( | 
 | 165 |         d.year, d.month, d.day, | 
 | 166 |         d.hour, d.minute, d.second | 
 | 167 |     ) | 
 | 168 |  | 
 | 169 |  | 
 | 170 | # Regex to find any control chars, except for \t \n and \r | 
 | 171 | _controlCharPat = re.compile( | 
 | 172 |     r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" | 
 | 173 |     r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") | 
 | 174 |  | 
 | 175 | def _escape(text): | 
 | 176 |     m = _controlCharPat.search(text) | 
 | 177 |     if m is not None: | 
 | 178 |         raise ValueError("strings can't contains control characters; " | 
 | 179 |                          "use plistlib.Data instead") | 
 | 180 |     text = text.replace("\r\n", "\n")       # convert DOS line endings | 
 | 181 |     text = text.replace("\r", "\n")         # convert Mac line endings | 
 | 182 |     text = text.replace("&", "&")       # escape '&' | 
 | 183 |     text = text.replace("<", "<")        # escape '<' | 
 | 184 |     text = text.replace(">", ">")        # escape '>' | 
 | 185 |     return text | 
 | 186 |  | 
 | 187 |  | 
 | 188 | PLISTHEADER = b"""\ | 
 | 189 | <?xml version="1.0" encoding="UTF-8"?> | 
 | 190 | <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | 
 | 191 | """ | 
 | 192 |  | 
 | 193 | class PlistWriter(DumbXMLWriter): | 
 | 194 |  | 
 | 195 |     def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1): | 
 | 196 |         if writeHeader: | 
 | 197 |             file.write(PLISTHEADER) | 
 | 198 |         DumbXMLWriter.__init__(self, file, indentLevel, indent) | 
 | 199 |  | 
 | 200 |     def writeValue(self, value): | 
 | 201 |         if isinstance(value, str): | 
 | 202 |             self.simpleElement("string", value) | 
 | 203 |         elif isinstance(value, bool): | 
 | 204 |             # must switch for bool before int, as bool is a | 
 | 205 |             # subclass of int... | 
 | 206 |             if value: | 
 | 207 |                 self.simpleElement("true") | 
 | 208 |             else: | 
 | 209 |                 self.simpleElement("false") | 
 | 210 |         elif isinstance(value, int): | 
 | 211 |             self.simpleElement("integer", "%d" % value) | 
 | 212 |         elif isinstance(value, float): | 
 | 213 |             self.simpleElement("real", repr(value)) | 
 | 214 |         elif isinstance(value, dict): | 
 | 215 |             self.writeDict(value) | 
 | 216 |         elif isinstance(value, Data): | 
 | 217 |             self.writeData(value) | 
 | 218 |         elif isinstance(value, datetime.datetime): | 
 | 219 |             self.simpleElement("date", _dateToString(value)) | 
 | 220 |         elif isinstance(value, (tuple, list)): | 
 | 221 |             self.writeArray(value) | 
 | 222 |         else: | 
 | 223 |             raise TypeError("unsuported type: %s" % type(value)) | 
 | 224 |  | 
 | 225 |     def writeData(self, data): | 
 | 226 |         self.beginElement("data") | 
 | 227 |         self.indentLevel -= 1 | 
 | 228 |         maxlinelength = 76 - len(self.indent.replace(b"\t", b" " * 8) * | 
 | 229 |                                  self.indentLevel) | 
 | 230 |         for line in data.asBase64(maxlinelength).split(b"\n"): | 
 | 231 |             if line: | 
 | 232 |                 self.writeln(line) | 
 | 233 |         self.indentLevel += 1 | 
 | 234 |         self.endElement("data") | 
 | 235 |  | 
 | 236 |     def writeDict(self, d): | 
 | 237 |         self.beginElement("dict") | 
 | 238 |         items = sorted(d.items()) | 
 | 239 |         for key, value in items: | 
 | 240 |             if not isinstance(key, str): | 
 | 241 |                 raise TypeError("keys must be strings") | 
 | 242 |             self.simpleElement("key", key) | 
 | 243 |             self.writeValue(value) | 
 | 244 |         self.endElement("dict") | 
 | 245 |  | 
 | 246 |     def writeArray(self, array): | 
 | 247 |         self.beginElement("array") | 
 | 248 |         for value in array: | 
 | 249 |             self.writeValue(value) | 
 | 250 |         self.endElement("array") | 
 | 251 |  | 
 | 252 |  | 
 | 253 | class _InternalDict(dict): | 
 | 254 |  | 
 | 255 |     # This class is needed while Dict is scheduled for deprecation: | 
 | 256 |     # we only need to warn when a *user* instantiates Dict or when | 
 | 257 |     # the "attribute notation for dict keys" is used. | 
 | 258 |  | 
 | 259 |     def __getattr__(self, attr): | 
 | 260 |         try: | 
 | 261 |             value = self[attr] | 
 | 262 |         except KeyError: | 
 | 263 |             raise AttributeError(attr) | 
 | 264 |         from warnings import warn | 
 | 265 |         warn("Attribute access from plist dicts is deprecated, use d[key] " | 
 | 266 |              "notation instead", PendingDeprecationWarning) | 
 | 267 |         return value | 
 | 268 |  | 
 | 269 |     def __setattr__(self, attr, value): | 
 | 270 |         from warnings import warn | 
 | 271 |         warn("Attribute access from plist dicts is deprecated, use d[key] " | 
 | 272 |              "notation instead", PendingDeprecationWarning) | 
 | 273 |         self[attr] = value | 
 | 274 |  | 
 | 275 |     def __delattr__(self, attr): | 
 | 276 |         try: | 
 | 277 |             del self[attr] | 
 | 278 |         except KeyError: | 
 | 279 |             raise AttributeError(attr) | 
 | 280 |         from warnings import warn | 
 | 281 |         warn("Attribute access from plist dicts is deprecated, use d[key] " | 
 | 282 |              "notation instead", PendingDeprecationWarning) | 
 | 283 |  | 
 | 284 | class Dict(_InternalDict): | 
 | 285 |  | 
 | 286 |     def __init__(self, **kwargs): | 
 | 287 |         from warnings import warn | 
 | 288 |         warn("The plistlib.Dict class is deprecated, use builtin dict instead", | 
 | 289 |              PendingDeprecationWarning) | 
 | 290 |         super().__init__(**kwargs) | 
 | 291 |  | 
 | 292 |  | 
 | 293 | class Plist(_InternalDict): | 
 | 294 |  | 
 | 295 |     """This class has been deprecated. Use readPlist() and writePlist() | 
 | 296 |     functions instead, together with regular dict objects. | 
 | 297 |     """ | 
 | 298 |  | 
 | 299 |     def __init__(self, **kwargs): | 
 | 300 |         from warnings import warn | 
 | 301 |         warn("The Plist class is deprecated, use the readPlist() and " | 
 | 302 |              "writePlist() functions instead", PendingDeprecationWarning) | 
 | 303 |         super().__init__(**kwargs) | 
 | 304 |  | 
 | 305 |     def fromFile(cls, pathOrFile): | 
 | 306 |         """Deprecated. Use the readPlist() function instead.""" | 
 | 307 |         rootObject = readPlist(pathOrFile) | 
 | 308 |         plist = cls() | 
 | 309 |         plist.update(rootObject) | 
 | 310 |         return plist | 
 | 311 |     fromFile = classmethod(fromFile) | 
 | 312 |  | 
 | 313 |     def write(self, pathOrFile): | 
 | 314 |         """Deprecated. Use the writePlist() function instead.""" | 
 | 315 |         writePlist(self, pathOrFile) | 
 | 316 |  | 
 | 317 |  | 
 | 318 | def _encodeBase64(s, maxlinelength=76): | 
 | 319 |     # copied from base64.encodestring(), with added maxlinelength argument | 
 | 320 |     maxbinsize = (maxlinelength//4)*3 | 
 | 321 |     pieces = [] | 
 | 322 |     for i in range(0, len(s), maxbinsize): | 
 | 323 |         chunk = s[i : i + maxbinsize] | 
 | 324 |         pieces.append(binascii.b2a_base64(chunk)) | 
 | 325 |     return b''.join(pieces) | 
 | 326 |  | 
 | 327 | class Data: | 
 | 328 |  | 
 | 329 |     """Wrapper for binary data.""" | 
 | 330 |  | 
 | 331 |     def __init__(self, data): | 
 | 332 |         if not isinstance(data, bytes): | 
 | 333 |             raise TypeError("data must be as bytes") | 
 | 334 |         self.data = data | 
 | 335 |  | 
 | 336 |     @classmethod | 
 | 337 |     def fromBase64(cls, data): | 
 | 338 |         # base64.decodestring just calls binascii.a2b_base64; | 
 | 339 |         # it seems overkill to use both base64 and binascii. | 
 | 340 |         return cls(binascii.a2b_base64(data)) | 
 | 341 |  | 
 | 342 |     def asBase64(self, maxlinelength=76): | 
 | 343 |         return _encodeBase64(self.data, maxlinelength) | 
 | 344 |  | 
 | 345 |     def __eq__(self, other): | 
 | 346 |         if isinstance(other, self.__class__): | 
 | 347 |             return self.data == other.data | 
 | 348 |         elif isinstance(other, str): | 
 | 349 |             return self.data == other | 
 | 350 |         else: | 
 | 351 |             return id(self) == id(other) | 
 | 352 |  | 
 | 353 |     def __repr__(self): | 
 | 354 |         return "%s(%s)" % (self.__class__.__name__, repr(self.data)) | 
 | 355 |  | 
 | 356 |  | 
 | 357 | class PlistParser: | 
 | 358 |  | 
 | 359 |     def __init__(self): | 
 | 360 |         self.stack = [] | 
 | 361 |         self.currentKey = None | 
 | 362 |         self.root = None | 
 | 363 |  | 
 | 364 |     def parse(self, fileobj): | 
 | 365 |         from xml.parsers.expat import ParserCreate | 
 | 366 |         parser = ParserCreate() | 
 | 367 |         parser.StartElementHandler = self.handleBeginElement | 
 | 368 |         parser.EndElementHandler = self.handleEndElement | 
 | 369 |         parser.CharacterDataHandler = self.handleData | 
 | 370 |         parser.ParseFile(fileobj) | 
 | 371 |         return self.root | 
 | 372 |  | 
 | 373 |     def handleBeginElement(self, element, attrs): | 
 | 374 |         self.data = [] | 
 | 375 |         handler = getattr(self, "begin_" + element, None) | 
 | 376 |         if handler is not None: | 
 | 377 |             handler(attrs) | 
 | 378 |  | 
 | 379 |     def handleEndElement(self, element): | 
 | 380 |         handler = getattr(self, "end_" + element, None) | 
 | 381 |         if handler is not None: | 
 | 382 |             handler() | 
 | 383 |  | 
 | 384 |     def handleData(self, data): | 
 | 385 |         self.data.append(data) | 
 | 386 |  | 
 | 387 |     def addObject(self, value): | 
 | 388 |         if self.currentKey is not None: | 
 | 389 |             self.stack[-1][self.currentKey] = value | 
 | 390 |             self.currentKey = None | 
 | 391 |         elif not self.stack: | 
 | 392 |             # this is the root object | 
 | 393 |             self.root = value | 
 | 394 |         else: | 
 | 395 |             self.stack[-1].append(value) | 
 | 396 |  | 
 | 397 |     def getData(self): | 
 | 398 |         data = ''.join(self.data) | 
 | 399 |         self.data = [] | 
 | 400 |         return data | 
 | 401 |  | 
 | 402 |     # element handlers | 
 | 403 |  | 
 | 404 |     def begin_dict(self, attrs): | 
 | 405 |         d = _InternalDict() | 
 | 406 |         self.addObject(d) | 
 | 407 |         self.stack.append(d) | 
 | 408 |     def end_dict(self): | 
 | 409 |         self.stack.pop() | 
 | 410 |  | 
 | 411 |     def end_key(self): | 
 | 412 |         self.currentKey = self.getData() | 
 | 413 |  | 
 | 414 |     def begin_array(self, attrs): | 
 | 415 |         a = [] | 
 | 416 |         self.addObject(a) | 
 | 417 |         self.stack.append(a) | 
 | 418 |     def end_array(self): | 
 | 419 |         self.stack.pop() | 
 | 420 |  | 
 | 421 |     def end_true(self): | 
 | 422 |         self.addObject(True) | 
 | 423 |     def end_false(self): | 
 | 424 |         self.addObject(False) | 
 | 425 |     def end_integer(self): | 
 | 426 |         self.addObject(int(self.getData())) | 
 | 427 |     def end_real(self): | 
 | 428 |         self.addObject(float(self.getData())) | 
 | 429 |     def end_string(self): | 
 | 430 |         self.addObject(self.getData()) | 
 | 431 |     def end_data(self): | 
 | 432 |         self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) | 
 | 433 |     def end_date(self): | 
 | 434 |         self.addObject(_dateFromString(self.getData())) |