| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1 | """Mozilla / Netscape cookie loading / saving.""" | 
 | 2 |  | 
 | 3 | import re, time, logging | 
 | 4 |  | 
| Tim Peters | 3b2cdad | 2004-09-01 13:10:32 +0000 | [diff] [blame] | 5 | from cookielib import (reraise_unmasked_exceptions, FileCookieJar, Cookie, | 
| Andrew M. Kuchling | 33ad28b | 2004-08-31 11:38:12 +0000 | [diff] [blame] | 6 |      MISSING_FILENAME_TEXT) | 
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 7 |  | 
 | 8 | class MozillaCookieJar(FileCookieJar): | 
 | 9 |     """ | 
 | 10 |  | 
 | 11 |     WARNING: you may want to backup your browser's cookies file if you use | 
 | 12 |     this class to save cookies.  I *think* it works, but there have been | 
 | 13 |     bugs in the past! | 
 | 14 |  | 
 | 15 |     This class differs from CookieJar only in the format it uses to save and | 
 | 16 |     load cookies to and from a file.  This class uses the Mozilla/Netscape | 
 | 17 |     `cookies.txt' format.  lynx uses this file format, too. | 
 | 18 |  | 
 | 19 |     Don't expect cookies saved while the browser is running to be noticed by | 
 | 20 |     the browser (in fact, Mozilla on unix will overwrite your saved cookies if | 
 | 21 |     you change them on disk while it's running; on Windows, you probably can't | 
 | 22 |     save at all while the browser is running). | 
 | 23 |  | 
 | 24 |     Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to | 
 | 25 |     Netscape cookies on saving. | 
 | 26 |  | 
 | 27 |     In particular, the cookie version and port number information is lost, | 
 | 28 |     together with information about whether or not Path, Port and Discard were | 
 | 29 |     specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the | 
 | 30 |     domain as set in the HTTP header started with a dot (yes, I'm aware some | 
 | 31 |     domains in Netscape files start with a dot and some don't -- trust me, you | 
 | 32 |     really don't want to know any more about this). | 
 | 33 |  | 
 | 34 |     Note that though Mozilla and Netscape use the same format, they use | 
 | 35 |     slightly different headers.  The class saves cookies using the Netscape | 
 | 36 |     header by default (Mozilla can cope with that). | 
 | 37 |  | 
 | 38 |     """ | 
 | 39 |     magic_re = "#( Netscape)? HTTP Cookie File" | 
 | 40 |     header = """\ | 
 | 41 |     # Netscape HTTP Cookie File | 
 | 42 |     # http://www.netscape.com/newsref/std/cookie_spec.html | 
 | 43 |     # This is a generated file!  Do not edit. | 
 | 44 |  | 
 | 45 | """ | 
 | 46 |  | 
 | 47 |     def _really_load(self, f, filename, ignore_discard, ignore_expires): | 
 | 48 |         now = time.time() | 
 | 49 |  | 
 | 50 |         magic = f.readline() | 
 | 51 |         if not re.search(self.magic_re, magic): | 
 | 52 |             f.close() | 
 | 53 |             raise IOError( | 
 | 54 |                 "%s does not look like a Netscape format cookies file" % | 
 | 55 |                 filename) | 
 | 56 |  | 
 | 57 |         try: | 
 | 58 |             while 1: | 
 | 59 |                 line = f.readline() | 
 | 60 |                 if line == "": break | 
 | 61 |  | 
 | 62 |                 # last field may be absent, so keep any trailing tab | 
 | 63 |                 if line.endswith("\n"): line = line[:-1] | 
 | 64 |  | 
 | 65 |                 # skip comments and blank lines XXX what is $ for? | 
 | 66 |                 if (line.strip().startswith("#") or | 
 | 67 |                     line.strip().startswith("$") or | 
 | 68 |                     line.strip() == ""): | 
 | 69 |                     continue | 
 | 70 |  | 
 | 71 |                 domain, domain_specified, path, secure, expires, name, value = \ | 
 | 72 |                         line.split("\t") | 
 | 73 |                 secure = (secure == "TRUE") | 
 | 74 |                 domain_specified = (domain_specified == "TRUE") | 
 | 75 |                 if name == "": | 
 | 76 |                     name = value | 
 | 77 |                     value = None | 
 | 78 |  | 
 | 79 |                 initial_dot = domain.startswith(".") | 
 | 80 |                 assert domain_specified == initial_dot | 
 | 81 |  | 
 | 82 |                 discard = False | 
 | 83 |                 if expires == "": | 
 | 84 |                     expires = None | 
 | 85 |                     discard = True | 
 | 86 |  | 
 | 87 |                 # assume path_specified is false | 
 | 88 |                 c = Cookie(0, name, value, | 
 | 89 |                            None, False, | 
 | 90 |                            domain, domain_specified, initial_dot, | 
 | 91 |                            path, False, | 
 | 92 |                            secure, | 
 | 93 |                            expires, | 
 | 94 |                            discard, | 
 | 95 |                            None, | 
 | 96 |                            None, | 
 | 97 |                            {}) | 
 | 98 |                 if not ignore_discard and c.discard: | 
 | 99 |                     continue | 
 | 100 |                 if not ignore_expires and c.is_expired(now): | 
 | 101 |                     continue | 
 | 102 |                 self.set_cookie(c) | 
 | 103 |  | 
 | 104 |         except: | 
 | 105 |             reraise_unmasked_exceptions((IOError,)) | 
 | 106 |             raise IOError("invalid Netscape format file %s: %s" % | 
 | 107 |                           (filename, line)) | 
 | 108 |  | 
 | 109 |     def save(self, filename=None, ignore_discard=False, ignore_expires=False): | 
 | 110 |         if filename is None: | 
 | 111 |             if self.filename is not None: filename = self.filename | 
 | 112 |             else: raise ValueError(MISSING_FILENAME_TEXT) | 
 | 113 |  | 
 | 114 |         f = open(filename, "w") | 
 | 115 |         try: | 
 | 116 |             f.write(self.header) | 
 | 117 |             now = time.time() | 
 | 118 |             for cookie in self: | 
 | 119 |                 if not ignore_discard and cookie.discard: | 
 | 120 |                     continue | 
 | 121 |                 if not ignore_expires and cookie.is_expired(now): | 
 | 122 |                     continue | 
 | 123 |                 if cookie.secure: secure = "TRUE" | 
 | 124 |                 else: secure = "FALSE" | 
 | 125 |                 if cookie.domain.startswith("."): initial_dot = "TRUE" | 
 | 126 |                 else: initial_dot = "FALSE" | 
 | 127 |                 if cookie.expires is not None: | 
 | 128 |                     expires = str(cookie.expires) | 
 | 129 |                 else: | 
 | 130 |                     expires = "" | 
 | 131 |                 if cookie.value is None: | 
 | 132 |                     # cookies.txt regards 'Set-Cookie: foo' as a cookie | 
 | 133 |                     # with no name, whereas cookielib regards it as a | 
 | 134 |                     # cookie with no value. | 
 | 135 |                     name = "" | 
 | 136 |                     value = cookie.name | 
 | 137 |                 else: | 
 | 138 |                     name = cookie.name | 
 | 139 |                     value = cookie.value | 
 | 140 |                 f.write( | 
 | 141 |                     "\t".join([cookie.domain, initial_dot, cookie.path, | 
 | 142 |                                secure, expires, name, value])+ | 
 | 143 |                     "\n") | 
 | 144 |         finally: | 
 | 145 |             f.close() |