Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1 | """Mozilla / Netscape cookie loading / saving.""" |
| 2 | |
| 3 | import re, time, logging |
| 4 | |
Tim Peters | 3b2cdad | 2004-09-01 13:10:32 +0000 | [diff] [blame] | 5 | from cookielib import (reraise_unmasked_exceptions, FileCookieJar, Cookie, |
Andrew M. Kuchling | 33ad28b | 2004-08-31 11:38:12 +0000 | [diff] [blame] | 6 | MISSING_FILENAME_TEXT) |
Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 7 | |
class MozillaCookieJar(FileCookieJar):
    """
    FileCookieJar that loads and saves Mozilla/Netscape `cookies.txt' files.

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    Each cookie occupies one line of seven tab-separated fields:

        domain, domain_specified, path, secure, expires, name, value

    """
    # Pattern searched for in the first line of a file to recognise the format.
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Text written at the top of every file produced by save().
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read cookies from the open file object f into this jar.

        f -- file object positioned at the start of a cookies.txt file
        filename -- used only to build error messages
        ignore_discard -- if true, also load cookies with an empty expires
            field (these are marked discard=True)
        ignore_expires -- if true, also load cookies that have already
            expired

        Raises IOError if the first line does not match magic_re (f is closed
        in that case) or if a cookie line cannot be parsed.
        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        # Bind line before entering the try block so that the error message
        # in the except clause can always be formatted, even when the very
        # first readline() is what failed.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (stripped == "" or
                    stripped.startswith("#") or
                    stripped.startswith("$")):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Validate with an explicit raise rather than an assert:
                # asserts are removed under "python -O", which would let
                # inconsistent lines through unnoticed.  The except clause
                # below is expected to turn this into the usual IOError
                # (assuming reraise_unmasked_exceptions does not re-raise
                # ValueError -- it is defined in cookielib, not here).
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain_specified flag disagrees with domain %r" %
                        (domain,))

                # an empty expires field marks a session cookie
                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            # Deliberate catch-all: the helper decides which exceptions
            # propagate unchanged; anything else is reported as a bad file.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies in this jar to a file in cookies.txt format.

        filename -- file to write; defaults to self.filename
        ignore_discard -- if true, also save cookies marked discard
        ignore_expires -- if true, also save cookies that have expired

        Raises ValueError if no filename is given and self.filename is None.
        The target file is opened with mode "w", so any previous contents
        are replaced.
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")
        finally:
            f.close()