blob: 74b2aec79e86843fbdf98b62081ccc36ec35a329 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
# Names exported by "from <this module> import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-agent header.
__version__ = '1.16'    # XXX This version is not always updated :-(

# open_ftp() prunes URLopener.ftpcache once it holds more entries than this.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Helper for non-unix systems
# Pick the url2pathname()/pathname2url() pair for the running platform:
# 'mac', 'nt' and 'riscos' import theirs from a dedicated module, every
# other platform uses the two small functions defined below.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Return the URL path component passed through unquote()."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Return the local pathname passed through quote()."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When a proxies mapping is given, a fresh FancyURLopener is built
    for this call only.  Otherwise a single module-wide FancyURLopener
    is created on first use and shared by later calls.  data, when not
    None, is forwarded to the opener's open() method.
    """
    global _urlopener
    if proxies is None:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    else:
        opener = FancyURLopener(proxies=proxies)
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-wide FancyURLopener.

    The shared opener is created on first use; all arguments are
    forwarded unchanged to its retrieve() method, whose result is
    returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
89
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000090ftpcache = {}
91class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000092 """Class to open URLs.
93 This is a class rather than just a subroutine because we may need
94 more than one set of global protocol-specific options.
95 Note -- this is a base class for those who don't want the
96 automatic handling of errors type 302 (relocated) and 401
97 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000100
Guido van Rossumba311382000-08-24 16:18:04 +0000101 version = "Python-urllib/%s" % __version__
102
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000103 # Constructor
def __init__(self, proxies=None, **x509):
    """Set up proxies, SSL file names, default headers and caches.

    proxies -- mapping used by open() to look up a proxy per URL
               scheme; when None the result of getproxies() is used.
    x509    -- optional 'key_file' and 'cert_file' keyword arguments,
               stored on the instance (None when not supplied).
    """
    if proxies is None:
        proxies = getproxies()
    assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
    self.proxies = proxies
    self.key_file, self.cert_file = x509.get('key_file'), x509.get('cert_file')
    self.addheaders = [('User-agent', self.version)]
    self.__tempfiles = []
    # Keep our own reference to os.unlink: cleanup() may run after the
    # module globals have been torn down.  See cleanup().
    self.__unlink = os.unlink
    # Undocumented feature: if you assign {} to tempcache,
    # it is used to cache files retrieved with
    # self.retrieve().  This is not enabled by default
    # since it does not work for changing documents (and I
    # haven't got the logic to check expiration headers
    # yet).
    self.tempcache = None
    # Undocumented feature: you can use a different
    # ftp cache by assigning to the .ftpcache member;
    # in case you want logically independent URL openers
    # XXX This is not threadsafe.  Bah.
    self.ftpcache = ftpcache
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000126
def __del__(self):
    """Finalizer: delegate to close() so temporary files get removed."""
    self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000129
def close(self):
    """Close the opener; currently just a synonym for cleanup()."""
    self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000132
def cleanup(self):
    """Delete the temporary files made by retrieve() and empty tempcache.

    Files that can no longer be removed (OSError) are silently skipped.
    """
    # This code sometimes runs when the rest of this module
    # has already been deleted, so it can't use any globals
    # or import anything.
    pending = self.__tempfiles
    if pending:
        remove = self.__unlink
        for path in pending:
            try:
                remove(path)
            except OSError:
                pass
        del pending[:]
    cache = self.tempcache
    if cache:
        cache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000146
def addheader(self, *args):
    """Add a header to be used by the HTTP interface only
    e.g. u.addheader('Accept', 'sound/basic')

    The positional arguments are stored together as one tuple at the
    end of self.addheaders.
    """
    header = args
    self.addheaders.append(header)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000154 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000155 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000156 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 filename, headers = self.tempcache[fullurl]
158 fp = open(filename, 'rb')
159 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 urltype, url = splittype(fullurl)
161 if not urltype:
162 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000163 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000164 proxy = self.proxies[urltype]
165 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000166 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000168 else:
169 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000170 name = 'open_' + urltype
171 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000172 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000174 if proxy:
175 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 else:
177 return self.open_unknown(fullurl, data)
178 try:
179 if data is None:
180 return getattr(self, name)(url)
181 else:
182 return getattr(self, name)(url, data)
183 except socket.error, msg:
184 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000185
def open_unknown(self, fullurl, data=None):
    """Overridable interface to open unknown URL type.

    The default implementation always raises IOError.
    """
    scheme, rest = splittype(fullurl)
    raise IOError('url error', 'unknown url type', scheme)
Guido van Rossumca445401995-08-29 19:19:12 +0000190
def open_unknown_proxy(self, proxy, fullurl, data=None):
    """Overridable interface to open a URL whose proxy type is unknown.

    The default implementation always raises IOError.
    """
    scheme, rest = splittype(fullurl)
    raise IOError('url error', 'invalid proxy for %s' % scheme, proxy)
195
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000196 # External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    url        -- URL to fetch.
    filename   -- destination path; when omitted a temporary file is
                  created and remembered so cleanup() can delete it.
    reporthook -- optional callable invoked as
                  reporthook(blocknum, blocksize, totalsize); totalsize
                  is -1 when there is no Content-Length header.
    data       -- optional request data forwarded to open().

    The source object and the destination file are closed even when
    the transfer fails part-way through.
    """
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    urltype, url1 = splittype(url)
    if filename is None and (not urltype or urltype == 'file'):
        # A local file needs no copy: hand back its own path.
        try:
            fp = self.open_local_file(url1)
            try:
                hdrs = fp.info()
            finally:
                # Close explicitly rather than relying on refcounting.
                fp.close()
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError:
            # Not usable as a local file; fall back to open() below.
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            # Give the temporary file the same suffix as the URL path.
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024*8
            size = -1
            blocknum = 1
            if reporthook:
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                reporthook(0, bs, size)
            block = fp.read(bs)
            if reporthook:
                reporthook(1, bs, size)
            while block:
                tfp.write(block)
                block = fp.read(bs)
                blocknum = blocknum + 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()
    return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000250
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000252
def open_http(self, url, data=None):
    """Use HTTP protocol.

    url is either the scheme-less remainder of the URL (a string) or,
    when a proxy is in use, the (proxyhost, fullurl) tuple built by
    open().  A non-None data turns the request into a POST.
    """
    import httplib
    user_passwd = None
    if not isinstance(url, str):
        # Proxy case: talk to the proxy host, ask for the full URL.
        host, selector = url
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() == 'http':
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Rebuild the selector without the user:passwd part.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost
        else:
            realhost = None

        #print "proxy via http:", host, selector
    else:
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    if not host:
        raise IOError('http error', 'no host given')
    auth = None
    if user_passwd:
        import base64
        auth = base64.encodestring(user_passwd).strip()
    h = httplib.HTTP(host)
    if data is None:
        h.putrequest('GET', selector)
    else:
        h.putrequest('POST', selector)
        h.putheader('Content-type', 'application/x-www-form-urlencoded')
        h.putheader('Content-length', '%d' % len(data))
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for header in self.addheaders:
        h.putheader(*header)
    h.endheaders()
    if data is not None:
        h.send(data)
    errcode, errmsg, headers = h.getreply()
    fp = h.getfile()
    if errcode == 200:
        return addinfourl(fp, headers, "http:" + url)
    if data is None:
        return self.http_error(url, fp, errcode, errmsg, headers)
    return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000308
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle http errors.

    Derived class can override this, or provide specific handlers
    named http_error_DDD where DDD is the 3-digit error code.  When no
    such handler exists, or it returns a false value, the call falls
    through to http_error_default().
    """
    # First check if there's a specific handler for this error
    handler_name = 'http_error_%d' % errcode
    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        extra = ()
        if data is not None:
            # Only pass data along when the caller supplied some.
            extra = (data,)
        outcome = handler(url, fp, errcode, errmsg, headers, *extra)
        if outcome:
            return outcome
    return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000323
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handler: close the connection and raise IOError.

    The response body is read and discarded before fp is closed.
    """
    fp.read()
    fp.close()
    raise IOError('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000329
if hasattr(socket, "ssl"):
    def open_https(self, url, data=None):
        """Use HTTPS protocol.

        url is either the scheme-less remainder of the URL (a string)
        or, when a proxy is in use, the (proxyhost, fullurl) tuple
        built by open().  A non-None data turns the request into a
        POST.  self.key_file and self.cert_file are handed to the
        connection.
        """
        import httplib
        user_passwd = None
        if not isinstance(url, str):
            # Proxy case: talk to the proxy host, ask for the full URL.
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() == 'https':
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
            else:
                realhost = None
            #print "proxy via https:", host, selector
        else:
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        if not host:
            raise IOError('https error', 'no host given')
        auth = None
        if user_passwd:
            import base64
            auth = base64.encodestring(user_passwd).strip()
        h = httplib.HTTPS(host, 0,
                          key_file=self.key_file,
                          cert_file=self.cert_file)
        if data is None:
            h.putrequest('GET', selector)
        else:
            h.putrequest('POST', selector)
            h.putheader('Content-type',
                        'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        if auth:
            h.putheader('Authorization', 'Basic %s' % auth)
        if realhost:
            h.putheader('Host', realhost)
        for header in self.addheaders:
            h.putheader(*header)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == 200:
            return addinfourl(fp, headers, "https:" + url)
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        return self.http_error(url, fp, errcode, errmsg, headers,
                               data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000387
def open_gopher(self, url):
    """Use Gopher protocol.

    Raises IOError when the URL carries no host.
    """
    import gopherlib
    host, selector = splithost(url)
    if not host:
        raise IOError('gopher error', 'no host given')
    host = unquote(host)
    gtype, selector = splitgophertype(selector)
    selector, query = splitquery(selector)
    selector = unquote(selector)
    if not query:
        fp = gopherlib.send_selector(selector, host)
    else:
        fp = gopherlib.send_query(selector, unquote(query), host)
    return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000403
def open_file(self, url):
    """Use local file or FTP depending on form of URL.

    A URL of the form //host/... whose host part is neither empty nor
    'localhost' (any case) goes to open_ftp(); everything else goes to
    open_local_file().
    """
    names_remote_host = (url.startswith('//')
                         and url[2:3] != '/'
                         and url[2:12].lower() != 'localhost/')
    if names_remote_host:
        return self.open_ftp(url)
    return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000410
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000412 """Use local file."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000413 import mimetypes, mimetools, email.Utils
414 try:
415 from cStringIO import StringIO
416 except ImportError:
417 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000418 host, file = splithost(url)
419 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000420 try:
421 stats = os.stat(localname)
422 except OSError, e:
423 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000424 size = stats.st_size
Anthony Baxter3dd9e462004-10-11 13:53:08 +0000425 modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000426 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000427 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000428 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
429 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000430 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000431 urlfile = file
432 if file[:1] == '/':
433 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000434 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000435 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000436 host, port = splitport(host)
437 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000438 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000439 urlfile = file
440 if file[:1] == '/':
441 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000442 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000443 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000444 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000445
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000446 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000447 """Use FTP protocol."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000448 import mimetypes, mimetools
449 try:
450 from cStringIO import StringIO
451 except ImportError:
452 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000453 host, path = splithost(url)
454 if not host: raise IOError, ('ftp error', 'no host given')
455 host, port = splitport(host)
456 user, host = splituser(host)
457 if user: user, passwd = splitpasswd(user)
458 else: passwd = None
459 host = unquote(host)
460 user = unquote(user or '')
461 passwd = unquote(passwd or '')
462 host = socket.gethostbyname(host)
463 if not port:
464 import ftplib
465 port = ftplib.FTP_PORT
466 else:
467 port = int(port)
468 path, attrs = splitattr(path)
469 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000470 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000471 dirs, file = dirs[:-1], dirs[-1]
472 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000473 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000474 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000475 # XXX thread unsafe!
476 if len(self.ftpcache) > MAXFTPCACHE:
477 # Prune the cache, rather arbitrarily
478 for k in self.ftpcache.keys():
479 if k != key:
480 v = self.ftpcache[k]
481 del self.ftpcache[k]
482 v.close()
483 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000484 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 self.ftpcache[key] = \
486 ftpwrapper(user, passwd, host, port, dirs)
487 if not file: type = 'D'
488 else: type = 'I'
489 for attr in attrs:
490 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000491 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000492 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000493 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000494 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000495 mtype = mimetypes.guess_type("ftp:" + url)[0]
496 headers = ""
497 if mtype:
498 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000499 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000500 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000501 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000502 return addinfourl(fp, headers, "ftp:" + url)
503 except ftperrors(), msg:
504 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000505
def open_data(self, url, data=None):
    """Use "data" URL.

    url  -- the part of the URL after "data:".
    data -- ignored; POSTed data makes no sense for a data URL.

    Builds a small in-memory message (Date, Content-type and
    Content-length headers followed by the decoded payload) and
    returns it wrapped by addinfourl().  Raises IOError when the URL
    contains no comma.
    """
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        [mediatype, data] = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ";token" without "=" is the encoding, not a parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    msg = []
    # Spell the time out as %H:%M:%S; %T is not a portable directive.
    msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                        time.gmtime(time.time())))
    msg.append('Content-type: %s' % mediatype)
    if encoding == 'base64':
        import base64
        data = base64.decodestring(data)
    else:
        data = unquote(data)
    msg.append('Content-length: %d' % len(data))
    msg.append('')
    msg.append(data)
    msg = '\n'.join(msg)
    f = StringIO(msg)
    headers = mimetools.Message(f, 0)
    try:
        f.fileno = None     # needed for addinfourl
    except (AttributeError, TypeError):
        # NOTE(review): cStringIO objects appear to reject attribute
        # assignment; in that case there is no fileno to mask anyway.
        pass
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000549
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000550
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000551class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000552 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000553
def __init__(self, *args, **kwargs):
    """Initialise the base URLopener, then the retry bookkeeping.

    All arguments are forwarded unchanged to URLopener.__init__().
    """
    URLopener.__init__(self, *args, **kwargs)
    self.auth_cache = {}
    # Redirect counter and its limit, used by http_error_302().
    self.tries = 0
    self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000559
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception.

    The error response is wrapped with addinfourl() and returned to
    the caller instead.
    """
    return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000563
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    Follows the redirect through redirect_internal().  Once self.tries
    reaches self.maxtries (a false maxtries disables the limit) the
    redirect is reported as a 500 "Redirect Recursion" error through
    http_error_500() if defined, else http_error_default().
    """
    self.tries += 1
    limit = self.maxtries
    if not limit or self.tries < limit:
        outcome = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                         data)
        self.tries = 0
        return outcome
    # Too many redirects in a row: give up.
    if hasattr(self, "http_error_500"):
        handler = self.http_error_500
    else:
        handler = self.http_error_default
    self.tries = 0
    return handler(url, fp, 500,
                   "Internal Server Error: Redirect Recursion", headers)
579
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Open the URL named by the 'location' (or else 'uri') header.

    Returns None, leaving fp untouched, when neither header is
    present.  Otherwise the old response is read and closed and the
    new URL is opened without passing data along.
    """
    for field in ('location', 'uri'):
        if field in headers:
            target = headers[field]
            break
    else:
        return
    fp.read()
    fp.close()
    # In case the server sent a relative URL, join with original:
    target = basejoin(self.type + ":" + url, target)
    return self.open(target)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000592
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- also relocated (permanently).

    Handled by delegating to http_error_302().
    """
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000596
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- also relocated (essentially identical to 302).

    Handled by delegating to http_error_302().
    """
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
600
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 307 -- relocated, but turn POST into error.

    A request without data is redirected like a 302; a request that
    carries data is handed to http_error_default() instead.
    """
    if data is not None:
        return self.http_error_default(url, fp, errcode, errmsg, headers)
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
607
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 401 -- authentication required.
    See this URL for a description of the basic authentication scheme:
    http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

    Parses the scheme and realm out of the 'www-authenticate' header and,
    for the 'basic' scheme, retries through the method named
    'retry_<self.type>_basic_auth'.  Any other situation is handed to
    URLopener.http_error_default().
    """
    import re
    # NOTE(review): the three http_error_default() calls below are not
    # returned; the code relies on that method not returning normally
    # (presumably it raises) -- confirm against URLopener.
    if 'www-authenticate' not in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    challenge = headers['www-authenticate']
    parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not parsed:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    scheme, realm = parsed.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    retry = getattr(self, 'retry_' + self.type + '_basic_auth')
    if data is None:
        return retry(url, realm)
    return retry(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000630
def retry_http_basic_auth(self, url, realm, data=None):
    """Re-open an http URL with a user name and password embedded in it.

    Any user info already present in the host part is dropped; its
    presence makes get_user_passwd() discard a cached entry for this
    host and realm.  Returns None when no user name or password was
    obtained, otherwise the result of self.open() on the new URL.
    """
    host, selector = splithost(url)
    # Non-zero exactly when the host part carried 'user[:passwd]@'.
    had_userinfo = host.find('@') + 1
    host = host[had_userinfo:]
    user, passwd = self.get_user_passwd(host, realm, had_userinfo)
    if not (user or passwd):
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = 'http://' + credentials + '@' + host + selector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000643
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open an https URL with a user name and password embedded in it.

    Any user info already present in the host part is dropped; its
    presence makes get_user_passwd() discard a cached entry for this
    host and realm.  Returns None when no user name or password was
    obtained, otherwise the result of self.open_https() on the rebuilt
    '//user:passwd@host/selector' string.
    """
    host, selector = splithost(url)
    # Non-zero exactly when the host part carried 'user[:passwd]@'.
    had_userinfo = host.find('@') + 1
    host = host[had_userinfo:]
    user, passwd = self.get_user_passwd(host, realm, had_userinfo)
    if not (user or passwd):
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = '//' + credentials + '@' + host + selector
    return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000653
def get_user_passwd(self, host, realm, clear_cache = 0):
    """Return a (user, passwd) pair for realm at host.

    Answers are cached in self.auth_cache under 'realm@host' with the
    host lower-cased.  A cached pair is returned as is unless clear_cache
    is true, in which case it is dropped and the user is asked again via
    prompt_user_passwd().  A fresh answer is cached only when it has a
    user name or a password.
    """
    key = realm + '@' + host.lower()
    if key in self.auth_cache:
        if not clear_cache:
            return self.auth_cache[key]
        del self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[key] = (user, passwd)
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000664
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!

    Asks for a user name with raw_input() and a password with
    getpass.getpass() and returns them as (user, passwd).  If the user
    interrupts with KeyboardInterrupt, a newline is printed and
    (None, None) is returned.
    """
    import getpass
    try:
        user = raw_input("Enter username for %s at %s: " % (realm, host))
        passwd = getpass.getpass(
            "Enter password for %s in %s at %s: " % (user, realm, host))
    except KeyboardInterrupt:
        print
        return None, None
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000677
678
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000679# Utility functions
680
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the remembered value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000688
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the remembered value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000696
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only on the first call; the value of
    ftplib.all_errors is remembered for later calls.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000705
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and shared between
    calls.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    # Prefer the C implementation of StringIO when it is available.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000719
720
721# Utility classes
722
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000723class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000724 """Class used by open_ftp() for cache of open FTP connections."""
725
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000726 def __init__(self, user, passwd, host, port, dirs):
727 self.user = user
728 self.passwd = passwd
729 self.host = host
730 self.port = port
731 self.dirs = dirs
732 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000733
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000734 def init(self):
735 import ftplib
736 self.busy = 0
737 self.ftp = ftplib.FTP()
738 self.ftp.connect(self.host, self.port)
739 self.ftp.login(self.user, self.passwd)
740 for dir in self.dirs:
741 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000742
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000743 def retrfile(self, file, type):
744 import ftplib
745 self.endtransfer()
746 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
747 else: cmd = 'TYPE ' + type; isdir = 0
748 try:
749 self.ftp.voidcmd(cmd)
750 except ftplib.all_errors:
751 self.init()
752 self.ftp.voidcmd(cmd)
753 conn = None
754 if file and not isdir:
755 # Use nlst to see if the file exists at all
756 try:
757 self.ftp.nlst(file)
758 except ftplib.error_perm, reason:
759 raise IOError, ('ftp error', reason), sys.exc_info()[2]
760 # Restore the transfer mode!
761 self.ftp.voidcmd(cmd)
762 # Try to retrieve as a file
763 try:
764 cmd = 'RETR ' + file
765 conn = self.ftp.ntransfercmd(cmd)
766 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000767 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000768 raise IOError, ('ftp error', reason), sys.exc_info()[2]
769 if not conn:
770 # Set transfer mode to ASCII!
771 self.ftp.voidcmd('TYPE A')
772 # Try a directory listing
773 if file: cmd = 'LIST ' + file
774 else: cmd = 'LIST'
775 conn = self.ftp.ntransfercmd(cmd)
776 self.busy = 1
777 # Pass back both a suitably decorated object and a retrieval length
778 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000779 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000780 def endtransfer(self):
781 if not self.busy:
782 return
783 self.busy = 0
784 try:
785 self.ftp.voidresp()
786 except ftperrors():
787 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000788
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000789 def close(self):
790 self.endtransfer()
791 try:
792 self.ftp.close()
793 except ftperrors():
794 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000795
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its bound methods directly on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # These are only forwarded when the wrapped object offers them.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000821
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called when close() runs."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then run the hook (at most once)."""
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            # Forget the hook so a second close() does not call it again.
            self.closehook = None
            self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000836
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and keep headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000846
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and keep headers and url for info() and geturl()."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000860
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000861
Guido van Rossum7c395db1994-07-04 22:14:49 +0000862# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000863# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000864# splittype('type:opaquestring') --> 'type', 'opaquestring'
865# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000866# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
867# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000868# splitport('host:port') --> 'host', 'port'
869# splitquery('/path?query') --> '/path', 'query'
870# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000871# splitattr('/path;attr1=value1;attr2=value2;...') ->
872# '/path', ['attr1=value1', 'attr2=value2', ...]
873# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000874# splitgophertype('/Xselector') --> 'X', 'selector'
875# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
877
try:
    unicode
except NameError:
    # This interpreter has no unicode type, so nothing can be unicode.
    def _is_unicode(x):
        """Always false: there is no unicode type here."""
        return 0
else:
    def _is_unicode(x):
        """Tell whether x is an instance of unicode."""
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A unicode URL is encoded to ASCII; UnicodeError is raised when it
    contains non-ASCII characters.  Any other value is returned as is.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
898
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading
    'URL:' marker are removed, in that order.
    """
    url = url.strip()
    bracketed = url[:1] == '<' and url[-1:] == '>'
    if bracketed:
        url = url[1:-1].strip()
    if url[:4] == 'URL:':
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000906
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; when the URL does not start with
    a type, (None, url) is returned.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily, on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match starts at position 0 and ends right after the colon.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000920
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when the URL does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily, on first use only.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000932
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  Without an '@' the result
    is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile lazily, on first use only.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return map(unquote, found.groups())
Guido van Rossum7c395db1994-07-04 22:14:49 +0000944
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    Splits at the first ':'; without one the result is (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily, on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +0000956
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When the host does not
    end in ':' followed by digits the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compile lazily, on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000969
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile lazily, on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.groups()
    nport = None
    if port:
        # An empty port keeps the None set above, like a non-numeric one.
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +0000991
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits at the last '?'; without one the result is (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily, on first use only.
        import re
        # Raw string: '\?' is a regex escape, not a string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001003
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits at the last '#'; without one the result is (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily, on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001015
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    attrs = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = attrs.pop(0)
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001021
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the first '='; without one the result is (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily, on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001033
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless the selector starts with '/' and has
    at least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001039
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' is left untouched.
    """
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = s.split('%')
    # Text before the first '%' is copied through unchanged.
    res = [pieces[0]]
    myappend = res.append
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int() would also accept a sign or
        # surrounding whitespace, turning e.g. '%+1' into chr(1).
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            myappend(chr(int(code, 16)) + item[2:])
        else:
            myappend('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001058
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001063
# Characters that are never quoted, whatever the caller passes as safe.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set produced by quote()'s default arguments, and a lookup table
# for it that is built on first use.
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s using the default safe set (always_safe plus '/')."""
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = dict([(c, c) for c in _fast_safe_test])
    quoted = []
    for c in s:
        if c in _fast_safe:
            quoted.append(c)
        else:
            quoted.append('%%%02X' % ord(c))
    return ''.join(quoted)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in always_safe or in safe are kept; every other character
    is replaced by '%' and its code as two upper-case hex digits.
    """
    safe = always_safe + safe
    if safe == _fast_safe_test:
        # Default safe set: use the precomputed lookup table.
        return _fast_quote(s)
    quoted = []
    for c in s:
        if c in safe:
            quoted.append(c)
        else:
            quoted.append('%%%02X' % ord(c))
    return ''.join(quoted)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001114
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Every space-separated piece is quoted with quote(piece, safe) and the
    pieces are joined again with '+'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(piece, safe) for piece in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001124
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001125def urlencode(query,doseq=0):
1126 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001127
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001128 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001129 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001130
1131 If the query arg is a sequence of two-element tuples, the order of the
1132 parameters in the output will match the order of parameters in the
1133 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001134 """
Tim Peters658cba62001-02-09 20:06:00 +00001135
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001136 if hasattr(query,"items"):
1137 # mapping objects
1138 query = query.items()
1139 else:
1140 # it's a bother at times that strings and string-like objects are
1141 # sequences...
1142 try:
1143 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001144 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001145 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001146 raise TypeError
1147 # zero-length sequences of all types will get here and succeed,
1148 # but that's a minor nit - since the original implementation
1149 # allowed empty dicts that type of behavior probably should be
1150 # preserved for consistency
1151 except TypeError:
1152 ty,va,tb = sys.exc_info()
1153 raise TypeError, "not a valid non-string sequence or mapping object", tb
1154
Guido van Rossume7b146f2000-02-04 15:28:42 +00001155 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001156 if not doseq:
1157 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001158 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001159 k = quote_plus(str(k))
1160 v = quote_plus(str(v))
1161 l.append(k + '=' + v)
1162 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001163 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001164 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001165 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001166 v = quote_plus(v)
1167 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001168 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001169 # is there a reasonable way to convert to ASCII?
1170 # encode generates a string, but "replace" or "ignore"
1171 # lose information and "strict" can raise UnicodeError
1172 v = quote_plus(v.encode("ASCII","replace"))
1173 l.append(k + '=' + v)
1174 else:
1175 try:
1176 # is this a sufficient test for sequence-ness?
1177 x = len(v)
1178 except TypeError:
1179 # not a sequence
1180 v = quote_plus(str(v))
1181 l.append(k + '=' + v)
1182 else:
1183 # loop over the sequence
1184 for elt in v:
1185 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001186 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001187
Guido van Rossum442e7201996-03-20 15:33:11 +00001188# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are skipped.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            # Whatever precedes the suffix is the scheme.
            proxies[name[:-len(suffix)]] = value
    return proxies
1204
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        Returns an empty dictionary when the ``ic`` module cannot be
        imported or the Internet Config object cannot be created.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                # Proxy use is switched on but no host is stored.
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Return 0: no bypass list is consulted on this platform."""
        return 0

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Settings from the environment win; Internet Config is only
        consulted when the environment yields nothing.

        """
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        Returns an empty dictionary when ``_winreg`` is unavailable or
        the proxy is disabled.  If the registry cannot be read, or the
        ProxyServer value is malformed, whatever was collected before
        the failure is returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        # Imported once here rather than on every pass of the loop below.
        import re
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # Nested try/finally so the key is released even when a
            # query or the parsing below raises.
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted
                    # to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.  Nothing more to do: return what was
            # gathered so far.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if *host* should be reached without the proxy, else 0.

        The decision is taken from the ProxyEnable and ProxyOverride
        values in the registry.  ProxyOverride is a ';' separated list
        of glob-style patterns ('*' and '?'); the special entry
        '<local>' stands for the local machine.  Both *host* and the
        address it resolves to are tried against every pattern.

        Returns 0 when the registry cannot be consulted, the proxy is
        disabled, or no pattern matches.

        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to
                # ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.  A separate
        # list is used so the comparison below is string against string;
        # comparing against the list itself would always be unequal.
        candidates = [host]
        try:
            addr = socket.gethostbyname(host)
            if addr != host:
                candidates.append(addr)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        patterns = []
        for entry in proxyOverride.split(';'):
            if entry == '<local>':
                localname = socket.gethostname()
                patterns.extend(['localhost', '127.0.0.1', localname])
                try:
                    patterns.append(socket.gethostbyname(localname))
                except socket.error:
                    # Own name does not resolve; the names above still
                    # cover the local machine.
                    pass
            elif entry:
                # Empty entries (e.g. from a trailing ';') are dropped:
                # as a pattern '' would match every host.
                patterns.append(entry)
        # now check if we match one of the registry values.
        for test in patterns:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in candidates:
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: no bypass list is consulted on this platform."""
        return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001355
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001356# Test and time quote() and unquote()
def test1():
    """Round-trip every byte value through quote()/unquote() and time it.

    Prints 'Wrong!' when the round trip does not reproduce the input,
    then the repr of the input, the quoted form and the unquoted form,
    and finally the elapsed time in seconds rounded to three places.

    """
    s = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    quoted = quote(s)
    restored = unquote(quoted)
    finished = time.time()
    if restored != s:
        print('Wrong!')
    for shown in (s, quoted, restored):
        print(repr(shown))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001371
1372
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a transfer.

    Passed as the report hook to urlretrieve() by test(); the three
    arguments are echoed as integers.

    """
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001377
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001378# Test program
def test(args=[]):
    """Retrieve each URL in *args* and print what came back.

    For every URL this prints a banner, the local filename returned by
    urlretrieve(), the response headers (when there are any) and the
    retrieved data with carriage returns removed.  When *args* is empty
    a built-in list of sample URLs is used, extended by an https URL if
    URLopener has an open_https method.  urlcleanup() is always called
    at the end, even when a retrieval fails.

    The default list is never modified in place, so sharing it between
    calls is harmless.

    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s %s' % (k + ':', h[k]))
                print('======')
            fp = open(fn, 'rb')
            # Close explicitly instead of relying on the file object
            # being collected as soon as its name is deleted.
            try:
                data = fp.read()
            finally:
                fp.close()
            # Strip carriage returns so the data prints cleanly.
            data = data.replace('\r', '')
            print(data)
            # Drop the references to the retrieved file and headers
            # before moving on to the next URL.
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001411
Guido van Rossum23490151998-06-25 02:39:00 +00001412def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001413 import getopt, sys
1414 try:
1415 opts, args = getopt.getopt(sys.argv[1:], "th")
1416 except getopt.error, msg:
1417 print msg
1418 print "Use -h for help"
1419 return
1420 t = 0
1421 for o, a in opts:
1422 if o == '-t':
1423 t = t + 1
1424 if o == '-h':
1425 print "Usage: python urllib.py [-t] [url ...]"
1426 print "-t runs self-test;",
1427 print "otherwise, contents of urls are printed"
1428 return
1429 if t:
1430 if t > 1:
1431 test1()
1432 test(args)
1433 else:
1434 if not args:
1435 print "Use -h for help"
1436 for url in args:
1437 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001438
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001439# Run test program when run as a script
if __name__ == '__main__':
    # Executed as a script rather than imported: hand over to the
    # command-line driver.
    main()