blob: e2f01c59c4ed3a9e0100a9a60e40f04b2110ad94 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
# Names exported by a star-import of this module.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-agent header.
__version__ = '1.16'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Helper for non-unix systems
# On 'mac', 'nt' and 'riscos' the two converters come from a
# platform-specific module; everywhere else they are thin wrappers
# around unquote() and quote().
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Return pathname with URL quoting removed (see unquote())."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Return pathname with URL quoting applied (see quote())."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
64# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    Without proxies, one module-wide FancyURLopener is created on first
    use and reused afterwards.  With an explicit proxies mapping a fresh
    FancyURLopener is built for this call only and is not remembered.
    When data is given it is forwarded to the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments
    are passed unchanged to its retrieve() method, whose result is
    returned.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    opener = _urlopener
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if there is one."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
89
# Default FTP connection cache.  URLopener.__init__ stores a reference to
# this same dict on each instance, so by default all openers share it.
ftpcache = {}
91class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000092 """Class to open URLs.
93 This is a class rather than just a subroutine because we may need
94 more than one set of global protocol-specific options.
95 Note -- this is a base class for those who don't want the
96 automatic handling of errors type 302 (relocated) and 401
97 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000100
Guido van Rossumba311382000-08-24 16:18:04 +0000101 version = "Python-urllib/%s" % __version__
102
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000103 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000104 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000105 if proxies is None:
106 proxies = getproxies()
107 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
108 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 self.key_file = x509.get('key_file')
110 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000111 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000112 self.__tempfiles = []
113 self.__unlink = os.unlink # See cleanup()
114 self.tempcache = None
115 # Undocumented feature: if you assign {} to tempcache,
116 # it is used to cache files retrieved with
117 # self.retrieve(). This is not enabled by default
118 # since it does not work for changing documents (and I
119 # haven't got the logic to check expiration headers
120 # yet).
121 self.ftpcache = ftpcache
122 # Undocumented feature: you can use a different
123 # ftp cache by assigning to the .ftpcache member;
124 # in case you want logically independent URL openers
125 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000126
    def __del__(self):
        """Finalizer: delegate to close() so temporary files get removed."""
        self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000129
    def close(self):
        """Close the opener; this just calls cleanup()."""
        self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000132
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000133 def cleanup(self):
134 # This code sometimes runs when the rest of this module
135 # has already been deleted, so it can't use any globals
136 # or import anything.
137 if self.__tempfiles:
138 for file in self.__tempfiles:
139 try:
140 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000141 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000142 pass
143 del self.__tempfiles[:]
144 if self.tempcache:
145 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000146
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000148 """Add a header to be used by the HTTP interface only
149 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000154 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000155 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000156 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 filename, headers = self.tempcache[fullurl]
158 fp = open(filename, 'rb')
159 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 urltype, url = splittype(fullurl)
161 if not urltype:
162 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000163 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000164 proxy = self.proxies[urltype]
165 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000166 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000168 else:
169 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000170 name = 'open_' + urltype
171 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000172 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000174 if proxy:
175 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 else:
177 return self.open_unknown(fullurl, data)
178 try:
179 if data is None:
180 return getattr(self, name)(url)
181 else:
182 return getattr(self, name)(url, data)
183 except socket.error, msg:
184 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000185
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000186 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000187 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000188 type, url = splittype(fullurl)
189 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000190
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000191 def open_unknown_proxy(self, proxy, fullurl, data=None):
192 """Overridable interface to open unknown URL type."""
193 type, url = splittype(fullurl)
194 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
195
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000196 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000197 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000198 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000199 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000200 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000201 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000202 return self.tempcache[url]
203 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000204 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000205 try:
206 fp = self.open_local_file(url1)
207 hdrs = fp.info()
208 del fp
209 return url2pathname(splithost(url1)[1]), hdrs
210 except IOError, msg:
211 pass
Fred Drake316a7932000-08-24 01:01:26 +0000212 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000213 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000214 if filename:
215 tfp = open(filename, 'wb')
216 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000217 import tempfile
218 garbage, path = splittype(url)
219 garbage, path = splithost(path or "")
220 path, garbage = splitquery(path or "")
221 path, garbage = splitattr(path or "")
222 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000223 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000224 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000225 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000226 result = filename, headers
227 if self.tempcache is not None:
228 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000229 bs = 1024*8
230 size = -1
231 blocknum = 1
232 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000233 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000234 size = int(headers["Content-Length"])
235 reporthook(0, bs, size)
236 block = fp.read(bs)
237 if reporthook:
238 reporthook(1, bs, size)
239 while block:
240 tfp.write(block)
241 block = fp.read(bs)
242 blocknum = blocknum + 1
243 if reporthook:
244 reporthook(blocknum, bs, size)
245 fp.close()
246 tfp.close()
247 del fp
248 del tfp
249 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000250
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000252
    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a string (the part of the URL after "http:") or a
        (proxyhost, fullurl) tuple, which is what open() passes when a
        proxy is configured.  When data is not None it is sent as the body
        of a POST with Content-type application/x-www-form-urlencoded;
        otherwise a GET is issued.

        A 200 reply is returned wrapped in addinfourl(); any other status
        is handed to http_error().  Raises IOError if no host is found.
        """
        import httplib
        user_passwd = None
        if isinstance(url, str):
            # Direct connection: host and selector come from the URL itself.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: connect to the proxy host and use the full URL
            # as the selector.
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                # Non-http URL through an http proxy: no Host header is sent.
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector from the host part left over
                    # after splituser() (presumably without credentials).
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Connect to the target host instead of the proxy.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')
        if user_passwd:
            import base64
            # NOTE(review): encodestring() may insert newlines inside long
            # output and strip() only removes the trailing one -- confirm
            # that long credentials produce a valid header.
            auth = base64.encodestring(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        # Headers registered through addheader(), plus the default User-agent.
        for args in self.addheaders: h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        else:
            # data is only passed on when present, so handlers that do not
            # accept a data argument keep working for GET requests.
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000308
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000310 """Handle http errors.
311 Derived class can override this, or provide specific handlers
312 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 # First check if there's a specific handler for this error
314 name = 'http_error_%d' % errcode
315 if hasattr(self, name):
316 method = getattr(self, name)
317 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000318 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000319 else:
320 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000321 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000322 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000323
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000324 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000325 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000326 void = fp.read()
327 fp.close()
328 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000329
    # Only defined when the socket module was built with SSL support.
    if hasattr(socket, "ssl"):
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Same calling convention as open_http(): url is either a string
            or a (proxyhost, fullurl) tuple from open(), and a non-None
            data turns the request into a form-encoded POST.  The
            connection uses the key_file and cert_file given to the
            constructor.

            A 200 reply is returned wrapped in addinfourl(); any other
            status is handed to http_error().  Raises IOError if no host
            is found.
            """
            import httplib
            user_passwd = None
            if isinstance(url, str):
                # Direct connection: host and selector come from the URL.
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxy case: connect to the proxy host and use the full
                # URL as the selector.
                # NOTE(review): unlike open_http(), proxy_bypass() is not
                # consulted here -- confirm whether that is intended.
                host, selector = url
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if user_passwd:
                import base64
                # NOTE(review): encodestring() may insert newlines inside
                # long output; strip() only removes the trailing one.
                auth = base64.encodestring(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            # Headers registered through addheader(), plus the default
            # User-agent.
            for args in self.addheaders: h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == 200:
                return addinfourl(fp, headers, "https:" + url)
            else:
                # Errors go through the shared http_error() dispatcher.
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000387
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000388 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000389 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000390 import gopherlib
391 host, selector = splithost(url)
392 if not host: raise IOError, ('gopher error', 'no host given')
393 host = unquote(host)
394 type, selector = splitgophertype(selector)
395 selector, query = splitquery(selector)
396 selector = unquote(selector)
397 if query:
398 query = unquote(query)
399 fp = gopherlib.send_query(selector, query, host)
400 else:
401 fp = gopherlib.send_selector(selector, host)
402 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000403
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000404 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000405 """Use local file or FTP depending on form of URL."""
Jack Jansen4ef11032002-09-12 20:14:04 +0000406 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000407 return self.open_ftp(url)
408 else:
409 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000410
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000412 """Use local file."""
Anthony Baxter3dd9e462004-10-11 13:53:08 +0000413 import mimetypes, mimetools, email.Utils, StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000414 host, file = splithost(url)
415 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000416 try:
417 stats = os.stat(localname)
418 except OSError, e:
419 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000420 size = stats.st_size
Anthony Baxter3dd9e462004-10-11 13:53:08 +0000421 modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000422 mtype = mimetypes.guess_type(url)[0]
423 headers = mimetools.Message(StringIO.StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000424 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
425 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000426 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000427 urlfile = file
428 if file[:1] == '/':
429 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000430 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000431 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000432 host, port = splitport(host)
433 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000434 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000435 urlfile = file
436 if file[:1] == '/':
437 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000438 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000439 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000440 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000441
    def open_ftp(self, url):
        """Use FTP protocol.

        The URL is split into user, password, host, port, directory list,
        file name and attributes.  Connections are kept in self.ftpcache
        keyed by (user, host, port, directory) and reused.  The transfer
        type defaults to 'D' when there is no file name and 'I' otherwise,
        and can be overridden with a 'type' attribute.

        Returns the data wrapped in addinfourl() with Content-Type and
        Content-Length headers when they are known.  Raises IOError when
        no host is given or when an ftperrors() exception occurs.
        """
        import mimetypes, mimetools, StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        # Resolve the name so the cache key uses the address.
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the path's leading '/'.
        if dirs and not dirs[0]: dirs = dirs[1:]
        # A second empty component (path began with '//') becomes '/'.
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            # (every entry except the one about to be used is closed).
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO.StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError, keeping the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000497
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000498 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000499 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000500 # ignore POSTed data
501 #
502 # syntax of data URLs:
503 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
504 # mediatype := [ type "/" subtype ] *( ";" parameter )
505 # data := *urlchar
506 # parameter := attribute "=" value
Neal Norwitzaad18492002-03-26 16:25:01 +0000507 import StringIO, mimetools
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000508 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000509 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000510 except ValueError:
511 raise IOError, ('data error', 'bad data URL')
512 if not type:
513 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000514 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000515 if semi >= 0 and '=' not in type[semi:]:
516 encoding = type[semi+1:]
517 type = type[:semi]
518 else:
519 encoding = ''
520 msg = []
521 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
522 time.gmtime(time.time())))
523 msg.append('Content-type: %s' % type)
524 if encoding == 'base64':
525 import base64
526 data = base64.decodestring(data)
527 else:
528 data = unquote(data)
529 msg.append('Content-length: %d' % len(data))
530 msg.append('')
531 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000532 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000533 f = StringIO.StringIO(msg)
534 headers = mimetools.Message(f, 0)
535 f.fileno = None # needed for addinfourl
536 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000537
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000538
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000539class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000540 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000541
Neal Norwitz60e04cd2002-06-11 13:38:51 +0000542 def __init__(self, *args, **kwargs):
Guido van Rossum68468eb2003-02-27 20:14:51 +0000543 URLopener.__init__(self, *args, **kwargs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000544 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000545 self.tries = 0
546 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000547
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000548 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000549 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000550 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000551
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000552 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000553 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000554 self.tries += 1
555 if self.maxtries and self.tries >= self.maxtries:
556 if hasattr(self, "http_error_500"):
557 meth = self.http_error_500
558 else:
559 meth = self.http_error_default
560 self.tries = 0
561 return meth(url, fp, 500,
562 "Internal Server Error: Redirect Recursion", headers)
563 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
564 data)
565 self.tries = 0
566 return result
567
568 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Raymond Hettinger54f02222002-06-01 14:18:47 +0000569 if 'location' in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000570 newurl = headers['location']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000571 elif 'uri' in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000572 newurl = headers['uri']
573 else:
574 return
575 void = fp.read()
576 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000577 # In case the server sent a relative URL, join with original:
Moshe Zadka5d87d472001-04-09 14:54:21 +0000578 newurl = basejoin(self.type + ":" + url, newurl)
Guido van Rossumfa19f7c2003-05-16 01:46:51 +0000579 return self.open(newurl)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000580
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000581 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000582 """Error 301 -- also relocated (permanently)."""
583 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000584
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000585 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
586 """Error 303 -- also relocated (essentially identical to 302)."""
587 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
588
Guido van Rossumfa19f7c2003-05-16 01:46:51 +0000589 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
590 """Error 307 -- relocated, but turn POST into error."""
591 if data is None:
592 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
593 else:
594 return self.http_error_default(url, fp, errcode, errmsg, headers)
595
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 401 -- authentication required.

    The 'www-authenticate' header is parsed for a scheme and a quoted
    realm.  A missing header, an unparsable header or a scheme other
    than 'basic' is passed to URLopener.http_error_default.  Otherwise
    the request is retried through the method named
    retry_<type>_basic_auth, where <type> is self.type.

    See this URL for a description of the basic authentication scheme:
    http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
    if 'www-authenticate' not in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    challenge = headers['www-authenticate']
    import re
    parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if parsed is None:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    scheme, realm = parsed.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    retry = getattr(self, 'retry_' + self.type + '_basic_auth')
    # data is only forwarded when the original request carried some.
    if data is None:
        return retry(url, realm)
    return retry(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000618
def retry_http_basic_auth(self, url, realm, data=None):
    """Re-open an http URL with a user name and password in the host part.

    Returns None when get_user_passwd() yields neither a user nor a
    password; otherwise returns the result of self.open().
    """
    netloc, selector = splithost(url)
    # 0 when the URL carried no 'user@' part; a non-zero value makes
    # get_user_passwd() drop its cached entry for this realm and host.
    at = netloc.find('@') + 1
    bare_host = netloc[at:]
    user, passwd = self.get_user_passwd(bare_host, realm, at)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = 'http://' + credentials + '@' + bare_host + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000631
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open an https URL with a user name and password in the host part.

    Returns None when get_user_passwd() yields neither a user nor a
    password; otherwise returns the result of self.open_https().
    """
    netloc, selector = splithost(url)
    # 0 when the URL carried no 'user@' part; a non-zero value makes
    # get_user_passwd() drop its cached entry for this realm and host.
    at = netloc.find('@') + 1
    bare_host = netloc[at:]
    user, passwd = self.get_user_passwd(bare_host, realm, at)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = '//' + credentials + '@' + bare_host + selector
    return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000641
def get_user_passwd(self, host, realm, clear_cache = 0):
    """Return (user, passwd) for realm at host, consulting self.auth_cache.

    Entries are keyed on realm + '@' + lower-cased host.  A cached entry
    is returned unless clear_cache is true, in which case it is deleted
    and prompt_user_passwd() is asked again.  A fresh answer is cached
    only when it contains a user or a password.
    """
    key = realm + '@' + host.lower()
    cache = self.auth_cache
    if key in cache:
        if not clear_cache:
            return cache[key]
        del cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        cache[key] = (user, passwd)
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000652
def prompt_user_passwd(self, host, realm):
    """Ask on the terminal for a user name and password.

    Override this in a GUI environment!  Returns (user, passwd), or
    (None, None) when the prompt is interrupted from the keyboard.
    """
    import getpass
    try:
        user_prompt = "Enter username for %s at %s: " % (realm, host)
        user = raw_input(user_prompt)
        passwd_prompt = ("Enter password for %s in %s at %s: " %
                         (user, realm, host))
        passwd = getpass.getpass(passwd_prompt)
        return user, passwd
    except KeyboardInterrupt:
        print
        return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000665
666
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000667# Utility functions
668
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done on the first call and remembered in _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000676
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done on the first call and remembered in _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000684
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on the first call only; its all_errors value is
    remembered in _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000693
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built on the first call from an empty StringIO and
    shared by every later call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    import StringIO
    _noheaders = mimetools.Message(StringIO.StringIO(), 0)
    _noheaders.fp.close()    # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000704
705
706# Utility classes
707
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000708class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000709 """Class used by open_ftp() for cache of open FTP connections."""
710
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000711 def __init__(self, user, passwd, host, port, dirs):
712 self.user = user
713 self.passwd = passwd
714 self.host = host
715 self.port = port
716 self.dirs = dirs
717 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000718
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000719 def init(self):
720 import ftplib
721 self.busy = 0
722 self.ftp = ftplib.FTP()
723 self.ftp.connect(self.host, self.port)
724 self.ftp.login(self.user, self.passwd)
725 for dir in self.dirs:
726 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000727
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000728 def retrfile(self, file, type):
729 import ftplib
730 self.endtransfer()
731 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
732 else: cmd = 'TYPE ' + type; isdir = 0
733 try:
734 self.ftp.voidcmd(cmd)
735 except ftplib.all_errors:
736 self.init()
737 self.ftp.voidcmd(cmd)
738 conn = None
739 if file and not isdir:
740 # Use nlst to see if the file exists at all
741 try:
742 self.ftp.nlst(file)
743 except ftplib.error_perm, reason:
744 raise IOError, ('ftp error', reason), sys.exc_info()[2]
745 # Restore the transfer mode!
746 self.ftp.voidcmd(cmd)
747 # Try to retrieve as a file
748 try:
749 cmd = 'RETR ' + file
750 conn = self.ftp.ntransfercmd(cmd)
751 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000752 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000753 raise IOError, ('ftp error', reason), sys.exc_info()[2]
754 if not conn:
755 # Set transfer mode to ASCII!
756 self.ftp.voidcmd('TYPE A')
757 # Try a directory listing
758 if file: cmd = 'LIST ' + file
759 else: cmd = 'LIST'
760 conn = self.ftp.ntransfercmd(cmd)
761 self.busy = 1
762 # Pass back both a suitably decorated object and a retrieval length
763 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000764 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000765 def endtransfer(self):
766 if not self.busy:
767 return
768 self.busy = 0
769 try:
770 self.ftp.voidresp()
771 except ftperrors():
772 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000773
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000774 def close(self):
775 self.endtransfer()
776 try:
777 self.ftp.close()
778 except ftperrors():
779 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000780
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its reading methods as instance attributes."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # The remaining methods are forwarded only when fp provides them.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Clear the forwarded methods, then close and forget fp."""
        for name in ("read", "readline", "readlines", "fileno"):
            setattr(self, name, None)
        wrapped = self.fp
        if wrapped:
            wrapped.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000806
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called when close() is called."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the file, then call the hook (once) and forget it."""
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000821
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000831
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember headers and url for info() and geturl()."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000845
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000846
Guido van Rossum7c395db1994-07-04 22:14:49 +0000847# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000848# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000849# splittype('type:opaquestring') --> 'type', 'opaquestring'
850# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000851# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
852# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000853# splitport('host:port') --> 'host', 'port'
854# splitquery('/path?query') --> '/path', 'query'
855# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000856# splitattr('/path;attr1=value1;attr2=value2;...') ->
857# '/path', ['attr1=value1', 'attr2=value2', ...]
858# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000859# splitgophertype('/Xselector') --> 'X', 'selector'
860# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
862
# Interpreters built without unicode support have no 'unicode' builtin;
# there, nothing can be a unicode object.
try:
    _unicode_type = unicode
except NameError:
    _unicode_type = None

def _is_unicode(x):
    """Return true if x is a unicode object (always 0 without unicode)."""
    if _unicode_type is None:
        return 0
    return isinstance(x, _unicode_type)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000871
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode input is encoded as ASCII; anything else is returned as is.
    Raises UnicodeError when the URL contains non-ASCII characters.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
883
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of angle brackets and a leading
    'URL:' marker are each removed when present.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000891
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None (and url is returned
    whole) when url does not start with 'type:'.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000905
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#', so a query or a
    fragment that follows the host directly (no path) is returned with
    the second item rather than being taken for part of the host name.
    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _hostprog = re.compile('^//([^/?#]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        return match.group(1, 2)
    return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000917
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@' and both parts are passed through
    unquote().  Without an '@' the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return map(unquote, found.group(1, 2))
Guido van Rossum7c395db1994-07-04 22:14:49 +0000929
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'.  Without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000941
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When host does not end
    in ':' followed by at least one digit the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000954
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after the
    last ':'.
    Return None as the port if there is a ':' but no valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +0000976
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'.  Without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000988
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'.  Without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001000
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001006
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='.  Without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001018
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' and has at
    least one more character to serve as the type.
    """
    has_type = selector[:1] == '/' and len(selector) >= 2
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001024
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' is left in place,
    including one followed by a sign or whitespace (such as '%+1' or
    '% 1'), which int() would otherwise accept as a number.
    """
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            res.append(chr(int(code, 16)) + item[2:])
        else:
            # Not a valid escape: keep the text exactly as it was.
            res.append('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001043
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001048
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set quote() ends up with when called with its default
# argument; that case is served by _fast_quote().
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s, leaving only the characters of _fast_safe_test alone."""
    global _fast_safe
    if _fast_safe is None:
        # Built on first use: maps every safe character to itself.
        _fast_safe = dict(zip(_fast_safe_test, _fast_safe_test))
    pieces = []
    for ch in s:
        if ch in _fast_safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Letters, digits and '_.-' are never quoted; the characters in safe
    are left alone in addition to those.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in allowed:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001099
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Everything between the spaces is quoted with quote(), using the
    given safe characters.
    """
    if ' ' not in s:
        return quote(s, safe)
    quoted_words = [quote(word, safe) for word in s.split(' ')]
    return '+'.join(quoted_words)
Guido van Rossum0564e121996-12-13 14:47:36 +00001109
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001110def urlencode(query,doseq=0):
1111 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001112
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001113 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001114 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001115
1116 If the query arg is a sequence of two-element tuples, the order of the
1117 parameters in the output will match the order of parameters in the
1118 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001119 """
Tim Peters658cba62001-02-09 20:06:00 +00001120
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001121 if hasattr(query,"items"):
1122 # mapping objects
1123 query = query.items()
1124 else:
1125 # it's a bother at times that strings and string-like objects are
1126 # sequences...
1127 try:
1128 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001129 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001130 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001131 raise TypeError
1132 # zero-length sequences of all types will get here and succeed,
1133 # but that's a minor nit - since the original implementation
1134 # allowed empty dicts that type of behavior probably should be
1135 # preserved for consistency
1136 except TypeError:
1137 ty,va,tb = sys.exc_info()
1138 raise TypeError, "not a valid non-string sequence or mapping object", tb
1139
Guido van Rossume7b146f2000-02-04 15:28:42 +00001140 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001141 if not doseq:
1142 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001143 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001144 k = quote_plus(str(k))
1145 v = quote_plus(str(v))
1146 l.append(k + '=' + v)
1147 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001148 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001149 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001150 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001151 v = quote_plus(v)
1152 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001153 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001154 # is there a reasonable way to convert to ASCII?
1155 # encode generates a string, but "replace" or "ignore"
1156 # lose information and "strict" can raise UnicodeError
1157 v = quote_plus(v.encode("ASCII","replace"))
1158 l.append(k + '=' + v)
1159 else:
1160 try:
1161 # is this a sufficient test for sequence-ness?
1162 x = len(v)
1163 except TypeError:
1164 # not a sequence
1165 v = quote_plus(str(v))
1166 l.append(k + '=' + v)
1167 else:
1168 # loop over the sequence
1169 for elt in v:
1170 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001171 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001172
Guido van Rossum442e7201996-03-20 15:33:11 +00001173# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched without regard to case and the scheme is
    returned in lower case; variables with an empty value are ignored.
    """
    suffix = '_proxy'
    found = {}
    for variable, server in os.environ.items():
        if not server:
            continue
        lowered = variable.lower()
        if lowered.endswith(suffix):
            found[lowered[:-len(suffix)]] = server
    return found
1189
Jack Jansen11d9b062004-07-16 11:45:00 +00001190if sys.platform == 'darwin':
1191 def getproxies_internetconfig():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001192 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001193
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001194 By convention the mac uses Internet Config to store
1195 proxies. An HTTP proxy, for instance, is stored under
1196 the HttpProxy key.
Guido van Rossum442e7201996-03-20 15:33:11 +00001197
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001198 """
1199 try:
1200 import ic
1201 except ImportError:
1202 return {}
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001203
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001204 try:
1205 config = ic.IC()
1206 except ic.error:
1207 return {}
1208 proxies = {}
1209 # HTTP:
Raymond Hettinger54f02222002-06-01 14:18:47 +00001210 if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001211 try:
1212 value = config['HTTPProxyHost']
1213 except ic.error:
1214 pass
1215 else:
1216 proxies['http'] = 'http://%s' % value
1217 # FTP: XXXX To be done.
1218 # Gopher: XXXX To be done.
1219 return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001220
Tim Peters55c12d42001-08-09 18:04:14 +00001221 def proxy_bypass(x):
1222 return 0
1223
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment win; Internet Config is only
    consulted when the environment defines no proxies at all.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_internetconfig()
Tim Peters182b5ac2004-07-18 06:16:08 +00001226
Mark Hammond4f570b92000-07-26 07:04:38 +00001227elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.  The ProxyEnable and
    ProxyServer values under the current user's Internet Settings key
    are read.  ProxyServer is either a ';'-separated list of
    "protocol=address" entries, or a single address that is used
    for both http and ftp.

    An empty (or partially filled) dictionary is returned when the
    _winreg module is unavailable, the key or a value is missing, or
    a value has an unexpected format; no exception is propagated for
    those cases.

    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        # Nested try/finally so the handle is released even when one of
        # the lookups below raises.
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    import re   # imported lazily, once, before the loop
                    for p in proxyServer.split(';'):
                        if '=' not in p:
                            # Blank or malformed entry (e.g. trailing
                            # ';'): skip it instead of aborting the
                            # whole scan.
                            continue
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001272
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified;
    the registry is only consulted when the environment defines no
    proxies at all.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001281
def proxy_bypass(host):
    """Return 1 if host matches the registry's proxy bypass list, else 0.

    The ProxyEnable and ProxyOverride values are read from the current
    user's Internet Settings registry key.  ProxyOverride is a
    ';'-separated list of glob-style patterns ('*' and '?'); the
    special entry '<local>' is expanded to 'localhost', '127.0.0.1'
    and this machine's host name and address.  Both the given host
    name and, when it resolves, its IP address are tested against
    each pattern, case-insensitively.

    0 is returned when _winreg or re cannot be imported, the registry
    values cannot be read, the proxy is disabled, or the override
    list is empty.

    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        # Nested try/finally so the handle is always released.
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Same set of errors getproxies_registry() tolerates: missing
        # key/value, or a value that cannot be converted.
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost = host
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        # Compare against the name itself (not the list) so an address
        # passed in as host is not listed twice.
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    overrides = []
    for entry in proxyOverride.split(';'):
        if entry == '<local>':
            overrides.extend(['localhost', '127.0.0.1'])
            try:
                localName = socket.gethostname()
                overrides.append(localName)
                overrides.append(socket.gethostbyname(localName))
            except socket.error:
                # Local name not resolvable: keep what we have.
                pass
        elif entry:
            # Empty entries (e.g. from a trailing ';') are dropped; as
            # a pattern they would match every host.
            overrides.append(entry)
    # now check if we match one of the registry values.
    for test in overrides:
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            if re.match(test, val, re.I):
                return 1
    return 0
1333
Mark Hammond4f570b92000-07-26 07:04:38 +00001334else:
1335 # By default use environment variables
1336 getproxies = getproxies_environment
1337
def proxy_bypass(host):
    """Return 0: no bypass list is consulted here.

    The host argument is accepted for interface compatibility and
    ignored.

    """
    return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001340
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001341# Test and time quote() and unquote()
def test1():
    """Round-trip a test string through quote() and unquote().

    The string holds the characters chr(0)..chr(255), repeated four
    times.  'Wrong!' is printed if the round trip does not give the
    string back; the repr of the original, quoted and unquoted
    strings and the elapsed time in seconds are always printed.

    """
    sample = ''.join(map(chr, range(256))) * 4
    started = time.time()
    quoted = quote(sample)
    restored = unquote(quoted)
    finished = time.time()
    if restored != sample:
        print('Wrong!')
    for text in (sample, quoted, restored):
        print(repr(text))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001356
1357
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line with the three values passed in.

    test() hands this function to urlretrieve() as its report hook,
    to report during remote transfers.  All three arguments are
    formatted as integers.

    """
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001362
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001363# Test program
def test(args=[]):
    """Retrieve each URL in args and print what was fetched.

    For every URL this prints a separator line, the local file name
    returned by urlretrieve(), the headers (if any) and the file's
    contents with carriage returns removed.  When args is empty a
    built-in list of sample URLs is used instead (plus an https URL
    if URLopener has an open_https method).  urlcleanup() is always
    called at the end, even if a retrieval fails.

    """
    # The shared default list is never mutated: an empty args is
    # rebound to a fresh list before anything is appended.
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s %s' % (k + ':', h[k]))
                print('======')
            # Close the file explicitly rather than relying on the
            # reference count dropping to zero.
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                fp.close()
            data = data.replace('\r', '')
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001396
def main():
    """Command-line entry point: parse sys.argv and act on it.

    -h prints a usage message and returns.  -t runs test() on the
    remaining arguments, and test1() first when -t is given more than
    once.  Without -t, the contents of each URL argument are printed;
    with no arguments at all a hint to use -h is printed.  An
    unrecognised option prints the getopt error message and the same
    hint.

    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error:
        msg = sys.exc_info()[1]
        print(msg)
        print("Use -h for help")
        return
    selftest = 0
    for flag, unused in opts:
        if flag == '-t':
            selftest = selftest + 1
        if flag == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test; otherwise, contents of urls are printed")
            return
    if not selftest:
        if not args:
            print("Use -h for help")
        for url in args:
            # Trailing comma: print statement form that does not add
            # a newline after the document.
            print(urlopen(url).read()),
        return
    if selftest > 1:
        test1()
    test(args)
Guido van Rossum23490151998-06-25 02:39:00 +00001423
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001424# Run test program when run as a script
1425if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001426 main()