blob: 1e633d8fd25f39f9f5e580e3326846c2b40dfaa7 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Brett Cannon69200fa2004-03-23 21:26:39 +000040__version__ = '1.16' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
Jack Jansendc3e3f61995-12-15 13:22:13 +000044# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # No platform-specific module: fall back to plain URL quoting.
    def url2pathname(pathname):
        """Turn the path part of a URL into a local path by unquoting it."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a local path into the path part of a URL by quoting it."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
64# Shortcut for basic usage
65_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener is built for this call
    only and is not remembered.  Otherwise the module-wide opener is
    used, being created on first use.  data, when not None, is passed
    on to the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        # One-off opener; deliberately not stored in _urlopener.
        handler = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        handler = _urlopener
    if data is not None:
        return handler.open(url, data)
    return handler.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Call retrieve() on the module-wide opener, creating it if needed.

    All arguments are handed through unchanged; the return value is
    whatever the opener's retrieve() method returns.
    """
    global _urlopener
    shared = _urlopener
    if not shared:
        shared = FancyURLopener()
        _urlopener = shared
    return shared.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the module-wide opener, if one has been created."""
    shared = _urlopener
    if shared:
        shared.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
89
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000090ftpcache = {}
91class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000092 """Class to open URLs.
93 This is a class rather than just a subroutine because we may need
94 more than one set of global protocol-specific options.
95 Note -- this is a base class for those who don't want the
96 automatic handling of errors type 302 (relocated) and 401
97 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000100
Guido van Rossumba311382000-08-24 16:18:04 +0000101 version = "Python-urllib/%s" % __version__
102
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000103 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000104 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000105 if proxies is None:
106 proxies = getproxies()
107 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
108 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 self.key_file = x509.get('key_file')
110 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000111 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000112 self.__tempfiles = []
113 self.__unlink = os.unlink # See cleanup()
114 self.tempcache = None
115 # Undocumented feature: if you assign {} to tempcache,
116 # it is used to cache files retrieved with
117 # self.retrieve(). This is not enabled by default
118 # since it does not work for changing documents (and I
119 # haven't got the logic to check expiration headers
120 # yet).
121 self.ftpcache = ftpcache
122 # Undocumented feature: you can use a different
123 # ftp cache by assigning to the .ftpcache member;
124 # in case you want logically independent URL openers
125 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000126
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000127 def __del__(self):
128 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000129
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000130 def close(self):
131 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000132
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000133 def cleanup(self):
134 # This code sometimes runs when the rest of this module
135 # has already been deleted, so it can't use any globals
136 # or import anything.
137 if self.__tempfiles:
138 for file in self.__tempfiles:
139 try:
140 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000141 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000142 pass
143 del self.__tempfiles[:]
144 if self.tempcache:
145 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000146
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000148 """Add a header to be used by the HTTP interface only
149 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000154 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000155 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000156 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 filename, headers = self.tempcache[fullurl]
158 fp = open(filename, 'rb')
159 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 urltype, url = splittype(fullurl)
161 if not urltype:
162 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000163 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000164 proxy = self.proxies[urltype]
165 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000166 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000168 else:
169 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000170 name = 'open_' + urltype
171 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 if '-' in name:
173 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000174 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000176 if proxy:
177 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 else:
179 return self.open_unknown(fullurl, data)
180 try:
181 if data is None:
182 return getattr(self, name)(url)
183 else:
184 return getattr(self, name)(url, data)
185 except socket.error, msg:
186 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000187
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000188 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000189 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000190 type, url = splittype(fullurl)
191 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000192
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 def open_unknown_proxy(self, proxy, fullurl, data=None):
194 """Overridable interface to open unknown URL type."""
195 type, url = splittype(fullurl)
196 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
197
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000198 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000199 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000200 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000201 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000202 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000203 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000204 return self.tempcache[url]
205 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000206 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000207 try:
208 fp = self.open_local_file(url1)
209 hdrs = fp.info()
210 del fp
211 return url2pathname(splithost(url1)[1]), hdrs
212 except IOError, msg:
213 pass
Fred Drake316a7932000-08-24 01:01:26 +0000214 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000215 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000216 if filename:
217 tfp = open(filename, 'wb')
218 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000219 import tempfile
220 garbage, path = splittype(url)
221 garbage, path = splithost(path or "")
222 path, garbage = splitquery(path or "")
223 path, garbage = splitattr(path or "")
224 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000225 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000226 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000227 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 result = filename, headers
229 if self.tempcache is not None:
230 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 bs = 1024*8
232 size = -1
233 blocknum = 1
234 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000235 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 size = int(headers["Content-Length"])
237 reporthook(0, bs, size)
238 block = fp.read(bs)
239 if reporthook:
240 reporthook(1, bs, size)
241 while block:
242 tfp.write(block)
243 block = fp.read(bs)
244 blocknum = blocknum + 1
245 if reporthook:
246 reporthook(blocknum, bs, size)
247 fp.close()
248 tfp.close()
249 del fp
250 del tfp
251 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000252
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000253 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000254
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000255 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000256 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000257 import httplib
258 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000259 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000260 host, selector = splithost(url)
261 if host:
262 user_passwd, host = splituser(host)
263 host = unquote(host)
264 realhost = host
265 else:
266 host, selector = url
267 urltype, rest = splittype(selector)
268 url = rest
269 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000270 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000271 realhost = None
272 else:
273 realhost, rest = splithost(rest)
274 if realhost:
275 user_passwd, realhost = splituser(realhost)
276 if user_passwd:
277 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000278 if proxy_bypass(realhost):
279 host = realhost
280
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000281 #print "proxy via http:", host, selector
282 if not host: raise IOError, ('http error', 'no host given')
283 if user_passwd:
284 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000285 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 else:
287 auth = None
288 h = httplib.HTTP(host)
289 if data is not None:
290 h.putrequest('POST', selector)
291 h.putheader('Content-type', 'application/x-www-form-urlencoded')
292 h.putheader('Content-length', '%d' % len(data))
293 else:
294 h.putrequest('GET', selector)
295 if auth: h.putheader('Authorization', 'Basic %s' % auth)
296 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000297 for args in self.addheaders: h.putheader(*args)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000298 h.endheaders()
299 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000300 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000301 errcode, errmsg, headers = h.getreply()
302 fp = h.getfile()
303 if errcode == 200:
304 return addinfourl(fp, headers, "http:" + url)
305 else:
306 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000307 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000308 else:
309 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000310
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000311 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000312 """Handle http errors.
313 Derived class can override this, or provide specific handlers
314 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000315 # First check if there's a specific handler for this error
316 name = 'http_error_%d' % errcode
317 if hasattr(self, name):
318 method = getattr(self, name)
319 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000320 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000321 else:
322 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000323 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000324 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000325
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000326 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000327 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000328 void = fp.read()
329 fp.close()
330 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000331
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000332 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000333 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000334 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000335 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000336 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000337 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000338 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000339 if host:
340 user_passwd, host = splituser(host)
341 host = unquote(host)
342 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000343 else:
344 host, selector = url
345 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000346 url = rest
347 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000348 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000349 realhost = None
350 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000351 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000352 if realhost:
353 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000354 if user_passwd:
355 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000356 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000357 if not host: raise IOError, ('https error', 'no host given')
358 if user_passwd:
359 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000360 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000361 else:
362 auth = None
363 h = httplib.HTTPS(host, 0,
364 key_file=self.key_file,
365 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000366 if data is not None:
367 h.putrequest('POST', selector)
368 h.putheader('Content-type',
369 'application/x-www-form-urlencoded')
370 h.putheader('Content-length', '%d' % len(data))
371 else:
372 h.putrequest('GET', selector)
Andrew M. Kuchlingff638ea2003-08-29 18:12:23 +0000373 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000374 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000375 for args in self.addheaders: h.putheader(*args)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000376 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000377 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000378 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000379 errcode, errmsg, headers = h.getreply()
380 fp = h.getfile()
381 if errcode == 200:
Guido van Rossumb931bf32001-12-08 17:09:07 +0000382 return addinfourl(fp, headers, "https:" + url)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000383 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000384 if data is None:
385 return self.http_error(url, fp, errcode, errmsg, headers)
386 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000387 return self.http_error(url, fp, errcode, errmsg, headers,
388 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000389
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000390 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000391 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000392 import gopherlib
393 host, selector = splithost(url)
394 if not host: raise IOError, ('gopher error', 'no host given')
395 host = unquote(host)
396 type, selector = splitgophertype(selector)
397 selector, query = splitquery(selector)
398 selector = unquote(selector)
399 if query:
400 query = unquote(query)
401 fp = gopherlib.send_query(selector, query, host)
402 else:
403 fp = gopherlib.send_selector(selector, host)
404 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000405
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000406 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000407 """Use local file or FTP depending on form of URL."""
Jack Jansen4ef11032002-09-12 20:14:04 +0000408 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000409 return self.open_ftp(url)
410 else:
411 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000412
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000413 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000414 """Use local file."""
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000415 import mimetypes, mimetools, rfc822, StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000416 host, file = splithost(url)
417 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000418 try:
419 stats = os.stat(localname)
420 except OSError, e:
421 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000422 size = stats.st_size
423 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000424 mtype = mimetypes.guess_type(url)[0]
425 headers = mimetools.Message(StringIO.StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000426 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
427 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000428 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000429 urlfile = file
430 if file[:1] == '/':
431 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000432 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000433 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000434 host, port = splitport(host)
435 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000436 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000437 urlfile = file
438 if file[:1] == '/':
439 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000440 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000441 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000442 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000443
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000444 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000445 """Use FTP protocol."""
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000446 import mimetypes, mimetools, StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000447 host, path = splithost(url)
448 if not host: raise IOError, ('ftp error', 'no host given')
449 host, port = splitport(host)
450 user, host = splituser(host)
451 if user: user, passwd = splitpasswd(user)
452 else: passwd = None
453 host = unquote(host)
454 user = unquote(user or '')
455 passwd = unquote(passwd or '')
456 host = socket.gethostbyname(host)
457 if not port:
458 import ftplib
459 port = ftplib.FTP_PORT
460 else:
461 port = int(port)
462 path, attrs = splitattr(path)
463 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000464 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000465 dirs, file = dirs[:-1], dirs[-1]
466 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000467 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000468 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000469 # XXX thread unsafe!
470 if len(self.ftpcache) > MAXFTPCACHE:
471 # Prune the cache, rather arbitrarily
472 for k in self.ftpcache.keys():
473 if k != key:
474 v = self.ftpcache[k]
475 del self.ftpcache[k]
476 v.close()
477 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000478 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000479 self.ftpcache[key] = \
480 ftpwrapper(user, passwd, host, port, dirs)
481 if not file: type = 'D'
482 else: type = 'I'
483 for attr in attrs:
484 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000485 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000486 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000487 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000488 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000489 mtype = mimetypes.guess_type("ftp:" + url)[0]
490 headers = ""
491 if mtype:
492 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000493 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000494 headers += "Content-Length: %d\n" % retrlen
495 headers = mimetools.Message(StringIO.StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000496 return addinfourl(fp, headers, "ftp:" + url)
497 except ftperrors(), msg:
498 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000499
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000500 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000501 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000502 # ignore POSTed data
503 #
504 # syntax of data URLs:
505 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
506 # mediatype := [ type "/" subtype ] *( ";" parameter )
507 # data := *urlchar
508 # parameter := attribute "=" value
Neal Norwitzaad18492002-03-26 16:25:01 +0000509 import StringIO, mimetools
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000510 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000511 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000512 except ValueError:
513 raise IOError, ('data error', 'bad data URL')
514 if not type:
515 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000516 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000517 if semi >= 0 and '=' not in type[semi:]:
518 encoding = type[semi+1:]
519 type = type[:semi]
520 else:
521 encoding = ''
522 msg = []
523 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
524 time.gmtime(time.time())))
525 msg.append('Content-type: %s' % type)
526 if encoding == 'base64':
527 import base64
528 data = base64.decodestring(data)
529 else:
530 data = unquote(data)
531 msg.append('Content-length: %d' % len(data))
532 msg.append('')
533 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000534 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000535 f = StringIO.StringIO(msg)
536 headers = mimetools.Message(f, 0)
537 f.fileno = None # needed for addinfourl
538 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000539
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000540
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000541class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000542 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000543
Neal Norwitz60e04cd2002-06-11 13:38:51 +0000544 def __init__(self, *args, **kwargs):
Guido van Rossum68468eb2003-02-27 20:14:51 +0000545 URLopener.__init__(self, *args, **kwargs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000546 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000547 self.tries = 0
548 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000549
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000550 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000551 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000552 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000553
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000554 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000555 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000556 self.tries += 1
557 if self.maxtries and self.tries >= self.maxtries:
558 if hasattr(self, "http_error_500"):
559 meth = self.http_error_500
560 else:
561 meth = self.http_error_default
562 self.tries = 0
563 return meth(url, fp, 500,
564 "Internal Server Error: Redirect Recursion", headers)
565 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
566 data)
567 self.tries = 0
568 return result
569
570 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Raymond Hettinger54f02222002-06-01 14:18:47 +0000571 if 'location' in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000572 newurl = headers['location']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000573 elif 'uri' in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000574 newurl = headers['uri']
575 else:
576 return
577 void = fp.read()
578 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000579 # In case the server sent a relative URL, join with original:
Moshe Zadka5d87d472001-04-09 14:54:21 +0000580 newurl = basejoin(self.type + ":" + url, newurl)
Guido van Rossumfa19f7c2003-05-16 01:46:51 +0000581 return self.open(newurl)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000582
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000583 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000584 """Error 301 -- also relocated (permanently)."""
585 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000586
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000587 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
588 """Error 303 -- also relocated (essentially identical to 302)."""
589 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
590
Guido van Rossumfa19f7c2003-05-16 01:46:51 +0000591 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
592 """Error 307 -- relocated, but turn POST into error."""
593 if data is None:
594 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
595 else:
596 return self.http_error_default(url, fp, errcode, errmsg, headers)
597
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000598 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000599 """Error 401 -- authentication required.
600 See this URL for a description of the basic authentication scheme:
601 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000602 if not 'www-authenticate' in headers:
Tim Peters85ba6732001-02-28 08:26:44 +0000603 URLopener.http_error_default(self, url, fp,
Fred Drakec680ae82001-10-13 18:37:07 +0000604 errcode, errmsg, headers)
Moshe Zadkae99bd172001-02-27 06:27:04 +0000605 stuff = headers['www-authenticate']
606 import re
607 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
608 if not match:
Tim Peters85ba6732001-02-28 08:26:44 +0000609 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000610 errcode, errmsg, headers)
611 scheme, realm = match.groups()
612 if scheme.lower() != 'basic':
Tim Peters85ba6732001-02-28 08:26:44 +0000613 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000614 errcode, errmsg, headers)
615 name = 'retry_' + self.type + '_basic_auth'
616 if data is None:
617 return getattr(self,name)(url, realm)
618 else:
619 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000620
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000621 def retry_http_basic_auth(self, url, realm, data=None):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000622 host, selector = splithost(url)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000623 i = host.find('@') + 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000624 host = host[i:]
625 user, passwd = self.get_user_passwd(host, realm, i)
626 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000627 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000628 newurl = 'http://' + host + selector
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000629 if data is None:
630 return self.open(newurl)
631 else:
632 return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000633
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000634 def retry_https_basic_auth(self, url, realm, data=None):
Tim Peterse1190062001-01-15 03:34:38 +0000635 host, selector = splithost(url)
636 i = host.find('@') + 1
637 host = host[i:]
638 user, passwd = self.get_user_passwd(host, realm, i)
639 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000640 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Tim Peterse1190062001-01-15 03:34:38 +0000641 newurl = '//' + host + selector
Guido van Rossumafc4f042001-01-15 18:31:13 +0000642 return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000643
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000644 def get_user_passwd(self, host, realm, clear_cache = 0):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000645 key = realm + '@' + host.lower()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000646 if key in self.auth_cache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000647 if clear_cache:
648 del self.auth_cache[key]
649 else:
650 return self.auth_cache[key]
651 user, passwd = self.prompt_user_passwd(host, realm)
652 if user or passwd: self.auth_cache[key] = (user, passwd)
653 return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000654
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000655 def prompt_user_passwd(self, host, realm):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000656 """Override this in a GUI environment!"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000657 import getpass
658 try:
659 user = raw_input("Enter username for %s at %s: " % (realm,
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000660 host))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000661 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
662 (user, realm, host))
663 return user, passwd
664 except KeyboardInterrupt:
665 print
666 return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000667
668
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000669# Utility functions
670
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; the result is kept in _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000678
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; the result is kept in _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000686
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value of
    ftplib.all_errors is kept in _ftperrors afterwards.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000695
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and shared by all
    callers through _noheaders.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    import StringIO
    empty_source = StringIO.StringIO()
    _noheaders = mimetools.Message(empty_source, 0)
    _noheaders.fp.close()       # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000706
707
708# Utility classes
709
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000710class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000711 """Class used by open_ftp() for cache of open FTP connections."""
712
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000713 def __init__(self, user, passwd, host, port, dirs):
714 self.user = user
715 self.passwd = passwd
716 self.host = host
717 self.port = port
718 self.dirs = dirs
719 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000720
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000721 def init(self):
722 import ftplib
723 self.busy = 0
724 self.ftp = ftplib.FTP()
725 self.ftp.connect(self.host, self.port)
726 self.ftp.login(self.user, self.passwd)
727 for dir in self.dirs:
728 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000729
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000730 def retrfile(self, file, type):
731 import ftplib
732 self.endtransfer()
733 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
734 else: cmd = 'TYPE ' + type; isdir = 0
735 try:
736 self.ftp.voidcmd(cmd)
737 except ftplib.all_errors:
738 self.init()
739 self.ftp.voidcmd(cmd)
740 conn = None
741 if file and not isdir:
742 # Use nlst to see if the file exists at all
743 try:
744 self.ftp.nlst(file)
745 except ftplib.error_perm, reason:
746 raise IOError, ('ftp error', reason), sys.exc_info()[2]
747 # Restore the transfer mode!
748 self.ftp.voidcmd(cmd)
749 # Try to retrieve as a file
750 try:
751 cmd = 'RETR ' + file
752 conn = self.ftp.ntransfercmd(cmd)
753 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000754 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000755 raise IOError, ('ftp error', reason), sys.exc_info()[2]
756 if not conn:
757 # Set transfer mode to ASCII!
758 self.ftp.voidcmd('TYPE A')
759 # Try a directory listing
760 if file: cmd = 'LIST ' + file
761 else: cmd = 'LIST'
762 conn = self.ftp.ntransfercmd(cmd)
763 self.busy = 1
764 # Pass back both a suitably decorated object and a retrieval length
765 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000766 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000767 def endtransfer(self):
768 if not self.busy:
769 return
770 self.busy = 0
771 try:
772 self.ftp.voidresp()
773 except ftperrors():
774 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000775
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000776 def close(self):
777 self.endtransfer()
778 try:
779 self.ftp.close()
780 except ftperrors():
781 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000782
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read methods as
    instance attributes.
    """

    def __init__(self, fp):
        """Bind read/readline, plus whichever optional methods fp has."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the bound methods and close the wrapped file, if any."""
        self.read = self.readline = None
        self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000808
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is called with hookargs after the underlying file has
    been closed, and at most once.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the file, then run and forget the close hook."""
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000823
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember the headers object handed back by info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000833
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember the headers and url given by the caller."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000847
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000848
Guido van Rossum7c395db1994-07-04 22:14:49 +0000849# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000850# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000851# splittype('type:opaquestring') --> 'type', 'opaquestring'
852# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000853# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
854# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000855# splitport('host:port') --> 'host', 'port'
856# splitquery('/path?query') --> '/path', 'query'
857# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000858# splitattr('/path;attr1=value1;attr2=value2;...') ->
859# '/path', ['attr1=value1', 'attr2=value2', ...]
860# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000861# splitgophertype('/Xselector') --> 'X', 'selector'
862# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
864
Walter Dörwald65230a22002-06-03 15:58:32 +0000865try:
866 unicode
867except NameError:
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000868 def _is_unicode(x):
869 return 0
Walter Dörwald65230a22002-06-03 15:58:32 +0000870else:
871 def _is_unicode(x):
872 return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000873
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode object is returned unchanged.
    Raises UnicodeError when a unicode URL cannot be encoded as ASCII.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
885
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, then one pair of enclosing angle
    brackets, then a leading 'URL:' marker.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000893
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url does not
    start with a 'type:' prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compile on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its ':'; the rest follows it.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000907
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile on first use only.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000919
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  When one is found both parts
    are passed through unquote() and returned as a list; otherwise
    the result is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile on first use only.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000931
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Returns (user, None) when
    there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000943
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  Returns (host, None)
    when host does not end in ':' followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compile on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000956
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    if not port:
        # A trailing ':' with nothing after it is not a valid number.
        return host, None
    try:
        return host, int(port)
    except ValueError:
        return host, None
Guido van Rossum53725a21996-06-13 19:12:35 +0000978
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when
    there is none.
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use only.
        import re
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000990
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when
    there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compile on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001002
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001008
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when
    there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compile on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001020
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' and has
    at least one more character after it.
    """
    has_type = selector[:1] == '/' and len(selector[1:2]) > 0
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001026
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hexadecimal digits is left in
    the result unchanged.
    """
    pieces = s.split('%')
    decoded = [pieces[0]]
    for chunk in pieces[1:]:
        # Each chunk is the text that followed one '%'.
        if len(chunk) >= 2:
            try:
                decoded.append(chr(int(chunk[:2], 16)) + chunk[2:])
                continue
            except ValueError:
                pass
        # Too short, or not valid hex: put the '%' back.
        decoded.append('%' + chunk)
    return "".join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001045
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # Every '+' stands for a space; turn them back before the %XX
    # escapes are decoded.
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001052
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001053always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001054 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001055 '0123456789' '_.-')
1056
1057_fast_safe_test = always_safe + '/'
1058_fast_safe = None
1059
1060def _fast_quote(s):
1061 global _fast_safe
1062 if _fast_safe is None:
1063 _fast_safe = {}
1064 for c in _fast_safe_test:
1065 _fast_safe[c] = c
1066 res = list(s)
1067 for i in range(len(res)):
1068 c = res[i]
Raymond Hettinger54f02222002-06-01 14:18:47 +00001069 if not c in _fast_safe:
Guido van Rossume27a7b82001-01-19 03:28:15 +00001070 res[i] = '%%%02X' % ord(c)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001071 return ''.join(res)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001072
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character of s that is neither in always_safe nor in safe
    is replaced by a %XX escape.

    Which characters have to be quoted depends on the part of the URL
    (path info, query, ...) being built.  RFC 2396 Uniform Resource
    Identifiers (URI): Generic Syntax lists these reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    The default is meant for quoting the path section of a URL, so
    '/' is left alone: it is reserved, but in a path the existing
    slashes are normally used in their reserved role.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        # Default safe set: use the dictionary-based implementation.
        return _fast_quote(s)
    quoted = []
    for char in s:
        if char in safe:
            quoted.append(char)
        else:
            quoted.append('%%%02X' % ord(char))
    return ''.join(quoted)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001103
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    # Quote the space-separated words one by one and glue them back
    # together with '+'; a string without spaces is a single word.
    return '+'.join([quote(word, safe) for word in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001113
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001114def urlencode(query,doseq=0):
1115 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001116
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001117 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001118 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001119
1120 If the query arg is a sequence of two-element tuples, the order of the
1121 parameters in the output will match the order of parameters in the
1122 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001123 """
Tim Peters658cba62001-02-09 20:06:00 +00001124
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001125 if hasattr(query,"items"):
1126 # mapping objects
1127 query = query.items()
1128 else:
1129 # it's a bother at times that strings and string-like objects are
1130 # sequences...
1131 try:
1132 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001133 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001134 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001135 raise TypeError
1136 # zero-length sequences of all types will get here and succeed,
1137 # but that's a minor nit - since the original implementation
1138 # allowed empty dicts that type of behavior probably should be
1139 # preserved for consistency
1140 except TypeError:
1141 ty,va,tb = sys.exc_info()
1142 raise TypeError, "not a valid non-string sequence or mapping object", tb
1143
Guido van Rossume7b146f2000-02-04 15:28:42 +00001144 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001145 if not doseq:
1146 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001147 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001148 k = quote_plus(str(k))
1149 v = quote_plus(str(v))
1150 l.append(k + '=' + v)
1151 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001152 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001153 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001154 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001155 v = quote_plus(v)
1156 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001157 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001158 # is there a reasonable way to convert to ASCII?
1159 # encode generates a string, but "replace" or "ignore"
1160 # lose information and "strict" can raise UnicodeError
1161 v = quote_plus(v.encode("ASCII","replace"))
1162 l.append(k + '=' + v)
1163 else:
1164 try:
1165 # is this a sufficient test for sequence-ness?
1166 x = len(v)
1167 except TypeError:
1168 # not a sequence
1169 v = quote_plus(str(v))
1170 l.append(k + '=' + v)
1171 else:
1172 # loop over the sequence
1173 for elt in v:
1174 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001175 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001176
Guido van Rossum442e7201996-03-20 15:33:11 +00001177# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are ignored.

    """
    suffix = '_proxy'
    found = {}
    for variable, setting in os.environ.items():
        variable = variable.lower()
        if setting and variable.endswith(suffix):
            found[variable[:-len(suffix)]] = setting
    return found
1193
# Platform specific proxy discovery.  Every branch defines
# getproxies() (scheme -> proxy URL dictionary) and proxy_bypass(host)
# (true if `host` should be contacted directly, not through the proxy).
if os.name == 'mac':
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the ic module cannot be
        imported or Internet Config cannot be opened.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Return 0: no bypass list is consulted on this platform."""
        return 0

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  The ProxyServer value
        is either a single "host:port" used for all protocols, or a
        ';' separated list of "protocol=address" entries.

        Best effort: an empty (or partially filled) dictionary is
        returned when the registry cannot be read or a value has an
        unexpected format.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # Nested try/finally (rather than a trailing Close() call) so
            # the registry handle is released on the error paths too.
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        import re
                        has_scheme = re.compile('^([^/:]+)://').match
                        for p in proxyServer.split(';'):
                            if '=' not in p:
                                # Empty or malformed entry (e.g. from a
                                # trailing ';'): skip it instead of
                                # abandoning the remaining entries.
                                continue
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not has_scheme(address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if `host` matches the registry's ProxyOverride list.

        `host` is checked both by name and, when it resolves, by IP
        address against each ';' separated ProxyOverride entry.  Entries
        are glob patterns ('*' and '?'), matched case-insensitively from
        the start of the string; the special entry '<local>' stands for
        localhost, 127.0.0.1 and this machine's own name and address.

        Returns 0 when proxies are disabled, no override list is set, or
        the registry cannot be read.

        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Key/value missing, or (as in getproxies_registry) a value
            # that cannot be converted: behave as if nothing is bypassed.
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost = host
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            # Compare against the name itself (not the list) so a literal
            # IP address is not listed twice.
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        overrides = []
        for entry in proxyOverride.split(';'):
            if not entry:
                # An empty entry (e.g. from a trailing ';') would become
                # an empty pattern, which matches every host.
                continue
            if entry == '<local>':
                overrides.append('localhost')
                overrides.append('127.0.0.1')
                try:
                    localName = socket.gethostname()
                    overrides.append(localName)
                    overrides.append(socket.gethostbyname(localName))
                except socket.error:
                    # Own name not resolvable; keep what we have.
                    pass
            else:
                overrides.append(entry)
        # now check if we match one of the registry values.
        for test in overrides:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: no bypass list is consulted on this platform."""
        return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001341
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001342# Test and time quote() and unquote()
1343def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001344 s = ''
1345 for i in range(256): s = s + chr(i)
1346 s = s*4
1347 t0 = time.time()
1348 qs = quote(s)
1349 uqs = unquote(qs)
1350 t1 = time.time()
1351 if uqs != s:
1352 print 'Wrong!'
Walter Dörwald70a6b492004-02-12 17:35:32 +00001353 print repr(s)
1354 print repr(qs)
1355 print repr(uqs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001356 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001357
1358
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001359def reporthook(blocknum, blocksize, totalsize):
1360 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001361 print "Block number: %d, Block size: %d, Total size: %d" % (
1362 blocknum, blocksize, totalsize)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001363
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001364# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001365def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001366 if not args:
1367 args = [
1368 '/etc/passwd',
1369 'file:/etc/passwd',
1370 'file://localhost/etc/passwd',
Andrew M. Kuchling56a42352002-03-18 22:18:46 +00001371 'ftp://ftp.python.org/pub/python/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001372## 'gopher://gopher.micro.umn.edu/1/',
1373 'http://www.python.org/index.html',
1374 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001375 if hasattr(URLopener, "open_https"):
1376 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001377 try:
1378 for url in args:
1379 print '-'*10, url, '-'*10
1380 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001381 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001382 if h:
1383 print '======'
1384 for k in h.keys(): print k + ':', h[k]
1385 print '======'
1386 fp = open(fn, 'rb')
1387 data = fp.read()
1388 del fp
1389 if '\r' in data:
1390 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001391 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001392 print data
1393 fn, h = None, None
1394 print '-'*40
1395 finally:
1396 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001397
Guido van Rossum23490151998-06-25 02:39:00 +00001398def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001399 import getopt, sys
1400 try:
1401 opts, args = getopt.getopt(sys.argv[1:], "th")
1402 except getopt.error, msg:
1403 print msg
1404 print "Use -h for help"
1405 return
1406 t = 0
1407 for o, a in opts:
1408 if o == '-t':
1409 t = t + 1
1410 if o == '-h':
1411 print "Usage: python urllib.py [-t] [url ...]"
1412 print "-t runs self-test;",
1413 print "otherwise, contents of urls are printed"
1414 return
1415 if t:
1416 if t > 1:
1417 test1()
1418 test(args)
1419 else:
1420 if not args:
1421 print "Use -h for help"
1422 for url in args:
1423 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001424
# Run the command-line test program (main) only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == '__main__':
    main()