blob: d60ac5c2c5670c740dd9544a8a31f1a55554f173 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Barry Warsaw820c1202008-06-12 04:06:45 +000020The info() method returns an email.message.Message object which can be
Guido van Rossume7b146f2000-02-04 15:28:42 +000021used to query various info about the object, if available.
Barry Warsaw820c1202008-06-12 04:06:45 +000022(email.message.Message objects provide a dict-like interface.)
Guido van Rossume7b146f2000-02-04 15:28:42 +000023"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Georg Brandl24420152008-05-26 16:32:26 +000025import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +000026import email.message
27import email
Jack Jansendc3e3f61995-12-15 13:22:13 +000028import os
Jeremy Hylton5f22af12007-08-16 17:55:18 +000029import socket
Guido van Rossum3c8484e1996-11-20 22:02:24 +000030import sys
Jeremy Hylton5f22af12007-08-16 17:55:18 +000031import time
Brett Cannon69200fa2004-03-23 21:26:39 +000032from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000033
# Public names exported by a star-import of this module.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which is sent as the User-Agent.
__version__ = '1.17'    # XXX This version is not always updated :-(

# Upper bound checked by URLopener.open_ftp() before it prunes the cache.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000045
# Helper for non-unix systems
# url2pathname()/pathname2url() are taken from a platform module on
# 'mac' and 'nt'; everywhere else they are defined right here.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use.

        In this fallback the conversion is nothing more than unquote()."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use.

        In this fallback the conversion is nothing more than quote()."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000061
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000062# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64# (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
# Shortcut for basic usage
# Lazily created FancyURLopener shared by urlopen(), urlretrieve() and
# urlcleanup().
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When *proxies* is given, a throw-away FancyURLopener configured with
    that mapping is used; otherwise the module-wide opener is created on
    first use and reused afterwards.  *data* is only forwarded to the
    opener when it is not None.
    """
    global _urlopener
    if proxies is None:
        # No explicit proxy mapping: fall back to the shared opener.
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    else:
        opener = FancyURLopener(proxies=proxies)
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
Jeremy Hylton39b198d2007-08-04 19:22:00 +000085
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Call retrieve() on the module-wide opener and return its result.

    The shared FancyURLopener is created on first use; all arguments are
    passed through unchanged.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
Jeremy Hylton39b198d2007-08-04 19:22:00 +000091
def urlcleanup():
    """Run cleanup() on the module-wide opener, if one has been created."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000095
# check for SSL
# Only a failed import means "no SSL support".  Anything else (for
# instance KeyboardInterrupt) must propagate instead of being silently
# turned into _have_ssl = False.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
103
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError that also carries what had been retrieved so far.

    message -- passed on to IOError
    content -- stored unchanged on the instance as .content
    """

    def __init__(self, message, content):
        super().__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000109
# FTP connection cache shared by every URLopener that does not rebind
# its own .ftpcache attribute.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() (reached from __del__) still works
    # on an instance whose __init__ never got as far as creating the list.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up the proxy mapping, client certificate files and caches.

        proxies -- mapping from URL scheme to proxy URL; when None the
                   result of getproxies() is used.
        x509    -- optional 'key_file' and 'cert_file' keyword arguments;
                   they are handed to HTTPSConnection by open_https().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release everything held by this opener (same as cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files made by retrieve() and empty tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        The URL is passed through toBytes() and unwrap(); a tempcache hit
        is served from the cached file.  Otherwise the call is dispatched
        to the method named open_<scheme> ('-' replaced by '_'; 'file'
        when the URL has no scheme).  When self.proxies has an entry for
        the scheme, dispatch happens on the proxy's scheme instead and
        the handler receives a (proxyhost, fullurl) tuple.

        Schemes without a handler go to open_unknown() or
        open_unknown_proxy().  A socket.error raised by the handler is
        re-raised as IOError('socket error', msg) with the original
        traceback.
        """
        fullurl = unwrap(toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type.

        This implementation always raises IOError."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL whose proxy has an unknown type.

        This implementation always raises IOError."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        filename   -- where to store the data; when omitted a temporary
                      file is created (removed again by cleanup()), except
                      for 'file' or scheme-less URLs that can be opened
                      as a local file, for which the local path is
                      returned without copying.
        reporthook -- optional callable invoked as
                      reporthook(blocknum, blocksize, totalsize) once
                      before the first read and after every block;
                      totalsize is -1 when no Content-Length was sent.
        data       -- forwarded to open().

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                # Only the headers are needed; do not leave the probe
                # handle open until garbage collection.
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a local file: fall back to a real download.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                # Parse the announced size whether or not a reporthook was
                # supplied; the short-read check below depends on it.
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            # Close the source even when reading or writing failed.
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.

        Returns an addinfourl object for 2xx responses; any other status
        is handed to http_error().  Raises IOError when no host can be
        determined or the status line cannot be parsed.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError('http error', 'no host given')

        # b64encode() works on bytes and returns bytes: encode the
        # "user:password" text first and decode the result so that the
        # header value is plain text rather than the repr of a bytes object.
        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        # Headers registered through addheader() are applied last and so
        # override the ones computed above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise IOError('http protocol error', 0,
                          'got a bad status line', None)

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= response.status < 300):
            return addinfourl(response.fp, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.

        A specific handler's result is returned when it is true;
        otherwise http_error_default() is called."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.read()
        fp.close()
        raise IOError('http error', errcode, errmsg, headers)

    # open_https() only exists when the ssl module could be imported.
    if _have_ssl:
        def _https_connection(self, host):
            """Connection factory using this opener's key/cert files."""
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError('file error', 'proxy support for file protocol currently not implemented')
        # '//host/...' with a host other than 'localhost' is handed to FTP.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl object whose headers carry Content-Type,
        Content-Length and Last-modified built from os.stat().  Raises
        IOError when the file cannot be stat'ed or when the URL names a
        host that does not resolve to this machine."""
        import mimetypes, email.utils
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by
        (user, host, port, directory).  Errors listed by ftperrors() are
        re-raised as IOError('ftp error', msg)."""
        if not isinstance(url, str):
            raise IOError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise IOError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            # Iterate over a snapshot of the keys: entries are deleted
            # inside the loop, which is not allowed on a live dict view.
            for k in list(self.ftpcache.keys()):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        The *data* argument is ignored.  Raises IOError when the URL has
        no ',' separating the media type from the payload."""
        if not isinstance(url, str):
            raise IOError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        from io import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        # %H:%M:%S instead of %T: the latter is not among the strftime
        # directives Python documents as portable.
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            # The base64 codec works on bytes; latin-1 maps every decoded
            # byte to exactly one character so the payload stays text.
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000547
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000548
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Redirect responses (301, 302, 303, 307) are followed, and 401/407
    responses that use the Basic scheme are retried with credentials
    obtained through get_user_passwd().
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps 'realm@host' to a (user, passwd) pair; see get_user_passwd().
        self.auth_cache = {}
        # Number of redirects followed in the chain currently in progress.
        self.tries = 0
        # Redirect limit; a false value disables the check in http_error_302.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless the chain has reached self.maxtries, in
        which case the response is reported as a 500 through
        http_error_500 (if defined) or http_error_default.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when the redirect raises; otherwise a stale count
            # would make later, unrelated requests hit the limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the 'location' (or 'uri') header.

        Returns None when neither header is present.  Raises IOError when
        the target is not an http, https or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Consume and release the redirect response before reconnecting.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # The target comes from the remote server, so do not let it steer
        # us to other schemes (e.g. file:) that would expose local data.
        if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed"
                          % newurl,
                          headers)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): URLopener.http_error_default is defined outside this
        # class and presumably raises; returning its result keeps this
        # handler from running on with an unusable challenge if it does not.
        if 'www-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch on the scheme of the request, e.g. retry_http_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): see http_error_401 about the returns below.
        if 'proxy-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch on the request scheme, e.g. retry_proxy_http_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after storing proxy credentials.

        Rewrites self.proxies['http'] to include 'user:passwd@' and reopens
        the URL.  Returns None when no credentials are available.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy entry already carried 'user@' info;
        # it is passed as clear_cache so a cached pair is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after storing proxy credentials.

        Rewrites self.proxies['https'] to include 'user:passwd@' and reopens
        the URL.  Returns None when no credentials are available.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Reopen an http URL with 'user:passwd@' inserted before the host.

        Returns None when no credentials are available.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried 'user@' info; it is
        # passed as clear_cache so a cached pair is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Reopen an https URL with 'user:passwd@' inserted before the host.

        Returns None when no credentials are available.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless clear_cache is true, in which case
        the cached entry is dropped and the user is prompted again.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C at the prompt means "no credentials", not an error.
            print()
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000732
733
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000734# Utility functions
735
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the remembered value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000743
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the remembered value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000751
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is remembered.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000760
_noheaders = None
def noheaders():
    """Return an empty email.message.Message object.

    One instance is created on first use and shared by all callers.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message.Message()
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000768
769
770# Utility classes
771
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Remember the connection parameters and connect immediately."""
        self.user, self.passwd = user, passwd
        self.host, self.port = host, port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """Connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start a retrieval; return (wrapped file object, length).

        A type of 'd' or 'D' asks for a directory listing.  Otherwise
        the file is retrieved with RETR, falling back to a LIST when the
        server answers RETR with a 550 error or no file name was given.
        """
        import ftplib
        self.endtransfer()
        isdir = type in ('d', 'D')
        cmd = 'TYPE A' if isdir else 'TYPE ' + type
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Reconnect once and repeat the command.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            cmd = 'RETR ' + file
            try:
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if not str(reason).startswith('550'):
                    raise IOError('ftp error', reason).with_traceback(sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if not file:
                cmd = 'LIST'
            else:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise IOError('ftp error', reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        stream = addclosehook(conn[0].makefile('rb'), self.endtransfer)
        return (stream, conn[1])

    def endtransfer(self):
        """Read the pending reply for a transfer in progress, if any."""
        if self.busy:
            self.busy = 0
            try:
                self.ftp.voidresp()
            except ftperrors():
                pass

    def close(self):
        """Finish any transfer and close the connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000849
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object fp and exposes its read(), readline(),
    readlines() and fileno() as attributes of the wrapper.  Iteration
    and next() are delegated to fp.
    """

    # XXX Add a method to expose the timeout on the underlying socket?

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            # Give callers something callable even without a descriptor.
            self.fileno = lambda: None

    # iter() and next() look special methods up on the type, not on the
    # instance, so these must be real methods for iteration to work.
    def __iter__(self):
        return iter(self.fp)

    def __next__(self):
        return next(self.fp)

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Drop the delegated methods and close fp; safe to call twice."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000880
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is called with hookargs after the file has been closed,
    and only the first time close() is called.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000895
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        # Kept as given; handed back unchanged by info().
        self.headers = headers

    def info(self):
        """Return the headers object passed to the constructor."""
        headers = self.headers
        return headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000905
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object passed to the constructor."""
        headers = self.headers
        return headers

    def getcode(self):
        """Return the code passed to the constructor (None by default)."""
        code = self.code
        return code

    def geturl(self):
        """Return the URL passed to the constructor."""
        url = self.url
        return url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000923
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000924
Guido van Rossum7c395db1994-07-04 22:14:49 +0000925# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000926# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000927# splittype('type:opaquestring') --> 'type', 'opaquestring'
928# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000929# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
930# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000931# splitport('host:port') --> 'host', 'port'
932# splitquery('/path?query') --> '/path', 'query'
933# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000934# splitattr('/path;attr1=value1;attr2=value2;...') ->
935# '/path', ['attr1=value1', 'attr2=value2', ...]
936# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000937# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
939
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A str argument is checked to be pure ASCII and returned; any other
    object is returned unchanged.  Raises UnicodeError for a str that
    contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of toBytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
952
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing '<...>' pair and a
    leading 'URL:' marker, in that order.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000960
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url has no
    'type:' prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000974
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000986
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are unquoted.  The split is made at the last '@'; the
    user part is None when host contains no '@'.  A tuple is returned
    in every case.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match:
        # map() is lazy in Python 3; build a real tuple so this branch
        # returns the same kind of object as the no-match branch.
        return tuple(map(unquote, match.group(1, 2)))
    return None, host
Guido van Rossum7c395db1994-07-04 22:14:49 +0000998
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'; passwd is None when there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001010
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None (and host is
    returned whole) when host does not end in ':' followed by digits.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001023
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    if not port:
        # A trailing ':' with nothing after it is not a valid number.
        return host, None
    try:
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001045
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'; query is None when there is none.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and kept for later calls.
        import re
        # Raw string: '\?' in a plain literal is an invalid escape sequence.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001057
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'; tag is None when there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001069
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, *attrs = url.split(';')
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001075
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='; value is None when there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001087
# Every two-character hex string, in any mix of upper and lower case,
# mapped to the character with that code point (0-255).
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%XX' escape, with XX two hex digits in either case, is
    replaced by the character with that code.  A '%' that is not
    followed by two hex digits is left in place.
    """
    head, *rest = s.split('%')
    pieces = [head]
    for item in rest:
        char = _hextochr.get(item[:2])
        if char is None:
            # Not a valid escape: put back the '%' that split() removed.
            pieces.append('%' + item)
        else:
            pieces.append(char + item[2:])
    return "".join(pieces)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001103
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # '+' stands for a space; convert before decoding so that an
    # escaped plus sign ('%2B') survives as a literal '+'.
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001108
# Characters that are never quoted, whatever 'safe' set the caller gives.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789'
               '_.-')
# Quoter instances created by quote(), keyed by (safe, always_safe).
_safe_quoters = {}

class Quoter:
    """Callable that maps a single character to its quoted form.

    Characters in safe or always_safe are returned unchanged.  Other
    characters below code point 256 become one '%XX' escape; the rest
    become the '%XX' escapes of their UTF-8 bytes.
    """

    def __init__(self, safe):
        # Results for characters below code point 256 are remembered here.
        self.cache = {}
        self.safe = safe + always_safe

    def __call__(self, c):
        quoted = self.cache.get(c)
        if quoted is not None:
            return quoted
        code = ord(c)
        if code >= 256:
            return "".join(['%%%02X' % i for i in c.encode("utf-8")])
        quoted = c if c in self.safe else '%%%02X' % code
        self.cache[c] = quoted
        return quoted
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001129
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # One Quoter per distinct safe set is kept and reused across calls.
    cachekey = (safe, always_safe)
    quoter = _safe_quoters.get(cachekey)
    if quoter is None:
        quoter = _safe_quoters[cachekey] = Quoter(safe)
    return ''.join(map(quoter, s))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001159
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unquoted for now so they can be turned into '+'.
    spaced = quote(s, safe + ' ')
    return spaced.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001166
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    query is either an object with an items() method (a mapping) or a
    sequence of two-element tuples.  Keys and values are converted with
    str() and quoted with quote_plus(); the resulting 'key=value' pairs
    are joined with '&'.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.  A str value
    is always treated as a single value, never as a sequence.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError if query is neither a mapping nor a non-string
    sequence whose first element is a tuple.
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            tb = sys.exc_info()[2]
            raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)

    pairs = []
    if not doseq:
        # preserve old behavior: every value is a single parameter
        for k, v in query:
            pairs.append(quote_plus(str(k)) + '=' + quote_plus(str(v)))
        return '&'.join(pairs)

    for k, v in query:
        k = quote_plus(str(k))
        if isinstance(v, str):
            # A string has a length too, but must not be split into
            # one parameter per character.
            pairs.append(k + '=' + quote_plus(v))
            continue
        try:
            # is this a sufficient test for sequence-ness?
            len(v)
        except TypeError:
            # not a sequence
            pairs.append(k + '=' + quote_plus(str(v)))
        else:
            # loop over the sequence
            for elt in v:
                pairs.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(pairs)
Guido van Rossum810a3391998-07-22 21:33:23 +00001229
Guido van Rossum442e7201996-03-20 15:33:11 +00001230# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The mapping is built from the environment variables named
    <scheme>_proxy, which seems to be the standard convention.
    Variable names are compared in lower case, variables with an empty
    value are skipped, and no_proxy is left out because
    proxy_bypass_environment deals with it.  If you need a different
    way, you can pass a proxies dictionary to the [Fancy]URLopener
    constructor.

    """
    lowered = ((name.lower(), value) for name, value in os.environ.items())
    return dict(
        (name[:-6], value)
        for name, value in lowered
        if name != 'no_proxy' and value and name.endswith('_proxy')
    )
1249
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Reads the environment variable no_proxy (or NO_PROXY when no_proxy
    is unset or empty), which should be a list of DNS suffixes separated
    by commas, or '*' for all hosts.  Returns 1 when host, with or
    without its port, ends with one of the suffixes or when the value is
    '*'; returns 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, _port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    matched = any(
        hostonly.endswith(suffix) or host.endswith(suffix)
        for suffix in no_proxy.split(',')
        if suffix
    )
    # otherwise, don't bypass
    return 1 if matched else 0
1268
1269
Jack Jansen11d9b062004-07-16 11:45:00 +00001270if sys.platform == 'darwin':
Benjamin Petersona37cfc62008-05-26 13:48:34 +00001271
def _CFSetup(sc):
    """Declare ctypes prototypes on the library handle sc.

    Sets argtypes and restype for every CoreFoundation /
    SystemConfiguration function this module calls through sc.  Without
    a declared restype ctypes treats a return value as a C int, which
    truncates pointers on 64-bit builds, so the two CFArray accessors
    used to walk the proxy exceptions list are declared here as well.
    """
    from ctypes import c_int32, c_void_p, c_char_p, c_int, c_long

    # (function name, argument types, result type)
    prototypes = (
        ("CFStringCreateWithCString", [c_void_p, c_char_p, c_int32], c_void_p),
        ("SCDynamicStoreCopyProxies", [c_void_p], c_void_p),
        ("CFDictionaryGetValue", [c_void_p, c_void_p], c_void_p),
        ("CFStringGetLength", [c_void_p], c_int32),
        ("CFStringGetCString", [c_void_p, c_char_p, c_int32, c_int32], c_int32),
        ("CFNumberGetValue", [c_void_p, c_int, c_void_p], c_int32),
        ("CFRelease", [c_void_p], None),
        # CFIndex is a signed long; the element is returned as a pointer.
        ("CFArrayGetCount", [c_void_p], c_long),
        ("CFArrayGetValueAtIndex", [c_void_p, c_long], c_void_p),
    )
    for name, argtypes, restype in prototypes:
        func = getattr(sc, name)
        func.argtypes = argtypes
        func.restype = restype
1288
def _CStringFromCFString(sc, value):
    """Return the contents of the CFString value as a Python str.

    sc is the library handle the CoreFoundation functions are called
    through.  The string is copied into a buffer of
    CFStringGetLength(value) + 1 bytes using encoding 0
    (kCFStringEncodingMacRoman) and decoded with the matching codec, so
    callers can use the result in string formatting and pattern
    matching.  The return value of CFStringGetCString is not checked;
    the buffer starts out zero-filled.
    """
    from ctypes import create_string_buffer
    kCFStringEncodingMacRoman = 0
    # one extra byte for the terminating NUL
    length = sc.CFStringGetLength(value) + 1
    buff = create_string_buffer(length)
    sc.CFStringGetCString(value, buff, length, kCFStringEncodingMacRoman)
    return buff.value.decode("mac_roman")
1295
def _CFNumberToInt32(sc, cfnum):
    """Return the value of the CFNumber cfnum as a Python int.

    The number is read through sc.CFNumberGetValue, requesting number
    type 3 (named kCFNumberSInt32Type below), into a C int.
    """
    import ctypes
    kCFNumberSInt32Type = 3
    result = ctypes.c_int()
    sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, ctypes.byref(result))
    return result.value
1302
1303
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    The host bypasses the proxy when it contains no dot and the
    ExcludeSimpleHostnames setting is on, or when it matches an entry
    of ExceptionsList.  An entry that starts with digits is treated as
    an IPv4 network ("169.254/16"; without "/bits" the prefix length is
    8 bits per octet given) and compared against the resolved address of
    host; any other entry is matched against host as an fnmatch pattern
    ("*.local").  Entries with a prefix length outside 0..32, and
    network entries when host cannot be resolved, are skipped.
    """
    from ctypes import cdll
    from ctypes.util import find_library
    import re
    import socket
    from fnmatch import fnmatch

    def ip2num(ipAddr):
        # Pack a dotted address into one integer; missing trailing
        # octets count as 0.  A list is needed for len() and indexing.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    # Check the handle before configuring it.
    if not sc:
        return False
    _CFSetup(sc)

    # c_char_p parameters take bytes, not str.
    kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(
        0, b"ExceptionsList", 0)
    kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(
        0, b"ExcludeSimpleHostnames", 0)

    proxyDict = None
    hostIP = None
    try:
        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return False

        # Check for simple host names:
        if '.' not in host:
            exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                kSCPropNetProxiesExcludeSimpleHostnames)
            if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                return True

        # Check the exceptions list:
        exceptions = sc.CFDictionaryGetValue(proxyDict,
            kSCPropNetProxiesExceptionsList)
        if not exceptions:
            return False

        # Items in the list are strings like these: *.local, 169.254/16
        for index in range(sc.CFArrayGetCount(exceptions)):
            value = sc.CFArrayGetValueAtIndex(exceptions, index)
            if not value:
                continue
            value = _CStringFromCFString(sc, value)
            if isinstance(value, bytes):
                # The helper may hand back the raw buffer contents;
                # the matching below needs text.
                value = value.decode("mac_roman")

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is None:
                if fnmatch(host, value):
                    return True
                continue

            if hostIP is None:
                try:
                    hostIP = ip2num(socket.gethostbyname(host))
                except socket.error:
                    # Cannot resolve host, so it cannot match a network.
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No "/bits": every octet that was given is significant.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            if mask < 0 or mask > 32:
                # Not a valid IPv4 prefix length (and would make the
                # shift count negative).
                continue
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        return False

    finally:
        # Release everything this call created or copied; a failed
        # call yields None, which must not be passed to CFRelease.
        for ref in (proxyDict,
                    kSCPropNetProxiesExceptionsList,
                    kSCPropNetProxiesExcludeSimpleHostnames):
            if ref is not None:
                sc.CFRelease(ref)
1380
1381
1382
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    For each of http, https, ftp and gopher the <Scheme>Enable,
    <Scheme>Proxy and <Scheme>Port settings are read.  An enabled
    scheme with a proxy host gets the entry 'http://host:port', or
    'http://host' when no port is configured; the URL prefix is
    'http://' for every scheme.  An empty dictionary is returned when
    the proxy settings cannot be obtained.
    """
    from ctypes import cdll
    from ctypes.util import find_library

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    # Check the handle before configuring it.
    if not sc:
        return {}
    _CFSetup(sc)

    proxies = {}
    # Every CoreFoundation object created or copied below, so that all
    # of them are released even if something fails half way.
    owned = []

    def cf_key(name):
        # Build the CFString used as dictionary key; c_char_p wants bytes.
        ref = sc.CFStringCreateWithCString(0, name.encode("ascii"), 0)
        if ref is not None:
            owned.append(ref)
        return ref

    try:
        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return proxies
        owned.append(proxyDict)

        for scheme, prefix in (("http", "HTTP"), ("https", "HTTPS"),
                               ("ftp", "FTP"), ("gopher", "Gopher")):
            enable_key = cf_key(prefix + "Enable")
            proxy_key = cf_key(prefix + "Proxy")
            port_key = cf_key(prefix + "Port")
            if enable_key is None or proxy_key is None or port_key is None:
                continue

            enabled = sc.CFDictionaryGetValue(proxyDict, enable_key)
            if not (enabled and _CFNumberToInt32(sc, enabled)):
                continue

            proxy = sc.CFDictionaryGetValue(proxyDict, proxy_key)
            port = sc.CFDictionaryGetValue(proxyDict, port_key)
            if not proxy:
                continue

            proxy = _CStringFromCFString(sc, proxy)
            if isinstance(proxy, bytes):
                # The helper may hand back the raw buffer contents;
                # formatting bytes with %s would embed "b'...'".
                proxy = proxy.decode("mac_roman")
            if port:
                port = _CFNumberToInt32(sc, port)
                proxies[scheme] = "http://%s:%i" % (proxy, port)
            else:
                proxies[scheme] = "http://%s" % (proxy, )
    finally:
        for ref in owned:
            sc.CFRelease(ref)

    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001489
Alexandre Vassalottieca20b62008-05-16 02:54:33 +00001490
1491
def proxy_bypass(host):
    """Return a true value if host should be contacted without a proxy.

    When the environment defines any proxies, the decision is made by
    proxy_bypass_environment; otherwise by proxy_bypass_macosx_sysconf.
    """
    if not getproxies_environment():
        return proxy_bypass_macosx_sysconf(host)
    return proxy_bypass_environment(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001497
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Proxies found in the environment win; the result of
    getproxies_macosx_sysconf is used only when there are none.
    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001500
Mark Hammond4f570b92000-07-26 07:04:38 +00001501elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    The ProxyEnable and ProxyServer values are read from the current
    user's "Internet Settings" key.  ProxyServer is either a list of
    'protocol=address' items separated by ';' or a single address used
    for http and ftp.  An empty dictionary is returned when winreg is
    not available, the key or values are missing, or proxies are
    disabled; if a value is malformed, the entries parsed so far are
    returned.  The registry key is closed on every path.

    """
    proxies = {}
    try:
        import winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return proxies
    try:
        internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Close the key even when a query or the parsing fails.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001546
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings gathered from the environment are returned when there are
    any; otherwise the settings stored in the registry are returned.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001555
def proxy_bypass_registry(host):
    """Return 1 if the registry says host must bypass the proxy, else 0.

    Reads ProxyEnable and ProxyOverride from the current user's
    "Internet Settings" key.  ProxyOverride is a ';'-separated list of
    glob patterns ('*' and '?'); the special entry '<local>' stands for
    localhost, 127.0.0.1 and this machine's own name and address.
    host (without its port), its resolved address and its fully
    qualified name are each matched, ignoring case, against the start
    of every pattern.  0 is returned when winreg is unavailable, the
    values cannot be read, or proxies are disabled.
    """
    try:
        import winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
        finally:
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, _port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    patterns = []
    for entry in proxyOverride.split(';'):
        if entry != '<local>':
            patterns.append(entry)
            continue
        patterns.extend(['localhost', '127.0.0.1'])
        try:
            localName = socket.gethostname()
            patterns.append(localName)
            patterns.append(socket.gethostbyname(localName))
        except socket.error:
            # Same best-effort policy as the lookups above.
            pass
    # now check if we match one of the registry values.
    for test in patterns:
        if not test:
            # An empty entry (e.g. from a trailing ';') would become an
            # empty regular expression, which matches every host.
            continue
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            if re.match(test, val, re.I):
                return 1
    return 0
1614
def proxy_bypass(host):
    """Return a true value if host should be contacted without a proxy.

    The no_proxy setting of the environment decides when the
    environment defines any proxies; otherwise the registry settings
    are consulted.

    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    else:
        return proxy_bypass_registry(host)
1626
Mark Hammond4f570b92000-07-26 07:04:38 +00001627else:
1628 # By default use environment variables
1629 getproxies = getproxies_environment
Christian Heimes9bd667a2008-01-20 15:14:11 +00001630 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001631
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001632# Test and time quote() and unquote()
def test1():
    """Round-trip a test string through quote() and unquote() and time it.

    The string holds the characters chr(0)..chr(255), four times over.
    Prints 'Wrong!' if the round trip does not give back the input,
    then the repr of the input, of the quoted form and of the unquoted
    form, and finally the elapsed time in seconds.
    """
    s = ''.join([chr(i) for i in range(256)]) * 4
    started = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    finished = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print(round(finished - started, 3), 'sec')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001647
1648
def reporthook(blocknum, blocksize, totalsize):
    """Print one line with the three numbers it is called with.

    test() passes this function to urlretrieve() as its report hook,
    to report during remote transfers.
    """
    template = "Block number: %d, Block size: %d, Total size: %d"
    line = template % (blocknum, blocksize, totalsize)
    print(line)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001653
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001654# Test program
def test(args=None):
    """Retrieve each URL in args and print what was fetched.

    Without args a built-in list of sample URLs is used, plus an https
    URL when URLopener has an open_https method.  For every URL this
    prints the local file name returned by urlretrieve(), the headers
    if there are any, and the retrieved data with carriage returns
    removed.  urlcleanup() is always called at the end.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('-'*10, url, '-'*10)
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys(): print(k + ':', h[k])
                print('======')
            with open(fn, 'rb') as fp:
                data = fp.read()
            # The file is read in binary mode, so the carriage returns
            # have to be removed with bytes arguments.
            data = data.replace(b"\r", b"")
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001684
def main():
    """Command line entry point: urllib.py [-t] [-h] [url ...]

    -h prints the usage text and returns.  One -t runs test() on the
    given URLs, more than one -t runs test1() first.  Without -t the
    content read from every given URL is printed, or a hint to use -h
    when no URL was given.  An option error is printed together with
    the same hint.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error as msg:
        print(msg)
        print("Use -h for help")
        return
    selftest_level = 0
    for flag, _value in opts:
        if flag == '-t':
            selftest_level += 1
        if flag == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test;", end=' ')
            print("otherwise, contents of urls are printed")
            return
    if selftest_level:
        if selftest_level > 1:
            test1()
        test(args)
        return
    if not args:
        print("Use -h for help")
    for url in args:
        print(urlopen(url).read(), end=' ')
Guido van Rossum23490151998-06-25 02:39:00 +00001711
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001712# Run test program when run as a script
1713if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001714 main()