blob: 095123755770117c09eb5505425b71435e0b2c69 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
Brett Cannond75f0432007-05-16 22:42:29 +000038 "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Martin v. Löwis3e865952006-01-24 15:51:21 +000040__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Helper for non-unix systems.
# Pick the url2pathname()/pathname2url() pair matching os.name; every other
# platform falls back to the plain unquote()/quote() versions defined here.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        # Generic fallback: the path is just the percent-decoded URL.
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        # Generic fallback: the URL is just the percent-encoded path.
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000061
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000062# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64# (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- when not None it is forwarded to the opener's open() call.
    proxies -- when not None a fresh FancyURLopener is built with this
               mapping; otherwise one shared module-level opener is
               created on first use and reused afterwards.
    """
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        # Explicit proxies never touch the shared opener.
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use; all arguments are
    passed straight to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one exists, and empty the
    module-level _safemaps and ftpcache dictionaries."""
    opener = _urlopener
    if opener:
        opener.cleanup()
    for cache in (_safemaps, ftpcache):
        cache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000099
Bill Janssen426ea0a2007-08-29 22:35:05 +0000100# check for SSL
101try:
102 import ssl
103except:
104 _have_ssl = False
105else:
106 _have_ssl = True
107
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError subclass that also carries what was downloaded so far.

    message -- passed on to IOError as its only argument.
    content -- stored unchanged on the instance as .content.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113
# Module-level FTP cache: URLopener.__init__ stores a reference to this dict
# on each instance as .ftpcache, and urlcleanup() empties it.
ftpcache = {}
115class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000116 """Class to open URLs.
117 This is a class rather than just a subroutine because we may need
118 more than one set of global protocol-specific options.
119 Note -- this is a base class for those who don't want the
120 automatic handling of errors type 302 (relocated) and 401
121 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000122
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000123 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000124
Guido van Rossumba311382000-08-24 16:18:04 +0000125 version = "Python-urllib/%s" % __version__
126
    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up the proxy map, optional client certificate files, the
        default headers and the bookkeeping used by retrieve()/cleanup().

        proxies -- mapping from URL scheme to proxy URL; when None it is
                   obtained from getproxies().
        x509    -- optional 'key_file' and 'cert_file' keyword arguments,
                   stored as self.key_file / self.cert_file (None if absent).
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        # Headers sent with every HTTP(S) request; see addheader().
        self.addheaders = [('User-Agent', self.version)]
        # Names of temporary files created by retrieve(), removed by cleanup().
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000150
    def __del__(self):
        """Call close() when the opener object is being destroyed."""
        self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000153
    def close(self):
        """Close the opener; this simply delegates to cleanup()."""
        self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000156
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 def cleanup(self):
158 # This code sometimes runs when the rest of this module
159 # has already been deleted, so it can't use any globals
160 # or import anything.
161 if self.__tempfiles:
162 for file in self.__tempfiles:
163 try:
164 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000165 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000166 pass
167 del self.__tempfiles[:]
168 if self.tempcache:
169 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000170
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # args is stored as one tuple; open_http()/open_https() later unpack
        # each stored tuple into h.putheader(*args).
        self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000175
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000177 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000178 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000179 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000180 # percent encode url, fixing lame server errors for e.g, like space
181 # within url paths.
182 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000183 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000184 filename, headers = self.tempcache[fullurl]
185 fp = open(filename, 'rb')
186 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000187 urltype, url = splittype(fullurl)
188 if not urltype:
189 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000190 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000191 proxy = self.proxies[urltype]
192 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000194 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000195 else:
196 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000197 name = 'open_' + urltype
198 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000199 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000200 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000201 if proxy:
202 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000203 else:
204 return self.open_unknown(fullurl, data)
205 try:
206 if data is None:
207 return getattr(self, name)(url)
208 else:
209 return getattr(self, name)(url, data)
210 except socket.error, msg:
211 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000212
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000213 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000214 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000215 type, url = splittype(fullurl)
216 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000217
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000218 def open_unknown_proxy(self, proxy, fullurl, data=None):
219 """Overridable interface to open unknown URL type."""
220 type, url = splittype(fullurl)
221 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000223 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000224 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000225 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000226 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000227 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000228 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000229 return self.tempcache[url]
230 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000231 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000232 try:
233 fp = self.open_local_file(url1)
234 hdrs = fp.info()
Philip Jenvey0299d0d2009-12-03 02:40:13 +0000235 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 return url2pathname(splithost(url1)[1]), hdrs
237 except IOError, msg:
238 pass
Fred Drake316a7932000-08-24 01:01:26 +0000239 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000240 try:
241 headers = fp.info()
242 if filename:
243 tfp = open(filename, 'wb')
244 else:
245 import tempfile
246 garbage, path = splittype(url)
247 garbage, path = splithost(path or "")
248 path, garbage = splitquery(path or "")
249 path, garbage = splitattr(path or "")
250 suffix = os.path.splitext(path)[1]
251 (fd, filename) = tempfile.mkstemp(suffix)
252 self.__tempfiles.append(filename)
253 tfp = os.fdopen(fd, 'wb')
254 try:
255 result = filename, headers
256 if self.tempcache is not None:
257 self.tempcache[url] = result
258 bs = 1024*8
259 size = -1
260 read = 0
261 blocknum = 0
262 if reporthook:
263 if "content-length" in headers:
264 size = int(headers["Content-Length"])
265 reporthook(blocknum, bs, size)
266 while 1:
267 block = fp.read(bs)
268 if block == "":
269 break
270 read += len(block)
271 tfp.write(block)
272 blocknum += 1
273 if reporthook:
274 reporthook(blocknum, bs, size)
275 finally:
276 tfp.close()
277 finally:
278 fp.close()
Georg Brandlb9256022005-08-24 18:46:39 +0000279
280 # raise exception if actual size does not match content-length header
281 if size >= 0 and read < size:
282 raise ContentTooShortError("retrieval incomplete: got only %i out "
283 "of %i bytes" % (read, size), result)
284
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000285 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000286
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000287 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000288
    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url  -- either a string (direct request) or the (proxyhost, fullurl)
                tuple that open() builds when a proxy is configured.
        data -- when not None the request is sent as a POST with this body,
                otherwise as a GET.

        Returns an addinfourl object for 2xx replies; any other status is
        handed to http_error().  Raises IOError when no host is given or the
        status line is bad.
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: url is "//host/selector".
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: connect to the proxy and ask for the full URL.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd@ part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        # Credentials are sent base64-encoded in 'Basic' auth headers below.
        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        # Headers registered through addheader() (plus the default User-Agent).
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000361
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000362 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000363 """Handle http errors.
364 Derived class can override this, or provide specific handlers
365 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000366 # First check if there's a specific handler for this error
367 name = 'http_error_%d' % errcode
368 if hasattr(self, name):
369 method = getattr(self, name)
370 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000371 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000372 else:
373 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000374 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000375 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000376
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000377 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000378 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000379 void = fp.read()
380 fp.close()
381 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000382
Bill Janssen426ea0a2007-08-29 22:35:05 +0000383 if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            url  -- either a string (direct request) or the
                    (proxyhost, fullurl) tuple that open() builds when a
                    proxy is configured.
            data -- when not None the request is sent as a POST with this
                    body, otherwise as a GET.

            Returns an addinfourl object for 2xx replies; any other status
            is handed to http_error().  Raises IOError when no host is
            given or the status line is bad.
            """

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                # Direct request: url is "//host/selector".
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxied request: connect to the proxy, ask for the full URL.
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        # Rebuild the selector without the user:passwd@ part.
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            # Credentials are sent base64-encoded in 'Basic' auth headers.
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            # key_file/cert_file come from the constructor's x509 keywords.
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            # Headers registered through addheader() (plus default User-Agent).
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000455
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000456 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000457 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000458 if not isinstance(url, str):
459 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000460 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000461 return self.open_ftp(url)
462 else:
463 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000464
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000465 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000466 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000467 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000468 try:
469 from cStringIO import StringIO
470 except ImportError:
471 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000472 host, file = splithost(url)
473 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000474 try:
475 stats = os.stat(localname)
476 except OSError, e:
477 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000478 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000479 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000480 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000481 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000482 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
483 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000484 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000485 urlfile = file
486 if file[:1] == '/':
487 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000488 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000489 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000490 host, port = splitport(host)
491 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000492 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000493 urlfile = file
494 if file[:1] == '/':
495 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000496 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000497 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000498 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000499
    def open_ftp(self, url):
        """Use FTP protocol.

        url must be a string of the form //[user[:passwd]@]host[:port]/path
        optionally followed by ;attr=value parts; a non-string url (the
        proxy tuple built by open()) raises IOError.  Returns an addinfourl
        object; the exceptions listed by ftperrors() are re-raised as
        IOError.
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        # Peel port and credentials off the host part.
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        # Split into directory components and the final file name.
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the leading '/'; a second
        # empty component (path started with '//') becomes the root dir.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # One cached connection per (user, host, port, directory).
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default type: 'D' when no file name was given, else 'I';
            # a ;type=a|i|d attribute in the URL overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000561
def open_data(self, url, data=None):
    """Use "data" URL.

    url is split at its first comma into a media type and the payload;
    an empty media type defaults to 'text/plain;charset=US-ASCII'.
    NOTE(review): this implies the caller has already stripped the
    "data:" scheme prefix -- confirm against the dispatch code.
    The data argument (POSTed data) is ignored.

    Returns an addinfourl object whose headers carry Date, Content-type
    and Content-Length and whose file object yields the decoded payload.

    Raises IOError('data error', ...) if url is not a str (the proxy
    case) or contains no comma.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        mediatype, payload = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ";token" without '=' is an encoding marker (e.g. base64),
    # not a media type parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    if encoding == 'base64':
        import base64
        payload = base64.decodestring(payload)
    else:
        payload = unquote(payload)
    # %T is a platform extension that not every C library implements;
    # spell the time out with the portable %H:%M:%S directives instead.
    date = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
    msg = '\n'.join(['Date: %s' % date,
                     'Content-type: %s' % mediatype,
                     'Content-Length: %d' % len(payload),
                     '',
                     payload])
    f = StringIO(msg)
    # Parsing the headers leaves f positioned at the start of the payload.
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000607
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000608
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect handling (301, 302, 303, 307) and Basic authentication
    handling (401, 407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed in the current chain
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries redirects have already been
        followed in this chain, in which case the response is reported as
        a 500 "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                meth = getattr(self, "http_error_500",
                               self.http_error_default)
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when following the redirect raises; otherwise a
            # stale count makes later opens on this opener fail early
            # with a bogus "Redirect Recursion".
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present.  The data argument
        is not forwarded: the redirected request is opened without it.

        Raises IOError('redirect error', ...) when the target is not an
        http, https or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # The target comes from the server and is untrusted.  Refuse to be
        # bounced to other schemes (e.g. file:), which would let a remote
        # server make us read local resources.
        if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed"
                          % newurl,
                          headers)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _basic_auth_challenge(self, header, prefix, url, fp, errcode,
                              errmsg, headers, data):
        """Shared body of the 401/407 handlers.

        header is the challenge header to read and prefix the leading part
        of the retry method name ('retry_' or 'retry_proxy_').
        """
        # NOTE(review): URLopener.http_error_default is defined outside
        # this class; the three calls below rely on it raising (presumably
        # IOError) rather than returning -- confirm.
        if header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers[header]
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch by name so subclasses can override per-scheme retries.
        name = prefix + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('www-authenticate', 'retry_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('proxy-authenticate',
                                          'retry_proxy_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def _reopen(self, newurl, data):
        """Call self.open(), passing data only when it is not None."""
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Store credentials in self.proxies[scheme] and reopen url.

        Returns None if no user name or password could be obtained.
        """
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is nonzero when the proxy entry already carried 'user:pw@';
        # it is passed on as clear_cache so the cached pair is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd):
            return None
        proxyhost = (quote(user, safe='') + ':' + quote(passwd, safe='')
                     + '@' + proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        return self._reopen(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Reopen url with 'user:passwd@' embedded in its host part.

        Returns None if no user name or password could be obtained.
        """
        host, selector = splithost(url)
        # i is nonzero when url already carried 'user:pw@'; it is passed
        # on as clear_cache so the cached pair is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd):
            return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        return self._reopen(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request through the proxy with Basic credentials."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request through the proxy with Basic credentials."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with Basic credentials in the URL."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with Basic credentials in the URL."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached pair is returned unless clear_cache is true, in which
        case it is dropped and the user is prompted again.  A prompted
        pair is cached only if at least one of its parts is non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal; returns (None, None) if the user
        interrupts the prompt.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000793
794
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000795# Utility functions
796
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is looked up on the first call and cached in the
    module-level _localhost for all later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000804
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The address is looked up on the first call and cached in the
    module-level _thishost for all later calls.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000812
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on the first call; its all_errors value is
    then cached in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000821
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built from an empty StringIO on the first call and the
    same instance is returned by every later call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    # The underlying file object is not needed once the (empty) header
    # block has been parsed.
    _noheaders.fp.close()
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000835
836
837# Utility classes
838
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000839class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000840 """Class used by open_ftp() for cache of open FTP connections."""
841
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000842 def __init__(self, user, passwd, host, port, dirs,
843 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000844 self.user = user
845 self.passwd = passwd
846 self.host = host
847 self.port = port
848 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000849 self.timeout = timeout
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000850 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000851
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000852 def init(self):
853 import ftplib
854 self.busy = 0
855 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000856 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000857 self.ftp.login(self.user, self.passwd)
858 for dir in self.dirs:
859 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000860
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000861 def retrfile(self, file, type):
862 import ftplib
863 self.endtransfer()
864 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
865 else: cmd = 'TYPE ' + type; isdir = 0
866 try:
867 self.ftp.voidcmd(cmd)
868 except ftplib.all_errors:
869 self.init()
870 self.ftp.voidcmd(cmd)
871 conn = None
872 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000873 # Try to retrieve as a file
874 try:
875 cmd = 'RETR ' + file
876 conn = self.ftp.ntransfercmd(cmd)
877 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000878 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000879 raise IOError, ('ftp error', reason), sys.exc_info()[2]
880 if not conn:
881 # Set transfer mode to ASCII!
882 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000883 # Try a directory listing. Verify that directory exists.
884 if file:
885 pwd = self.ftp.pwd()
886 try:
887 try:
888 self.ftp.cwd(file)
889 except ftplib.error_perm, reason:
890 raise IOError, ('ftp error', reason), sys.exc_info()[2]
891 finally:
892 self.ftp.cwd(pwd)
893 cmd = 'LIST ' + file
894 else:
895 cmd = 'LIST'
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000896 conn = self.ftp.ntransfercmd(cmd)
897 self.busy = 1
898 # Pass back both a suitably decorated object and a retrieval length
899 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000900 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000901 def endtransfer(self):
902 if not self.busy:
903 return
904 self.busy = 0
905 try:
906 self.ftp.voidresp()
907 except ftperrors():
908 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000909
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000910 def close(self):
911 self.endtransfer()
912 try:
913 self.ftp.close()
914 except ftperrors():
915 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000916
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object fp and re-exports its bound methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            # Objects without a descriptor still answer fileno().
            self.fileno = lambda: None
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000945
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has been
    closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            # Forget the hook so that a second close() does not call it.
            self.closehook = None
            self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000960
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000970
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code given to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000988
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000989
Guido van Rossum7c395db1994-07-04 22:14:49 +0000990# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000991# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000992# splittype('type:opaquestring') --> 'type', 'opaquestring'
993# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000994# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
995# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000996# splitport('host:port') --> 'host', 'port'
997# splitquery('/path?query') --> '/path', 'query'
998# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000999# splitattr('/path;attr1=value1;attr2=value2;...') ->
1000# '/path', ['attr1=value1', 'attr2=value2', ...]
1001# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001002# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1004
def _is_unicode(x):
    """Return whether x is a unicode object.

    Returns 0 when the interpreter has no `unicode` type at all.
    """
    try:
        return isinstance(x, unicode)
    except NameError:
        return 0
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001013
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode URLs are encoded to ASCII; anything else is returned as is.
    Raises UnicodeError if a unicode URL has non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1025
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one pair of enclosing angle brackets
    and a leading 'URL:' marker, in that order.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001033
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url does not
    start with a 'type:' prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its colon, so the rest of the URL
    # starts where the match ends.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001047
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001059
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@' and both parts are unquoted.
    Returns (None, host) when host contains no '@'.
    """
    global _userprog
    if _userprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001071
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the password may itself contain
    colons and newlines.  Returns (user, None) when there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001083
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits, or None when host has no
    port.  A trailing ':' with an empty port is dropped from the host,
    so 'host:' yields ('host', None).  A non-numeric suffix is left in
    the host untouched.
    """
    global _portprog
    if _portprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        # [0-9]* rather than [0-9]+ so that an empty port still matches.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001096
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' is found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if there is a ':' but no valid number after it."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            nport = None
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001118
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when url
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        # Raw string: '\?' is a regex escape, not a string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001130
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when url
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001142
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001148
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when attr
    contains no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001160
# Map every two-digit hex string, in any mix of upper and lower case
# ('2f', '2F', 'aB', ...), to the character it encodes.  Hex digits in a
# %-escape are case-insensitive, so mixed-case pairs must be present too.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape with two hex digits is replaced by the character
    it encodes; a '%' not followed by two hex digits is left untouched.
    """
    pieces = s.split('%')
    # The first piece precedes any '%'; each later piece starts right
    # after one.
    res = [pieces[0]]
    for item in pieces[1:]:
        try:
            res.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back.
            res.append('%' + item)
        except UnicodeDecodeError:
            # Joining a non-ASCII byte with a unicode remainder fails;
            # build the character as unicode instead.
            res.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001176
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001181
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in safe, letters, digits and '_.-' are copied unchanged;
    every other character is replaced by its '%XX' escape.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First call with this safe set: build the 256-entry table once.
        unquoted = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            if ch in unquoted:
                safe_map[ch] = ch
            else:
                safe_map[ch] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[ch] for ch in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001220
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces pass through quote() untouched, then swap them for '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001227
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001228def urlencode(query,doseq=0):
1229 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001230
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001231 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001232 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001233
1234 If the query arg is a sequence of two-element tuples, the order of the
1235 parameters in the output will match the order of parameters in the
1236 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001237 """
Tim Peters658cba62001-02-09 20:06:00 +00001238
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001239 if hasattr(query,"items"):
1240 # mapping objects
1241 query = query.items()
1242 else:
1243 # it's a bother at times that strings and string-like objects are
1244 # sequences...
1245 try:
1246 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001247 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001248 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001249 raise TypeError
1250 # zero-length sequences of all types will get here and succeed,
1251 # but that's a minor nit - since the original implementation
1252 # allowed empty dicts that type of behavior probably should be
1253 # preserved for consistency
1254 except TypeError:
1255 ty,va,tb = sys.exc_info()
1256 raise TypeError, "not a valid non-string sequence or mapping object", tb
1257
Guido van Rossume7b146f2000-02-04 15:28:42 +00001258 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001259 if not doseq:
1260 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001261 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001262 k = quote_plus(str(k))
1263 v = quote_plus(str(v))
1264 l.append(k + '=' + v)
1265 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001266 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001267 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001268 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001269 v = quote_plus(v)
1270 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001271 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001272 # is there a reasonable way to convert to ASCII?
1273 # encode generates a string, but "replace" or "ignore"
1274 # lose information and "strict" can raise UnicodeError
1275 v = quote_plus(v.encode("ASCII","replace"))
1276 l.append(k + '=' + v)
1277 else:
1278 try:
1279 # is this a sufficient test for sequence-ness?
1280 x = len(v)
1281 except TypeError:
1282 # not a sequence
1283 v = quote_plus(str(v))
1284 l.append(k + '=' + v)
1285 else:
1286 # loop over the sequence
1287 for elt in v:
1288 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001289 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001290
# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Every environment variable whose lower-cased name ends in '_proxy'
    and whose value is non-empty contributes one entry, keyed by the
    lower-cased name without that suffix.  To configure proxies some
    other way, pass a proxies dictionary to the [Fancy]URLopener
    constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = value
    return proxies
1307
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY if
    that is unset or empty), which should be a list of DNS suffixes
    separated by commas, or '*' for all hosts.  Whitespace around each
    suffix is ignored, so "a.com, b.com" works as expected.

    host may carry a ':port' part; a suffix matches if either the host
    with its port or the host alone ends with it.  Returns 1 when the
    proxy should be bypassed and 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly = splitport(host)[0]
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # Entries are commonly written with a space after the comma; an
        # unstripped " b.com" could never be the suffix of a host name.
        name = name.strip()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1326
1327
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.

        host may include a ':port' part.  Each configured exception is
        either a numeric network such as '169.254/16' (compared against
        the resolved address of the host) or a glob pattern such as
        '*.local' (compared against host as given).
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted address into one integer.  Short forms such as
            # '169.254' are padded on the right with zero octets.
            parts = [int(part) for part in ipAddr.split('.')]
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved lazily, and only once it has succeeded.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value:
                continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit '/bits': the optional group did not
                    # match, so derive the prefix length from the number
                    # of octets that were written out.
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False


    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()


    def proxy_bypass(host):
        """Return a true value if host should be reached without a proxy.

        The environment rules are used when any <scheme>_proxy variable
        is set; otherwise the system configuration is consulted.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Settings from the environment take precedence; the system
        configuration is used only when the environment defines none.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty dictionary is returned when _winreg is unavailable or
        the key cannot be read.  If a value turns out to be malformed,
        whatever had been collected up to that point is returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when a value is missing or malformed.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry says host should bypass the proxy, else 0.

        host may include a ':port' part.  The ';'-separated ProxyOverride
        value is read from the registry; each entry is a glob pattern
        ('*' and '?'), or the special entry '<local>', which matches any
        host name without a dot.  Patterns are tried case-insensitively
        against the host name, its resolved address and its fully
        qualified name.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                # The key is not needed once both values have been read.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # now check if we match one of the registry values.
        for test in proxyOverride.split(';'):
            if not test:
                # A trailing or doubled ';' yields an empty entry.  As a
                # pattern it would match every host, so skip it.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a true value if host should be reached without a proxy.

        The environment rules are used when any <scheme>_proxy variable
        is set; otherwise the registry is consulted.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001528
# Test and time quote() and unquote()
def test1():
    """Round-trip every byte value through quote() and unquote().

    Prints 'Wrong!' if the round trip does not give back the input, then
    the repr of the input, the quoted form and the unquoted form, and
    finally the elapsed time in seconds.
    """
    s = ''.join(map(chr, range(256))) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print('%s sec' % round(t1 - t0, 3))
1545
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a transfer; passed to urlretrieve() by test()."""
    message = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(message)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001550
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001551# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001552def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001553 if not args:
1554 args = [
1555 '/etc/passwd',
1556 'file:/etc/passwd',
1557 'file://localhost/etc/passwd',
Collin Winter071d1ae2007-03-12 01:55:54 +00001558 'ftp://ftp.gnu.org/pub/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001559 'http://www.python.org/index.html',
1560 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001561 if hasattr(URLopener, "open_https"):
1562 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001563 try:
1564 for url in args:
1565 print '-'*10, url, '-'*10
1566 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001567 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001568 if h:
1569 print '======'
1570 for k in h.keys(): print k + ':', h[k]
1571 print '======'
Philip Jenvey0299d0d2009-12-03 02:40:13 +00001572 with open(fn, 'rb') as fp:
1573 data = fp.read()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001574 if '\r' in data:
1575 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001576 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001577 print data
1578 fn, h = None, None
1579 print '-'*40
1580 finally:
1581 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001582
Guido van Rossum23490151998-06-25 02:39:00 +00001583def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001584 import getopt, sys
1585 try:
1586 opts, args = getopt.getopt(sys.argv[1:], "th")
1587 except getopt.error, msg:
1588 print msg
1589 print "Use -h for help"
1590 return
1591 t = 0
1592 for o, a in opts:
1593 if o == '-t':
1594 t = t + 1
1595 if o == '-h':
1596 print "Usage: python urllib.py [-t] [url ...]"
1597 print "-t runs self-test;",
1598 print "otherwise, contents of urls are printed"
1599 return
1600 if t:
1601 if t > 1:
1602 test1()
1603 test(args)
1604 else:
1605 if not args:
1606 print "Use -h for help"
1607 for url in args:
1608 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001609
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001610# Run test program when run as a script
1611if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001612 main()