blob: 1c31d48f9c314010a813f73b64bb67203d25b0a8 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Brett Cannon8bb8fa52008-07-02 01:57:08 +000031import warnings
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
# Names exported by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-Agent header value.
__version__ = '1.17' # XXX This version is not always updated :-(

# open_ftp() prunes the FTP connection cache once it holds more than
# this many entries.
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
# Helper for non-unix systems
# Pick the platform-specific URL <-> path converters; every other
# platform falls back to plain percent (un)quoting.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        # unquote() is defined later in this module; it is resolved at
        # call time, not at definition time.
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        # quote() is defined later in this module; it is resolved at
        # call time, not at definition time.
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage
# Lazily created FancyURLopener shared by urlopen(), urlretrieve() and
# urlcleanup().
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- when not None it is passed on to the opener's open()
               method as the second argument.
    proxies -- optional proxy mapping.  When given, a fresh
               FancyURLopener is built for this call only and the shared
               module-level opener is neither used nor replaced.

    Returns whatever the opener's open() method returns.
    """
    global _urlopener
    # The module-level 'warnings' import is used directly; the former
    # function-local "from warnings import warnpy3k" was never referenced.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First default-proxy call: create and remember the shared opener.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve a URL through the shared module-level opener.

    The shared FancyURLopener is created on first use; all arguments are
    forwarded unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
# check for SSL
# _have_ssl controls whether URLopener gets an open_https() method.
# Any ordinary failure while importing ssl simply disables HTTPS support,
# but KeyboardInterrupt and SystemExit are no longer swallowed here.
try:
    import ssl
except Exception:
    _have_ssl = False
else:
    _have_ssl = True
106
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying the partial result of an incomplete download.

    The extra 'content' attribute holds whatever the raiser passed as the
    second constructor argument.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112
# Default FTP connection cache; every URLopener instance points its
# .ftpcache attribute at this dict unless the attribute is reassigned.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() finds the attribute even on an
    # instance whose __init__ did not run to completion.
    __tempfiles = None

    # Sent as the default User-Agent header (see __init__).
    version = "Python-urllib/%s" % __version__
125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000127 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000134 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000149
    def __del__(self):
        """Finalizer: delegate to close()."""
        self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000152
    def close(self):
        """Close the opener; currently this only runs cleanup()."""
        self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000155
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000164 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000169
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000170 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000174
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named 'open_<scheme>' ('-' in the scheme
        is replaced by '_').  A URL without a scheme is treated as 'file'.
        When the scheme has an entry in self.proxies, the proxy's own
        scheme selects the handler and the handler receives a
        (proxyhost, fullurl) tuple instead of a string.

        Raises IOError('socket error', ...) when the handler raises
        socket.error; unknown schemes go to open_unknown() or
        open_unknown_proxy().
        """
        fullurl = unwrap(toBytes(fullurl))
        # percent encode url, fixing lame server errors for e.g, like space
        # within url paths.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
        # Serve from the retrieve() cache when it is enabled and has a hit.
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            # From here on urltype is the *proxy's* scheme.
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            # Handlers that take no 'data' parameter are only ever called
            # with a single argument.
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError while keeping the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000211
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000212 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000213 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000214 type, url = splittype(fullurl)
215 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000216
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000217 def open_unknown_proxy(self, proxy, fullurl, data=None):
218 """Overridable interface to open unknown URL type."""
219 type, url = splittype(fullurl)
220 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
221
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000222 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000223 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000224 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000225 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000226 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000227 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 return self.tempcache[url]
229 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000230 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 try:
232 fp = self.open_local_file(url1)
233 hdrs = fp.info()
Philip Jenvey0299d0d2009-12-03 02:40:13 +0000234 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000235 return url2pathname(splithost(url1)[1]), hdrs
236 except IOError, msg:
237 pass
Fred Drake316a7932000-08-24 01:01:26 +0000238 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000239 try:
240 headers = fp.info()
241 if filename:
242 tfp = open(filename, 'wb')
243 else:
244 import tempfile
245 garbage, path = splittype(url)
246 garbage, path = splithost(path or "")
247 path, garbage = splitquery(path or "")
248 path, garbage = splitattr(path or "")
249 suffix = os.path.splitext(path)[1]
250 (fd, filename) = tempfile.mkstemp(suffix)
251 self.__tempfiles.append(filename)
252 tfp = os.fdopen(fd, 'wb')
253 try:
254 result = filename, headers
255 if self.tempcache is not None:
256 self.tempcache[url] = result
257 bs = 1024*8
258 size = -1
259 read = 0
260 blocknum = 0
261 if reporthook:
262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
264 reporthook(blocknum, bs, size)
265 while 1:
266 block = fp.read(bs)
267 if block == "":
268 break
269 read += len(block)
270 tfp.write(block)
271 blocknum += 1
272 if reporthook:
273 reporthook(blocknum, bs, size)
274 finally:
275 tfp.close()
276 finally:
277 fp.close()
Georg Brandlb9256022005-08-24 18:46:39 +0000278
279 # raise exception if actual size does not match content-length header
280 if size >= 0 and read < size:
281 raise ContentTooShortError("retrieval incomplete: got only %i out "
282 "of %i bytes" % (read, size), result)
283
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000284 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000285
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000287
    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url  -- either the part of the URL after 'http:' (a str), or a
                (proxyhost, fullurl) tuple as built by open() when the
                request goes through a proxy.
        data -- when not None the request is a POST with this body and a
                form-urlencoded Content-Type; otherwise a GET.

        Returns an addinfourl object for 2xx replies; every other status
        is handed to http_error().  Raises IOError when no host can be
        determined or the status line is bad (errcode == -1).
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: credentials may be embedded in the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: 'host' is the proxy, 'selector' the full URL.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd@ part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Connect to the target host itself, not the proxy.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        # Instance-wide headers (User-Agent plus anything from addheader()).
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000360
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000361 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000362 """Handle http errors.
363 Derived class can override this, or provide specific handlers
364 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000365 # First check if there's a specific handler for this error
366 name = 'http_error_%d' % errcode
367 if hasattr(self, name):
368 method = getattr(self, name)
369 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000370 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000371 else:
372 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000373 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000374 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000375
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000376 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000377 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000378 void = fp.read()
379 fp.close()
380 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000381
    # open_https() only exists when the ssl module could be imported.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            url  -- either the part of the URL after 'https:' (a str), or
                    a (proxyhost, fullurl) tuple as built by open() when
                    the request goes through a proxy.
            data -- when not None the request is a POST with this body;
                    otherwise a GET.

            The connection is created with the instance's key_file and
            cert_file.  Returns an addinfourl object for 2xx replies;
            every other status is handed to http_error().  Raises IOError
            when no host can be determined or the status line is bad.
            """

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                # Direct request: credentials may be embedded in the host.
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxied request: 'host' is the proxy, 'selector' the
                # full URL.
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        # Rebuild the selector without user:passwd@.
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            # Instance-wide headers (User-Agent plus addheader() entries).
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000454
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000455 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000456 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000457 if not isinstance(url, str):
458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000460 return self.open_ftp(url)
461 else:
462 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000463
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000464 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000465 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000466 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000467 try:
468 from cStringIO import StringIO
469 except ImportError:
470 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000471 host, file = splithost(url)
472 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000473 try:
474 stats = os.stat(localname)
475 except OSError, e:
476 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000477 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000478 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000479 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000480 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000481 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
482 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000483 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000484 urlfile = file
485 if file[:1] == '/':
486 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000487 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000488 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000489 host, port = splitport(host)
490 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000491 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000492 urlfile = file
493 if file[:1] == '/':
494 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000495 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000496 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000497 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000498
    def open_ftp(self, url):
        """Use FTP protocol.

        url is the part of the URL after the scheme, i.e.
        '//[user[:passwd]@]host[:port]/path[;attr=value...]'.  Connections
        are cached in self.ftpcache keyed by (user, host, port, directory).

        Returns an addinfourl wrapping the retrieved file or directory
        listing.  Raises IOError for a proxy tuple, a missing host, or any
        error from ftperrors().
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        # The cache key uses the resolved address, not the host name.
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        # Split the path into directory components and the final file name.
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the leading '/'; a second
        # empty component (path started with '//') stands for the root.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' when no file name was given,
            # 'I' otherwise; a ';type=' attribute overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError while keeping the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000560
def open_data(self, url, data=None):
    """Use "data" URL.

    Syntax of data URLs (RFC 2397)::

        dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        mediatype := [ type "/" subtype ] *( ";" parameter )
        data      := *urlchar
        parameter := attribute "=" value

    Everything in `url` before the first "," is treated as the media
    type, so `url` is presumably passed without the "data:" prefix --
    NOTE(review): confirm against the caller.  POSTed `data` is ignored.

    Returns an addinfourl object wrapping an in-memory message made of
    Date, Content-type and Content-Length headers followed by the
    decoded payload.

    Raises IOError if `url` is not a str (the proxy case is not
    implemented) or if it contains no "," separator.
    """
    if not isinstance(url, str):
        raise IOError('data error',
                      'proxy support for data protocol currently not implemented')
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        mediatype, payload = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ";token" without "=" is an encoding marker (e.g. base64),
    # not a media type parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    # "%T" is not among the portable strftime directives (it fails on
    # Windows), so the time of day is spelled out explicitly.
    date = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                         time.gmtime(time.time()))
    if encoding == 'base64':
        import base64
        payload = base64.decodestring(payload)
    else:
        payload = unquote(payload)
    msg = '\n'.join(['Date: %s' % date,
                     'Content-type: %s' % mediatype,
                     'Content-Length: %d' % len(payload),
                     '',
                     payload])
    f = StringIO(msg)
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000606
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000607
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    On top of URLopener this adds handlers that follow 301/302/303/307
    redirects and that retry 401/407 responses with Basic
    authentication credentials obtained from prompt_user_passwd().
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # "realm@host" -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless `maxtries` redirects have already
        been followed, in which case the response is handed to
        http_error_500 (if defined) or http_error_default as a 500
        "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when following the redirect raises; otherwise
            # failures accumulate across unrelated requests and eventually
            # trigger a bogus "Redirect Recursion" error.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present.  Raises IOError
        when the target is not an http, https or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the redirect response before opening the target.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # The target comes from the server and is untrusted: for security
        # reasons do not follow redirects to protocols other than HTTP,
        # HTTPS or FTP (e.g. file:).
        if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the three bail-out calls below rely on
        # URLopener.http_error_default raising; it is defined outside this
        # section -- confirm.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch on the URL type, e.g. retry_http_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the bail-out calls rely on
        # URLopener.http_error_default raising -- confirm.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch on the URL type, e.g. retry_proxy_http_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after adding credentials to the http proxy.

        Rewrites self.proxies['http'] to embed user:passwd and reopens
        the URL.  Returns None when no credentials are supplied.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # passing it as clear_cache drops the cached pair for this realm.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after adding credentials to the https proxy.

        Rewrites self.proxies['https'] to embed user:passwd and reopens
        the URL.  Returns None when no credentials are supplied.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd embedded in the host part.

        Returns None when no credentials are supplied.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; passing it
        # as clear_cache drops the cached pair for this realm.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd embedded in the host part.

        Returns None when no credentials are supplied.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless `clear_cache` is true, in which
        case it is discarded and the user is prompted again.  Only
        non-empty answers are cached.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for a user name and password on the terminal and returns
        them as a pair; returns (None, None) on KeyboardInterrupt.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000792
793
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000794# Utility functions
795
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; the answer is kept in the module-level
    _localhost and reused afterwards.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000803
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; the answer is kept in the module-level
    _thishost and reused afterwards.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000811
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is cached
    in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000820
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and cached in the
    module-level _noheaders; every call returns that same instance.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000834
835
836# Utility classes
837
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000838class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000839 """Class used by open_ftp() for cache of open FTP connections."""
840
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000841 def __init__(self, user, passwd, host, port, dirs,
842 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000843 self.user = user
844 self.passwd = passwd
845 self.host = host
846 self.port = port
847 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000848 self.timeout = timeout
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000849 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000850
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000851 def init(self):
852 import ftplib
853 self.busy = 0
854 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000855 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000856 self.ftp.login(self.user, self.passwd)
857 for dir in self.dirs:
858 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000859
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000860 def retrfile(self, file, type):
861 import ftplib
862 self.endtransfer()
863 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
864 else: cmd = 'TYPE ' + type; isdir = 0
865 try:
866 self.ftp.voidcmd(cmd)
867 except ftplib.all_errors:
868 self.init()
869 self.ftp.voidcmd(cmd)
870 conn = None
871 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000872 # Try to retrieve as a file
873 try:
874 cmd = 'RETR ' + file
875 conn = self.ftp.ntransfercmd(cmd)
876 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000877 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000878 raise IOError, ('ftp error', reason), sys.exc_info()[2]
879 if not conn:
880 # Set transfer mode to ASCII!
881 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000882 # Try a directory listing. Verify that directory exists.
883 if file:
884 pwd = self.ftp.pwd()
885 try:
886 try:
887 self.ftp.cwd(file)
888 except ftplib.error_perm, reason:
889 raise IOError, ('ftp error', reason), sys.exc_info()[2]
890 finally:
891 self.ftp.cwd(pwd)
892 cmd = 'LIST ' + file
893 else:
894 cmd = 'LIST'
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000895 conn = self.ftp.ntransfercmd(cmd)
896 self.busy = 1
897 # Pass back both a suitably decorated object and a retrieval length
898 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000899 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000900 def endtransfer(self):
901 if not self.busy:
902 return
903 self.busy = 0
904 try:
905 self.ftp.voidresp()
906 except ftperrors():
907 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000908
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000909 def close(self):
910 self.endtransfer()
911 try:
912 self.ftp.close()
913 except ftperrors():
914 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000915
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap `fp`, exposing its reading methods directly on the wrapper."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Objects without fileno() get a stand-in that returns None.
        self.fileno = fp.fileno if hasattr(fp, "fileno") else (lambda: None)
        for name in ("__iter__", "next"):
            if hasattr(fp, name):
                setattr(self, name, getattr(fp, name))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the borrowed methods and close the wrapped file, if any."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000944
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap `fp`; close() will call closehook(*hookargs)."""
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        """Close the file, then run the hook (it is cleared afterwards)."""
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000959
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap `fp` and remember `headers` for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000969
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap `fp` and remember its headers, URL and optional status code."""
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the status code given to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000987
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000988
Guido van Rossum7c395db1994-07-04 22:14:49 +0000989# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000990# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000991# splittype('type:opaquestring') --> 'type', 'opaquestring'
992# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000993# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
994# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000995# splitport('host:port') --> 'host', 'port'
996# splitquery('/path?query') --> '/path', 'query'
997# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000998# splitattr('/path;attr1=value1;attr2=value2;...') ->
999# '/path', ['attr1=value1', 'attr2=value2', ...]
1000# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001001# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1003
def _is_unicode(x):
    """Return true if x is a unicode object; 0 on builds without unicode."""
    try:
        return isinstance(x, unicode)
    except NameError:
        # This interpreter has no unicode type at all.
        return 0
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001012
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned untouched.  Unicode input is encoded
    as ASCII; UnicodeError is raised if that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1024
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing <...> pair and a leading
    'URL:' marker are each removed when present.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001032
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case.  Returns (None, url) when url
    does not start with a 'type:' prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily, on first use.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match ends right after the colon.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001046
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/' or '?'.  Returns (None, url)
    when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily, on first use.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001058
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and come back as a
    two-item list.  Without an '@' the result is the tuple
    (None, host), with host left as given.
    """
    global _userprog
    if _userprog is None:
        # Compile lazily, on first use.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001070
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the password may contain any
    character, newlines included.  Returns (user, None) without a ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily, on first use.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001082
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  Returns (host, None)
    when host does not end in ':' followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compile lazily, on first use.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001095
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; it defaults to -1.
    Return the numerical port if a valid number is found after the
    last ':'.  Return None as the port if there is a ':' that is not
    followed by a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compile lazily, on first use.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.groups()
    if not port:
        # A ':' with nothing after it.
        return host, None
    try:
        return host, int(port)
    except ValueError:
        return host, None
Guido van Rossum53725a21996-06-13 19:12:35 +00001117
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when url
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily, on first use.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001129
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when url
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily, on first use.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001141
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    attrs = url.split(';')
    path = attrs.pop(0)
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001147
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when
    attr contains no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily, on first use.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001159
# Two-digit hex strings, all-lowercase and all-uppercase spellings, mapped
# to the character they encode.
_hextochr = dict((fmt % code, chr(code))
                 for fmt in ('%02x', '%02X')
                 for code in range(256))

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits known to _hextochr
    (mixed-case pairs such as '%aB' are not) is left as it is.
    """
    pieces = s.split('%')
    # Everything before the first '%' is literal text.
    decoded = pieces[:1]
    for chunk in pieces[1:]:
        try:
            decoded.append(_hextochr[chunk[:2]] + chunk[2:])
        except KeyError:
            # Not a recognised escape: put the '%' back.
            decoded.append('%' + chunk)
        except UnicodeDecodeError:
            decoded.append(unichr(int(chunk[:2], 16)) + chunk[2:])
    return "".join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001175
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' becomes a space before the %-escapes are decoded.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001180
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789'
               '_.-')
# Cache of per-character translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    `safe` by its %XX escape.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First call with this `safe` set: build and cache its table.
        unreserved = safe + always_safe
        safe_map = {}
        for code in range(256):
            char = chr(code)
            if char in unreserved:
                safe_map[char] = char
            else:
                safe_map[char] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[char] for char in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001219
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL, writing each ' ' as '+'.

    Characters listed in safe are passed through unquoted, as they are
    by quote().
    """
    if ' ' not in s:
        return quote(s, safe)
    # Shield the spaces from percent-escaping so they can become '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001226
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001227def urlencode(query,doseq=0):
1228 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001229
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001230 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001231 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001232
1233 If the query arg is a sequence of two-element tuples, the order of the
1234 parameters in the output will match the order of parameters in the
1235 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001236 """
Tim Peters658cba62001-02-09 20:06:00 +00001237
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001238 if hasattr(query,"items"):
1239 # mapping objects
1240 query = query.items()
1241 else:
1242 # it's a bother at times that strings and string-like objects are
1243 # sequences...
1244 try:
1245 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001246 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001247 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001248 raise TypeError
1249 # zero-length sequences of all types will get here and succeed,
1250 # but that's a minor nit - since the original implementation
1251 # allowed empty dicts that type of behavior probably should be
1252 # preserved for consistency
1253 except TypeError:
1254 ty,va,tb = sys.exc_info()
1255 raise TypeError, "not a valid non-string sequence or mapping object", tb
1256
Guido van Rossume7b146f2000-02-04 15:28:42 +00001257 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001258 if not doseq:
1259 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001260 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001261 k = quote_plus(str(k))
1262 v = quote_plus(str(v))
1263 l.append(k + '=' + v)
1264 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001265 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001266 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001267 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001268 v = quote_plus(v)
1269 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001270 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001271 # is there a reasonable way to convert to ASCII?
1272 # encode generates a string, but "replace" or "ignore"
1273 # lose information and "strict" can raise UnicodeError
1274 v = quote_plus(v.encode("ASCII","replace"))
1275 l.append(k + '=' + v)
1276 else:
1277 try:
1278 # is this a sufficient test for sequence-ness?
1279 x = len(v)
1280 except TypeError:
1281 # not a sequence
1282 v = quote_plus(str(v))
1283 l.append(k + '=' + v)
1284 else:
1285 # loop over the sequence
1286 for elt in v:
1287 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001288 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001289
Guido van Rossum442e7201996-03-20 15:33:11 +00001290# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Every non-empty environment variable whose name, compared without
    regard to case, ends in '_proxy' contributes one entry; the scheme is
    the lower-cased name without that suffix.  If you need a different
    way, you can pass a proxies dictionary to the [Fancy]URLopener
    constructor.

    """
    lowered = ((name.lower(), value) for name, value in os.environ.items())
    return dict((name[:-6], value) for name, value in lowered
                if value and name.endswith('_proxy'))
1306
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY
    when that one is unset or empty), which should be a list of DNS
    suffixes separated by commas, or '*' for all hosts.  Whitespace
    around the individual suffixes is ignored, so 'a.com, b.com' works.

    Returns 1 if the host should bypass the proxy and 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # Lists are commonly written with a space after each comma.
        name = name.strip()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1325
1326
Jack Jansen11d9b062004-07-16 11:45:00 +00001327if sys.platform == 'darwin':
Ronald Oussoren51f06332009-09-20 10:31:22 +00001328 from _scproxy import _get_proxy_settings, _get_proxies
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001329
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    The decision is based on the settings returned by
    _get_proxy_settings(): host names without a dot bypass the proxy
    when 'exclude_simple' is set, and so does any host matching one of
    the 'exceptions'.  An exception is either a shell-style pattern such
    as '*.local' or a network such as '169.254/16'; a network written
    without '/bits' covers as many leading bits as it has components
    (8 bits each).
    """
    import re
    import socket
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a dotted address into one integer; components that are
        # missing at the end count as 0.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    proxy_settings = _get_proxy_settings()

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, the first time a numeric exception is seen.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit '/bits': the optional group did not take
                # part in the match, so derive the prefix length from
                # the number of components that were given.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Number of low-order bits to ignore in the comparison.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001383
1384
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The mappings are the ones reported by _get_proxies(), which fetches
    the proxy information through the MacOSX framework
    SystemConfiguration.
    """
    proxies = _get_proxies()
    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001392
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001393
1394
def proxy_bypass(host):
    """Test if proxies should not be used for a particular host.

    The environment decides when it defines any proxies; otherwise the
    MacOSX system configuration is consulted.
    """
    if not getproxies_environment():
        return proxy_bypass_macosx_sysconf(host)
    return proxy_bypass_environment(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001400
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings from the environment take precedence; the MacOSX system
    configuration is used only when the environment defines none.
    """
    env_proxies = getproxies_environment()
    if env_proxies:
        return env_proxies
    return getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001403
Mark Hammond4f570b92000-07-26 07:04:38 +00001404elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.  The 'ProxyServer' value
    under the current user's Internet Settings key is read when
    'ProxyEnable' is set.  It holds either per-protocol settings
    ('http=host:port;ftp=host:port') or a single server used for both
    http and ftp.

    An empty dictionary is returned when the registry cannot be read;
    a malformed value yields whatever was parsed before the problem.

    """
    proxies = {}
    try:
        import _winreg
        import re
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Release the key even if a query or the parsing above failed.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001449
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings from the environment take precedence; the registry is
    consulted only when the environment defines none.

    """
    env_proxies = getproxies_environment()
    if env_proxies:
        return env_proxies
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001458
def proxy_bypass_registry(host):
    """Test the registry's proxy override list against host.

    Returns 1 if the 'ProxyOverride' value of the current user's
    Internet Settings says that host should not go through the proxy,
    and 0 otherwise (including when the registry cannot be read or
    proxies are disabled).  The host is tried under its given name, its
    IP address and its fully qualified name.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        finally:
            # Release the key whether or not the queries succeeded.
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    proxyOverride = proxyOverride.split(';')
    # now check if we match one of the registry values.
    for test in proxyOverride:
        if not test:
            # An empty entry (e.g. from a trailing ';') would turn into
            # an empty pattern, which matches every host.
            continue
        if test == '<local>':
            if '.' not in rawHost:
                return 1
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            # print "%s <--> %s" %( test, val )
            if re.match(test, val, re.I):
                return 1
    return 0
1510
def proxy_bypass(host):
    """Test if proxies should not be used for a particular host.

    The environment decides when it defines any proxies; otherwise the
    registry is consulted.

    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    else:
        return proxy_bypass_registry(host)
1522
Mark Hammond4f570b92000-07-26 07:04:38 +00001523else:
1524 # By default use environment variables
1525 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001526 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001527
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001528# Test and time quote() and unquote()
def test1():
    """Round-trip every byte value through quote() and unquote(), timed.

    The input, the quoted form and the unquoted result are printed if
    the round trip does not give the input back; the elapsed time in
    seconds is printed in either case.
    """
    s = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    finished = time.time()
    if uqs != s:
        print('Wrong!')
        print(repr(s))
        print(repr(qs))
        print(repr(uqs))
    print('%s %s' % (round(finished - started, 3), 'sec'))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001543
1544
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print the three numbers given."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001549
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001550# Test program
def test(args=[]):
    """Retrieve each URL in args, printing its headers and its contents.

    A built-in list of sample URLs is used when args is empty.  Carriage
    returns are removed from the data before it is printed, and
    urlcleanup() is called at the end, even after an error.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    rule = '-'*10
    try:
        for url in args:
            print('%s %s %s' % (rule, url, rule))
            filename, headers = urlretrieve(url, None, reporthook)
            print(filename)
            if headers:
                print('======')
                for name in headers.keys():
                    print('%s %s' % (name + ':', headers[name]))
                print('======')
            with open(filename, 'rb') as fp:
                data = fp.read()
            if '\r' in data:
                identity = string.maketrans("", "")
                data = data.translate(identity, "\r")
            print(data)
            # Drop the references before the next retrieval.
            filename, headers = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001581
Guido van Rossum23490151998-06-25 02:39:00 +00001582def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001583 import getopt, sys
1584 try:
1585 opts, args = getopt.getopt(sys.argv[1:], "th")
1586 except getopt.error, msg:
1587 print msg
1588 print "Use -h for help"
1589 return
1590 t = 0
1591 for o, a in opts:
1592 if o == '-t':
1593 t = t + 1
1594 if o == '-h':
1595 print "Usage: python urllib.py [-t] [url ...]"
1596 print "-t runs self-test;",
1597 print "otherwise, contents of urls are printed"
1598 return
1599 if t:
1600 if t > 1:
1601 test1()
1602 test(args)
1603 else:
1604 if not args:
1605 print "Use -h for help"
1606 for url in args:
1607 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001608
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001609# Run test program when run as a script
1610if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001611 main()