blob: ac5d797e33a0a8bce806a58217819a282352359b [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080030import base64
Serhiy Storchaka923baea2013-03-14 21:31:09 +020031import re
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080032
Brett Cannon69200fa2004-03-23 21:26:39 +000033from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000034
Skip Montanaro40fc1602001-03-01 04:27:19 +000035__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
36 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000037 "urlencode", "url2pathname", "pathname2url", "splittag",
38 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
39 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
40 "splitnport", "splitquery", "splitattr", "splitvalue",
Brett Cannond75f0432007-05-16 22:42:29 +000041 "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
Martin v. Löwis3e865952006-01-24 15:51:21 +000043__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000044
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000045MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000046
Jack Jansendc3e3f61995-12-15 13:22:13 +000047# Helper for non-unix systems
if os.name not in ('nt', 'riscos'):
    # Generic platforms: a path is just the percent-decoded URL and
    # vice versa.
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL to a file system path.

        The conversion is OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative 'file' scheme URL.

        The conversion is OS-specific; not recommended for general use."""
        return quote(pathname)
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # os.name == 'riscos'
    from rourl2path import url2pathname, pathname2url
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
70# Shortcut for basic usage
# Module-wide default opener, created lazily on first use.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    When *proxies* is given, a fresh FancyURLopener configured with it
    is used for this call only; otherwise the shared module-level opener
    is created on demand and reused.  *data*, when not None, is passed
    on to the opener's open() method.
    """
    global _urlopener

    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener

    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* through the shared module-level opener.

    The opener is created on first use; all arguments are forwarded
    unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Clean up the shared opener (if any) and empty the module caches."""
    opener = _urlopener
    if opener:
        opener.cleanup()
    # Module-level caches: quoting helpers and FTP connections.
    _safe_quoters.clear()
    ftpcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000100
Bill Janssen426ea0a2007-08-29 22:35:05 +0000101# check for SSL
102try:
103 import ssl
104except:
105 _have_ssl = False
106else:
107 _have_ssl = True
108
Georg Brandlb9256022005-08-24 18:46:39 +0000109# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised when the downloaded size does not match Content-Length.

    The extra *content* argument is stored on the exception as the
    ``content`` attribute so the caller can still get at it.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000114
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000115ftpcache = {}
116class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000117 """Class to open URLs.
118 This is a class rather than just a subroutine because we may need
119 more than one set of global protocol-specific options.
120 Note -- this is a base class for those who don't want the
121 automatic handling of errors type 302 (relocated) and 401
122 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000123
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000124 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000125
Guido van Rossumba311382000-08-24 16:18:04 +0000126 version = "Python-urllib/%s" % __version__
127
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000129 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000130 if proxies is None:
131 proxies = getproxies()
132 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
133 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000134 self.key_file = x509.get('key_file')
135 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000136 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000137 self.__tempfiles = []
138 self.__unlink = os.unlink # See cleanup()
139 self.tempcache = None
140 # Undocumented feature: if you assign {} to tempcache,
141 # it is used to cache files retrieved with
142 # self.retrieve(). This is not enabled by default
143 # since it does not work for changing documents (and I
144 # haven't got the logic to check expiration headers
145 # yet).
146 self.ftpcache = ftpcache
147 # Undocumented feature: you can use a different
148 # ftp cache by assigning to the .ftpcache member;
149 # in case you want logically independent URL openers
150 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 def __del__(self):
153 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000154
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000155 def close(self):
156 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000157
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000158 def cleanup(self):
159 # This code sometimes runs when the rest of this module
160 # has already been deleted, so it can't use any globals
161 # or import anything.
162 if self.__tempfiles:
163 for file in self.__tempfiles:
164 try:
165 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000166 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 pass
168 del self.__tempfiles[:]
169 if self.tempcache:
170 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000171
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000173 """Add a header to be used by the HTTP interface only
174 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000176
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000177 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000179 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000180 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000181 # percent encode url, fixing lame server errors for e.g, like space
182 # within url paths.
Senthil Kumaran18d5a692010-02-20 22:05:34 +0000183 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000184 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000185 filename, headers = self.tempcache[fullurl]
186 fp = open(filename, 'rb')
187 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000188 urltype, url = splittype(fullurl)
189 if not urltype:
190 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000191 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000192 proxy = self.proxies[urltype]
193 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000194 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000195 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000196 else:
197 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000198 name = 'open_' + urltype
199 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000200 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000201 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000202 if proxy:
203 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000204 else:
205 return self.open_unknown(fullurl, data)
206 try:
207 if data is None:
208 return getattr(self, name)(url)
209 else:
210 return getattr(self, name)(url, data)
211 except socket.error, msg:
212 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000213
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000214 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000215 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000216 type, url = splittype(fullurl)
217 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000218
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000219 def open_unknown_proxy(self, proxy, fullurl, data=None):
220 """Overridable interface to open unknown URL type."""
221 type, url = splittype(fullurl)
222 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
223
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000224 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000225 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000226 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000227 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000228 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000229 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000230 return self.tempcache[url]
231 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000232 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000233 try:
234 fp = self.open_local_file(url1)
235 hdrs = fp.info()
Philip Jenvey0299d0d2009-12-03 02:40:13 +0000236 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000237 return url2pathname(splithost(url1)[1]), hdrs
Georg Brandl84fedf72010-02-06 22:59:15 +0000238 except IOError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000239 pass
Fred Drake316a7932000-08-24 01:01:26 +0000240 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000241 try:
242 headers = fp.info()
243 if filename:
244 tfp = open(filename, 'wb')
245 else:
246 import tempfile
247 garbage, path = splittype(url)
248 garbage, path = splithost(path or "")
249 path, garbage = splitquery(path or "")
250 path, garbage = splitattr(path or "")
251 suffix = os.path.splitext(path)[1]
252 (fd, filename) = tempfile.mkstemp(suffix)
253 self.__tempfiles.append(filename)
254 tfp = os.fdopen(fd, 'wb')
255 try:
256 result = filename, headers
257 if self.tempcache is not None:
258 self.tempcache[url] = result
259 bs = 1024*8
260 size = -1
261 read = 0
262 blocknum = 0
Senthil Kumaran87e58552011-11-01 02:44:45 +0800263 if "content-length" in headers:
264 size = int(headers["Content-Length"])
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000265 if reporthook:
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000266 reporthook(blocknum, bs, size)
267 while 1:
268 block = fp.read(bs)
269 if block == "":
270 break
271 read += len(block)
272 tfp.write(block)
273 blocknum += 1
274 if reporthook:
275 reporthook(blocknum, bs, size)
276 finally:
277 tfp.close()
278 finally:
279 fp.close()
Georg Brandlb9256022005-08-24 18:46:39 +0000280
281 # raise exception if actual size does not match content-length header
282 if size >= 0 and read < size:
283 raise ContentTooShortError("retrieval incomplete: got only %i out "
284 "of %i bytes" % (read, size), result)
285
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000287
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000288 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000289
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000291 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000292 import httplib
293 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000294 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000295 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000296 host, selector = splithost(url)
297 if host:
298 user_passwd, host = splituser(host)
299 host = unquote(host)
300 realhost = host
301 else:
302 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000303 # check whether the proxy contains authorization information
304 proxy_passwd, host = splituser(host)
305 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000306 urltype, rest = splittype(selector)
307 url = rest
308 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000309 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000310 realhost = None
311 else:
312 realhost, rest = splithost(rest)
313 if realhost:
314 user_passwd, realhost = splituser(realhost)
315 if user_passwd:
316 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000317 if proxy_bypass(realhost):
318 host = realhost
319
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000320 #print "proxy via http:", host, selector
321 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000322
Martin v. Löwis3e865952006-01-24 15:51:21 +0000323 if proxy_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800324 proxy_passwd = unquote(proxy_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000325 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000326 else:
327 proxy_auth = None
328
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000329 if user_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800330 user_passwd = unquote(user_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000331 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 else:
333 auth = None
334 h = httplib.HTTP(host)
335 if data is not None:
336 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000337 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
338 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000339 else:
340 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000341 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000342 if auth: h.putheader('Authorization', 'Basic %s' % auth)
343 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000344 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000345 h.endheaders(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000346 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000347 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000348 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000349 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000350 # something went wrong with the HTTP status line
351 raise IOError, ('http protocol error', 0,
352 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000353 # According to RFC 2616, "2xx" code indicates that the client's
354 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000355 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000356 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000357 else:
358 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000359 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000360 else:
361 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000362
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000363 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000364 """Handle http errors.
365 Derived class can override this, or provide specific handlers
366 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000367 # First check if there's a specific handler for this error
368 name = 'http_error_%d' % errcode
369 if hasattr(self, name):
370 method = getattr(self, name)
371 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000372 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000373 else:
374 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000375 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000376 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000377
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000378 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000379 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000380 fp.close()
381 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000382
Bill Janssen426ea0a2007-08-29 22:35:05 +0000383 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000384 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000385 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000386
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000387 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000388 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000389 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000390 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000391 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000392 if host:
393 user_passwd, host = splituser(host)
394 host = unquote(host)
395 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000396 else:
397 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000398 # here, we determine, whether the proxy contains authorization information
399 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000400 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000401 url = rest
402 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000403 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000404 realhost = None
405 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000406 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000407 if realhost:
408 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000409 if user_passwd:
410 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000411 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000412 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000413 if proxy_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800414 proxy_passwd = unquote(proxy_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000415 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000416 else:
417 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000418 if user_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800419 user_passwd = unquote(user_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000420 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000421 else:
422 auth = None
423 h = httplib.HTTPS(host, 0,
424 key_file=self.key_file,
425 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000426 if data is not None:
427 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000428 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000429 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000430 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000431 else:
432 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000433 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
434 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000435 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000436 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000437 h.endheaders(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000438 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000439 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000440 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000441 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000442 # something went wrong with the HTTP status line
443 raise IOError, ('http protocol error', 0,
444 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000445 # According to RFC 2616, "2xx" code indicates that the client's
446 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000447 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000448 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000449 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000450 if data is None:
451 return self.http_error(url, fp, errcode, errmsg, headers)
452 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000453 return self.http_error(url, fp, errcode, errmsg, headers,
454 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000455
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000456 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000457 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000458 if not isinstance(url, str):
459 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000460 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000461 return self.open_ftp(url)
462 else:
463 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000464
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000465 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000466 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000467 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000468 try:
469 from cStringIO import StringIO
470 except ImportError:
471 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000472 host, file = splithost(url)
473 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000474 try:
475 stats = os.stat(localname)
476 except OSError, e:
477 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000478 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000479 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000480 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000481 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000482 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
483 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000484 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000485 urlfile = file
486 if file[:1] == '/':
487 urlfile = 'file://' + file
Senthil Kumaran58c60622012-01-21 11:43:02 +0800488 elif file[:2] == './':
489 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
Guido van Rossumf0713d32001-08-09 17:43:35 +0000490 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000491 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000492 host, port = splitport(host)
493 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000494 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000495 urlfile = file
496 if file[:1] == '/':
497 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000498 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000499 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000500 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000501
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000502 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000503 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000504 if not isinstance(url, str):
505 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000506 import mimetypes, mimetools
507 try:
508 from cStringIO import StringIO
509 except ImportError:
510 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000511 host, path = splithost(url)
512 if not host: raise IOError, ('ftp error', 'no host given')
513 host, port = splitport(host)
514 user, host = splituser(host)
515 if user: user, passwd = splitpasswd(user)
516 else: passwd = None
517 host = unquote(host)
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000518 user = user or ''
519 passwd = passwd or ''
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000520 host = socket.gethostbyname(host)
521 if not port:
522 import ftplib
523 port = ftplib.FTP_PORT
524 else:
525 port = int(port)
526 path, attrs = splitattr(path)
527 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000528 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000529 dirs, file = dirs[:-1], dirs[-1]
530 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000531 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000532 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000533 # XXX thread unsafe!
534 if len(self.ftpcache) > MAXFTPCACHE:
535 # Prune the cache, rather arbitrarily
536 for k in self.ftpcache.keys():
537 if k != key:
538 v = self.ftpcache[k]
539 del self.ftpcache[k]
540 v.close()
541 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000542 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000543 self.ftpcache[key] = \
544 ftpwrapper(user, passwd, host, port, dirs)
545 if not file: type = 'D'
546 else: type = 'I'
547 for attr in attrs:
548 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000549 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000550 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000551 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000552 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000553 mtype = mimetypes.guess_type("ftp:" + url)[0]
554 headers = ""
555 if mtype:
556 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000557 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000558 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000559 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000560 return addinfourl(fp, headers, "ftp:" + url)
561 except ftperrors(), msg:
562 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000563
def open_data(self, url, data=None):
    """Use "data" URL.

    *url* is the part of a data URL after the "data:" prefix.  Any POSTed
    *data* is ignored.  The payload is decoded (base64 or percent-quoting),
    wrapped in a synthetic message with Date, Content-type and
    Content-Length headers, and returned as an addinfourl object.

    Raises IOError if *url* is not a str or contains no comma.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    mediatype, comma, payload = url.partition(',')
    if not comma:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'

    # A trailing ";token" without '=' is the transfer encoding, not a
    # media-type parameter.
    encoding = ''
    cut = mediatype.rfind(';')
    if cut >= 0 and '=' not in mediatype[cut:]:
        encoding = mediatype[cut+1:]
        mediatype = mediatype[:cut]

    lines = ['Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                      time.gmtime(time.time())),
             'Content-type: %s' % mediatype]
    if encoding == 'base64':
        payload = base64.decodestring(payload)
    else:
        payload = unquote(payload)
    lines.append('Content-Length: %d' % len(payload))
    # The blank entry separates the header lines from the body.
    lines.append('')
    lines.append(payload)

    f = StringIO('\n'.join(lines))
    # Parsing the headers leaves f positioned at the start of the payload.
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000608
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000609
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301, 302, 303, 307) and Basic authentication
    retries (401, 407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) pair.
        self.auth_cache = {}
        # Redirects followed so far for the request in progress.
        self.tries = 0
        # Redirect limit; a false value disables the check.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries redirects have already been
        followed, in which case the response is handed to http_error_500
        (if defined) or http_error_default as a 500 error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when following the redirect raises; otherwise the
            # stale count makes later, unrelated requests on this opener
            # hit the recursion limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a 'location' nor a 'uri' header is
        present.  Raises IOError when the target is not an http, https or
        ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the URLopener.http_error_default() calls below are
        # not returned, so this code relies on the base implementation
        # raising -- confirm against URLopener.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the base-class calls are
        # expected to raise -- confirm against URLopener.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth /
        # retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to embed the user and password, then
        reopens the URL.  Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # it is passed on as the clear_cache flag.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Rewrites self.proxies['https'] to embed the user and password, then
        reopens the URL.  Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd@ embedded in the host.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed on as the clear_cache flag.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd@ embedded in the host.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*.

        Cached credentials are returned unless *clear_cache* is true, in
        which case the cached entry is dropped and the user is prompted
        again.  A non-empty answer is cached for later calls.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal and returns (user, passwd), or
        (None, None) if the user interrupts the prompt.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000805
806
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000807# Utility functions
808
# Address of 'localhost'; filled in by the first call to localhost().
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The name is resolved on the first call only; later calls return the
    remembered address.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000816
# Address of this machine; filled in by the first call to thishost().
_thishost = None
def thishost():
    """Return the IP address of the current host.

    If the machine's own host name cannot be resolved, the address of
    'localhost' is used instead.  The result is remembered after the
    first call.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        _thishost = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        _thishost = socket.gethostbyname('localhost')
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000827
# ftplib.all_errors; looked up lazily so ftplib is imported only on demand.
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000836
# Shared empty header object; built by the first call to noheaders().
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The same object is returned on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    # The backing file is not needed once the (empty) headers are parsed.
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000850
851
852# Utility classes
853
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000854class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000855 """Class used by open_ftp() for cache of open FTP connections."""
856
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000857 def __init__(self, user, passwd, host, port, dirs,
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200858 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
Nadeem Vawdaa620fac2011-07-23 17:04:42 +0200859 persistent=True):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000860 self.user = user
861 self.passwd = passwd
862 self.host = host
863 self.port = port
864 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000865 self.timeout = timeout
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200866 self.refcount = 0
867 self.keepalive = persistent
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000868 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000869
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000870 def init(self):
871 import ftplib
872 self.busy = 0
873 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000874 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000875 self.ftp.login(self.user, self.passwd)
Senthil Kumaran964c25f2013-06-02 11:59:09 -0700876 _target = '/'.join(self.dirs)
877 self.ftp.cwd(_target)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000878
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000879 def retrfile(self, file, type):
880 import ftplib
881 self.endtransfer()
882 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
883 else: cmd = 'TYPE ' + type; isdir = 0
884 try:
885 self.ftp.voidcmd(cmd)
886 except ftplib.all_errors:
887 self.init()
888 self.ftp.voidcmd(cmd)
889 conn = None
890 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000891 # Try to retrieve as a file
892 try:
893 cmd = 'RETR ' + file
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200894 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000895 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000896 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000897 raise IOError, ('ftp error', reason), sys.exc_info()[2]
898 if not conn:
899 # Set transfer mode to ASCII!
900 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000901 # Try a directory listing. Verify that directory exists.
902 if file:
903 pwd = self.ftp.pwd()
904 try:
905 try:
906 self.ftp.cwd(file)
907 except ftplib.error_perm, reason:
908 raise IOError, ('ftp error', reason), sys.exc_info()[2]
909 finally:
910 self.ftp.cwd(pwd)
911 cmd = 'LIST ' + file
912 else:
913 cmd = 'LIST'
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200914 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000915 self.busy = 1
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200916 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
917 self.refcount += 1
918 conn.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000919 # Pass back both a suitably decorated object and a retrieval length
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200920 return (ftpobj, retrlen)
921
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000922 def endtransfer(self):
923 if not self.busy:
924 return
925 self.busy = 0
926 try:
927 self.ftp.voidresp()
928 except ftperrors():
929 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000930
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000931 def close(self):
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200932 self.keepalive = False
933 if self.refcount <= 0:
934 self.real_close()
935
936 def file_close(self):
937 self.endtransfer()
938 self.refcount -= 1
939 if self.refcount <= 0 and not self.keepalive:
940 self.real_close()
941
942 def real_close(self):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000943 self.endtransfer()
944 try:
945 self.ftp.close()
946 except ftperrors():
947 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000948
class addbase:
    """Base class for addinfo and addclosehook."""

    # Deliberately left as written (no explicit base class): the iteration
    # hooks are bound per instance in __init__.

    def __init__(self, fp):
        """Wrap *fp*, exposing its read methods directly on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Objects without a descriptor still get a callable fileno().
        self.fileno = fp.fileno if hasattr(fp, "fileno") else (lambda: None)
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the borrowed methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000977
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap *fp*; close() will call closehook(*hookargs) once."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook (at most once), then close the wrapped file.

        The wrapped file is closed even if the hook raises; the hook's
        exception still propagates to the caller.
        """
        try:
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                # Disarm before calling, so a failing or re-entrant hook
                # is never run a second time.
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000992
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap *fp* and remember *headers* for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers given at construction time."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001002
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap *fp* and remember *headers*, *url* and the status *code*."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the headers given at construction time."""
        return self.headers

    def getcode(self):
        """Return the status code given at construction time (may be None)."""
        return self.code

    def geturl(self):
        """Return the URL given at construction time."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +00001020
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001021
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') --> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') -->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') --> 'abc def'
# quote('abc def') --> 'abc%20def'
1036
# Choose the implementation once, at import time: an interpreter without
# the ``unicode`` builtin can never be handed a unicode string.
try:
    unicode
except NameError:
    def _is_unicode(x):
        """Always false: this interpreter has no unicode type."""
        return 0
else:
    def _is_unicode(x):
        """Return true if *x* is a unicode string."""
        return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001045
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode string is returned unchanged.  Raises
    UnicodeError if a unicode URL contains non-ASCII characters.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1057
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one pair of enclosing angle brackets
    and a leading 'URL:' marker, in that order.
    """
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[len('URL:'):].strip()
    return text
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001065
# Compiled scheme pattern; built on first use by splittype().
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case.  If *url* has no scheme the
    result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match ends just past the ':' that terminates the scheme.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001079
# Compiled authority pattern; built on first use by splithost().
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  A non-empty
    remainder that does not start with '/' gets one prepended.  If *url*
    does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must end the host as well: otherwise '//127.0.0.1#@evil.com/'
        # is reported as host '127.0.0.1#@evil.com'.  DOTALL keeps a
        # newline in the path from defeating the match altogether.
        _hostprog = re.compile('^//([^/?#]*)(.*)$', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001096
# Compiled userinfo pattern; built on first use by splituser().
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@'.  If there is no '@' the result is
    (None, host).
    """
    global _userprog
    if _userprog is None:
        import re
        # re.S, as in splitpasswd(): a newline inside the user info (for
        # example in a password) must not stop the split from happening.
        _userprog = re.compile('^(.*)@(.*)$', re.S)

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host
Guido van Rossum7c395db1994-07-04 22:14:49 +00001108
# Compiled user/password pattern; built on first use by splitpasswd().
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'.  If there is none the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001120
# splitport('host:port') --> 'host', 'port'
# Compiled host/port pattern; built on first use by splitport().
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  A trailing ':' with no
    digits is dropped and the port reported as None; a non-numeric suffix
    leaves *host* untouched with a port of None.
    """
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]*)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    hostname, port = found.groups()
    # An empty port ('host:') is reported as None, not as ''.
    return hostname, (port or None)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001136
# Compiled host/port pattern; built on first use by splitnport().
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    hostname, port = found.groups()
    if not port:
        # 'host:' -- the colon is dropped and the default port applies.
        return hostname, defport
    try:
        return hostname, int(port)
    except ValueError:
        return hostname, None
Guido van Rossum53725a21996-06-13 19:12:35 +00001158
# Compiled path/query pattern; built on first use by splitquery().
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'.  If there is none the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001170
# Compiled path/fragment pattern; built on first use by splittag().
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'.  If there is none the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001182
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    Without any ';' in url the attribute list is empty."""
    path, sep, rest = url.partition(';')
    if not sep:
        return path, []
    return path, rest.split(';')
Guido van Rossum7c395db1994-07-04 22:14:49 +00001188
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='.  When attr contains no '=', the
    result is (attr, None)."""
    global _valueprog
    if _valueprog is None:
        # The pattern is compiled on first use and cached in the global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    m = _valueprog.match(attr)
    if m is None:
        return attr, None
    return m.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001200
R. David Murraybfbdefe2010-05-25 15:20:46 +00001201# urlparse contains a duplicate of this method to avoid a circular import. If
1202# you update this method, also update the copy in urlparse. This code
1203# duplication does not exist in Python3.
1204
# Hex digits in both cases, so that '%7e' and '%7E' both decode.
_hexdig = '0123456789ABCDEFabcdef'
# Lookup table: every two-digit hex string -> the character with that
# ordinal.
_hextochr = {hi + lo: chr(int(hi + lo, 16))
             for hi in _hexdig
             for lo in _hexdig}
# Matches runs of ASCII characters; the capturing group makes split()
# return the ASCII runs interleaved with the non-ASCII text between them.
_asciire = re.compile('([\x00-\x7f]+)')
Raymond Hettinger803ce802005-09-10 06:49:04 +00001209
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by two hex digits is replaced by the character
    with that code; a '%' that is not followed by two hex digits is left
    untouched.  For unicode input only the ASCII runs are unquoted (the
    results being decoded as latin1) and the non-ASCII text in between
    is passed through unchanged."""
    if _is_unicode(s):
        if '%' not in s:
            return s
        # split() yields: non-ASCII, ASCII, non-ASCII, ASCII, ...
        # so the odd indexes hold the ASCII runs.
        segments = _asciire.split(s)
        out = [segments[0]]
        for idx in range(1, len(segments), 2):
            out.append(unquote(str(segments[idx])).decode('latin1'))
            out.append(segments[idx + 1])
        return ''.join(out)

    pieces = s.split('%')
    if len(pieces) == 1:
        # fastpath: nothing to unquote
        return s
    out = [pieces[0]]
    for chunk in pieces[1:]:
        decoded = _hextochr.get(chunk[:2])
        if decoded is None:
            # Not a valid escape: put the '%' back and keep the text.
            out.append('%')
            out.append(chunk)
        else:
            out.append(decoded)
            out.append(chunk[2:])
    return ''.join(out)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001237
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space."""
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001242
# Characters that quote() never escapes.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Base table for quote(): each of the 256 single-byte characters maps to
# itself when it is always safe, and to its '%XX' escape otherwise.
_safe_map = {}
for i in range(256):
    c = chr(i)
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{:02X}'.format(i)
# Cache used by quote(): (safe, always_safe) -> (quoter, safe characters).
_safe_quoters = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001250
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    safe by its '%XX' escape.

    Which characters have to be quoted depends on the part of the URL
    (path, query, ...) being built.  RFC 2396 Uniform Resource
    Identifiers (URI): Generic Syntax lists these reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    The default value of safe suits the path section of a URL: '/' is
    reserved there, but a path passed to quote() normally already uses
    its slashes as separators, so they are left alone.

    An empty s is returned as is; None raises TypeError.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    if not s:
        # fastpath
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First call with this set of safe characters: build the lookup
        # table once and remember it together with the full safe set.
        table = _safe_map.copy()
        for ch in safe:
            table[ch] = ch
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[cachekey] = cached
    quoter, safe = cached
    if not s.rstrip(safe):
        # Only safe characters: nothing to escape.
        return s
    return ''.join([quoter(ch) for ch in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001289
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep the spaces through quote() so that they can be turned into
    # '+' afterwards instead of '%20'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001296
Florent Xiclunae127e242010-05-17 10:39:07 +00001297def urlencode(query, doseq=0):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001298 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001299
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001300 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001301 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001302
1303 If the query arg is a sequence of two-element tuples, the order of the
1304 parameters in the output will match the order of parameters in the
1305 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001306 """
Tim Peters658cba62001-02-09 20:06:00 +00001307
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001308 if hasattr(query,"items"):
1309 # mapping objects
1310 query = query.items()
1311 else:
1312 # it's a bother at times that strings and string-like objects are
1313 # sequences...
1314 try:
1315 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001316 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001317 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001318 raise TypeError
1319 # zero-length sequences of all types will get here and succeed,
1320 # but that's a minor nit - since the original implementation
1321 # allowed empty dicts that type of behavior probably should be
1322 # preserved for consistency
1323 except TypeError:
1324 ty,va,tb = sys.exc_info()
1325 raise TypeError, "not a valid non-string sequence or mapping object", tb
1326
Guido van Rossume7b146f2000-02-04 15:28:42 +00001327 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001328 if not doseq:
1329 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001330 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001331 k = quote_plus(str(k))
1332 v = quote_plus(str(v))
1333 l.append(k + '=' + v)
1334 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001335 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001336 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001337 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001338 v = quote_plus(v)
1339 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001340 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001341 # is there a reasonable way to convert to ASCII?
1342 # encode generates a string, but "replace" or "ignore"
1343 # lose information and "strict" can raise UnicodeError
1344 v = quote_plus(v.encode("ASCII","replace"))
1345 l.append(k + '=' + v)
1346 else:
1347 try:
1348 # is this a sufficient test for sequence-ness?
Georg Brandl84fedf72010-02-06 22:59:15 +00001349 len(v)
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001350 except TypeError:
1351 # not a sequence
1352 v = quote_plus(str(v))
1353 l.append(k + '=' + v)
1354 else:
1355 # loop over the sequence
1356 for elt in v:
1357 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001358 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001359
Guido van Rossum442e7201996-03-20 15:33:11 +00001360# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but an all-lowercase
    '<scheme>_proxy' variable always takes precedence over any other
    spelling; a lowercase variable with an empty value removes the
    scheme.  When REQUEST_METHOD is set (i.e. when running as a CGI
    script), only the lowercase 'http_proxy' is honoured for 'http'.

    """
    proxies = {}
    # First pass: accept any capitalisation of the variable name.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110: a web server running us as a CGI script exports
    # the client's "Proxy:" request header as HTTP_PROXY, so a value that
    # is not all-lowercase cannot be trusted there.  A lowercase
    # http_proxy is restored by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: exact lowercase names override whatever the first pass
    # found, which makes the result independent of the iteration order.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            name = name.lower()
            if value:
                proxies[name[:-6]] = value
            else:
                proxies.pop(name[:-6], None)
    return proxies
1376
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY
    when no_proxy is unset or empty), which should be a list of DNS
    suffixes separated by commas, or '*' for all hosts.

    Returns 1 when the host, with or without its ':port' part, ends
    with one of the suffixes, and 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    if no_proxy == '*':
        # '*' is special case for always bypass
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    for suffix in no_proxy.split(','):
        suffix = suffix.strip()
        if not suffix:
            # empty entries (e.g. from ',,') never match
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1396
1397
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.

        host may carry a ':port' suffix.  Each non-empty entry of the
        'exceptions' setting is either a dotted-decimal network,
        optionally followed by '/prefix-length', which is compared with
        the resolved address of the host, or an fnmatch-style pattern,
        which is compared with host itself.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack up to four dotted-decimal fields into one integer;
            # missing trailing fields count as 0.
            parts = [int(part) for part in ipAddr.split('.')]
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved lazily, the first time a numeric entry is seen.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value:
                continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        # Host cannot be resolved: a numeric entry
                        # cannot match it.
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit prefix length: 8 bits per field given.
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                if mask < 0 or mask > 32:
                    # An out-of-range prefix length would give a negative
                    # shift count below (ValueError); ignore the entry.
                    continue
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return true if host should be accessed without a proxy.

        When the environment defines any proxies, the decision is made
        by proxy_bypass_environment(); otherwise the system
        configuration is consulted.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty (or partially filled) dictionary is returned when the
        registry key cannot be read or its value has an unexpected
        format.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when a query or the parsing fails.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry says host must bypass the proxy, else 0.

        The ';'-separated glob patterns of the 'ProxyOverride' value are
        matched, ignoring case, against the host name (without port),
        its resolved address and its fully qualified name.  The special
        entry '<local>' matches any host name without a dot.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                # The key is only needed for the two queries above.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if not test:
                # An empty entry (e.g. from a trailing ';') would turn
                # into an empty pattern, which matches every host.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return true if host should be accessed without a proxy.

        When the environment defines any proxies, the decision is made
        by proxy_bypass_environment(); otherwise the registry is
        consulted.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001601
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001602# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 byte values through quote() and unquote().

    Prints 'Wrong!' when the round trip does not give back the input,
    then the repr of the input, of the quoted and of the unquoted
    string, and finally the elapsed time in seconds."""
    s = ''.join([chr(i) for i in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print('%s sec' % round(t1 - t0, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001617
1618
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001619def reporthook(blocknum, blocksize, totalsize):
1620 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001621 print "Block number: %d, Block size: %d, Total size: %d" % (
1622 blocknum, blocksize, totalsize)