blob: 09a054b623cbcb5b4e02ffeda16cb18757ca43a0 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080030import base64
Serhiy Storchaka923baea2013-03-14 21:31:09 +020031import re
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080032
Brett Cannon69200fa2004-03-23 21:26:39 +000033from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000034
# Public names of this module (what "from urllib import *" exposes).
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-Agent header value.
__version__ = '1.17'    # XXX This version is not always updated :-(

# open_ftp() prunes the connection cache once it holds more entries
# than this.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000046
# Pick the url <-> pathname converters for the running platform.
# Windows and RISC OS ship dedicated modules; everywhere else plain
# percent-(un)quoting is sufficient.
if os.name not in ('nt', 'riscos'):
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL to a file system path.

        OS-specific helper, not recommended for general use.  On this
        platform the conversion is just unquote().
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to the path part of a 'file' URL.

        OS-specific helper, not recommended for general use.  On this
        platform the conversion is just quote().
        """
        return quote(pathname)
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    from rourl2path import url2pathname, pathname2url
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage: a lazily created opener shared by urlopen(),
# urlretrieve() and urlcleanup().
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request data; forwarded to the opener's open()
               only when it is not None.
    proxies -- optional proxy mapping.  When given, a fresh
               FancyURLopener is built for this call and the shared
               module-level opener is left untouched.

    Emits a Python 3 compatibility warning through warnings.warnpy3k().
    """
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First use: create the shared opener.
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments
    are passed straight to its retrieve() method, whose result is
    returned unchanged.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Reset module-level state.

    Calls cleanup() on the shared opener if one has been created, then
    empties the _safe_quoters cache and the module-wide ftpcache.
    """
    opener = _urlopener
    if opener:
        opener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000100
# check for SSL
# _have_ssl records whether the ssl module can be imported; the class
# body below only defines open_https() when it is true.
try:
    import ssl
except ImportError:
    # Catch only the "module not available" case.  A bare except here
    # would also swallow KeyboardInterrupt and SystemExit raised while
    # the import is in progress.
    _have_ssl = False
else:
    _have_ssl = True
108
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying the partial result of an incomplete download.

    message -- the error text, passed on to IOError.
    content -- stored unchanged on the instance as .content
               (URLopener.retrieve() passes its (filename, headers)
               result here).
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000114
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000115ftpcache = {}
116class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000117 """Class to open URLs.
118 This is a class rather than just a subroutine because we may need
119 more than one set of global protocol-specific options.
120 Note -- this is a base class for those who don't want the
121 automatic handling of errors type 302 (relocated) and 401
122 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000123
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000124 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000125
Guido van Rossumba311382000-08-24 16:18:04 +0000126 version = "Python-urllib/%s" % __version__
127
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 # Constructor
def __init__(self, proxies=None, **x509):
    """Initialise proxy table, client-certificate options and caches.

    proxies -- mapping from URL scheme to proxy URL; when None the
               result of getproxies() is used.  Must offer has_key().
    x509    -- optional 'key_file' and 'cert_file' keyword arguments,
               stored on the instance (open_https() hands them to
               httplib.HTTPS).
    """
    if proxies is None:
        proxies = getproxies()
    assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
    self.proxies = proxies

    # Client certificate settings; None when not supplied.
    self.key_file = x509.get('key_file')
    self.cert_file = x509.get('cert_file')

    # Headers sent with every HTTP(S) request; see addheader().
    self.addheaders = [('User-Agent', self.version)]

    # Temporary files created by retrieve(); removed by cleanup().
    self.__tempfiles = []
    # Keep our own reference to os.unlink -- see cleanup().
    self.__unlink = os.unlink

    # Undocumented feature: if you assign {} to tempcache,
    # it is used to cache files retrieved with
    # self.retrieve().  This is not enabled by default
    # since it does not work for changing documents (and I
    # haven't got the logic to check expiration headers
    # yet).
    self.tempcache = None

    # Undocumented feature: you can use a different
    # ftp cache by assigning to the .ftpcache member;
    # in case you want logically independent URL openers
    # XXX This is not threadsafe.  Bah.
    self.ftpcache = ftpcache
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
def __del__(self):
    """Finalizer: release resources by delegating to close()."""
    self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000154
def close(self):
    """Close the opener; currently this just runs cleanup()."""
    self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000157
def cleanup(self):
    """Delete the temporary files made by retrieve() and empty tempcache.

    Files that can no longer be removed (OSError) are silently skipped.
    """
    # This code sometimes runs when the rest of this module
    # has already been deleted, so it can't use any globals
    # or import anything.  That is why unlink is reached through
    # the reference saved on the instance.
    tempfiles = self.__tempfiles
    if tempfiles:
        unlink = self.__unlink
        for path in tempfiles:
            try:
                unlink(path)
            except OSError:
                pass
        # Empty the list in place so the attribute keeps the same object.
        del tempfiles[:]
    cache = self.tempcache
    if cache:
        cache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000171
def addheader(self, *args):
    """Register an extra header for the HTTP interface.

    The positional arguments are stored together as one tuple and later
    passed to putheader(), e.g. u.addheader('Accept', 'sound/basic').
    """
    self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000176
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000177 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000179 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000180 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000181 # percent encode url, fixing lame server errors for e.g, like space
182 # within url paths.
Senthil Kumaran18d5a692010-02-20 22:05:34 +0000183 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000184 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000185 filename, headers = self.tempcache[fullurl]
186 fp = open(filename, 'rb')
187 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000188 urltype, url = splittype(fullurl)
189 if not urltype:
190 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000191 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000192 proxy = self.proxies[urltype]
193 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000194 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000195 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000196 else:
197 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000198 name = 'open_' + urltype
199 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000200 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000201 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000202 if proxy:
203 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000204 else:
205 return self.open_unknown(fullurl, data)
206 try:
207 if data is None:
208 return getattr(self, name)(url)
209 else:
210 return getattr(self, name)(url, data)
211 except socket.error, msg:
212 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000213
def open_unknown(self, fullurl, data=None):
    """Overridable hook for URLs whose scheme has no open_<scheme> method.

    The default implementation always raises
    IOError('url error', 'unknown url type', <scheme>).
    """
    scheme, _rest = splittype(fullurl)
    raise IOError('url error', 'unknown url type', scheme)
Guido van Rossumca445401995-08-29 19:19:12 +0000218
def open_unknown_proxy(self, proxy, fullurl, data=None):
    """Overridable hook for a proxy whose scheme has no open_<scheme> method.

    The default implementation always raises
    IOError('url error', 'invalid proxy for <scheme>', proxy), where
    <scheme> is the scheme of fullurl.
    """
    scheme, _rest = splittype(fullurl)
    raise IOError('url error', 'invalid proxy for %s' % scheme, proxy)
223
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000224 # External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    filename   -- where to store the data; when empty a temporary file
                  is created (and remembered for cleanup()).
    reporthook -- optional callable invoked as
                  reporthook(blocknum, blocksize, totalsize) once before
                  reading and once after every block; totalsize is -1
                  when there is no Content-Length header.
    data       -- optional request data, passed on to open().

    Raises ContentTooShortError when fewer bytes than announced by
    Content-Length were received.
    """
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    scheme, rest = splittype(url)
    if filename is None and (not scheme or scheme == 'file'):
        # Local object: nothing is copied, report where it already lives.
        try:
            local = self.open_local_file(rest)
            local_headers = local.info()
            local.close()
            return url2pathname(splithost(rest)[1]), local_headers
        except IOError:
            # Not readable as a local file; fall through to open().
            pass

    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            # Reduce the URL to its bare path so the temporary file
            # gets the same extension as the remote object.
            path = splittype(url)[1]
            path = splithost(path or "")[1]
            path = splitquery(path or "")[0]
            path = splitattr(path or "")[0]
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            blocksize = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, blocksize, size)
            # iter() with a sentinel stops at the first empty read.
            for block in iter(lambda: fp.read(blocksize), ""):
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, blocksize, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise ContentTooShortError("retrieval incomplete: got only %i out "
                                   "of %i bytes" % (read, size), result)

    return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000287
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000288 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000289
def open_http(self, url, data=None):
    """Use HTTP protocol.

    url  -- either the string "//host/selector" (direct request) or the
            (proxyhost, fullurl) tuple that open() builds when a proxy
            is configured.
    data -- when not None the request is a POST with this body,
            otherwise a GET.

    Returns an addinfourl object for 2xx replies; every other status is
    handed to http_error().  Raises IOError when no host is given or
    the status line is unusable.
    """
    import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        # Direct request.
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        # Proxied request.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Rebuild the selector without the user:passwd@ part.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost

    if not host:
        raise IOError('http error', 'no host given')

    # Credentials are percent-decoded, then base64-encoded for the
    # "Basic" authentication scheme.
    proxy_auth = None
    if proxy_passwd:
        proxy_passwd = unquote(proxy_passwd)
        proxy_auth = base64.b64encode(proxy_passwd).strip()

    auth = None
    if user_passwd:
        user_passwd = unquote(user_passwd)
        auth = base64.b64encode(user_passwd).strip()

    h = httplib.HTTP(host)
    if data is None:
        h.putrequest('GET', selector)
    else:
        h.putrequest('POST', selector)
        h.putheader('Content-Type', 'application/x-www-form-urlencoded')
        h.putheader('Content-Length', '%d' % len(data))
    if proxy_auth:
        h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for args in self.addheaders:
        h.putheader(*args)
    h.endheaders(data)

    errcode, errmsg, headers = h.getreply()
    fp = h.getfile()
    if errcode == -1:
        if fp:
            fp.close()
        # something went wrong with the HTTP status line
        raise IOError('http protocol error', 0,
                      'got a bad status line', None)
    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if 200 <= errcode < 300:
        return addinfourl(fp, headers, "http:" + url, errcode)
    if data is None:
        return self.http_error(url, fp, errcode, errmsg, headers)
    return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000362
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle http errors.

    Looks for a method named http_error_DDD, where DDD is the numeric
    error code, and calls it (passing data only when it is not None).
    A true result from that handler is returned.  When there is no such
    handler, or it returns a false value, http_error_default() decides.
    Derived classes can override this method or supply the specific
    handlers.
    """
    handler_name = 'http_error_%d' % errcode
    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        extra = () if data is None else (data,)
        outcome = handler(url, fp, errcode, errmsg, headers, *extra)
        if outcome:
            return outcome
    return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000377
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handler: close the connection and raise IOError.

    The IOError is built from ('http error', errcode, errmsg, headers).
    """
    fp.close()
    raise IOError('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000382
if _have_ssl:
    def open_https(self, url, data=None):
        """Use HTTPS protocol.

        url  -- either the string "//host/selector" (direct request) or
                the (proxyhost, fullurl) tuple that open() builds when a
                proxy is configured.
        data -- when not None the request is a POST with this body,
                otherwise a GET.

        The connection is made with the instance's key_file and
        cert_file.  Returns an addinfourl object for 2xx replies; every
        other status is handed to http_error().  Raises IOError when no
        host is given or the status line is unusable.
        """
        import httplib
        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            # Direct request.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request.
            host, selector = url
            # here, we determine, whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'https':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd@ part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)

        if not host:
            raise IOError('https error', 'no host given')

        # Credentials are percent-decoded, then base64-encoded for the
        # "Basic" authentication scheme.
        proxy_auth = None
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd).strip()

        auth = None
        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd).strip()

        h = httplib.HTTPS(host, 0,
                          key_file=self.key_file,
                          cert_file=self.cert_file)
        if data is None:
            h.putrequest('GET', selector)
        else:
            h.putrequest('POST', selector)
            h.putheader('Content-Type',
                        'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        if proxy_auth:
            h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth:
            h.putheader('Authorization', 'Basic %s' % auth)
        if realhost:
            h.putheader('Host', realhost)
        for args in self.addheaders:
            h.putheader(*args)
        h.endheaders(data)

        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp:
                fp.close()
            # something went wrong with the HTTP status line
            raise IOError('http protocol error', 0,
                          'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= errcode < 300:
            return addinfourl(fp, headers, "https:" + url, errcode)
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        return self.http_error(url, fp, errcode, errmsg, headers,
                               data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000455
def open_file(self, url):
    """Use local file or FTP depending on form of URL.

    A URL of the form //host/... whose host part is neither empty nor
    'localhost' (case-insensitive) is handed to open_ftp(); everything
    else goes to open_local_file().  A non-string url (the tuple open()
    builds for proxied requests) raises IOError.
    """
    if not isinstance(url, str):
        raise IOError('file error', 'proxy support for file protocol currently not implemented')
    names_other_host = (url[:2] == '//'
                        and url[2:3] != '/'
                        and url[2:12].lower() != 'localhost/')
    if names_other_host:
        return self.open_ftp(url)
    return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000464
def open_local_file(self, url):
    """Use local file.

    The file is stat()ed first, so a missing file raises IOError before
    anything else is checked.  Synthetic Content-Type, Content-Length
    and Last-modified headers are attached to the returned addinfourl
    object.  A URL naming a host is accepted only when it has no port
    and the host resolves to this machine; otherwise IOError is raised.
    """
    import mimetypes, mimetools, email.utils
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    host, path = splithost(url)
    localname = url2pathname(path)
    try:
        stats = os.stat(localname)
    except OSError as e:
        # Callers expect IOError from the open_*() methods.
        raise IOError(e.errno, e.strerror, e.filename)

    modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    header_text = ('Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
                   (mtype or 'text/plain', stats.st_size, modified))
    headers = mimetools.Message(StringIO(header_text))

    if not host:
        urlfile = path
        if path[:1] == '/':
            urlfile = 'file://' + path
        elif path[:2] == './':
            raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
        return addinfourl(open(localname, 'rb'), headers, urlfile)

    host, port = splitport(host)
    is_this_machine = (not port and
                       socket.gethostbyname(host) in (localhost(), thishost()))
    if is_this_machine:
        urlfile = path
        if path[:1] == '/':
            urlfile = 'file://' + path
        return addinfourl(open(localname, 'rb'), headers, urlfile)

    raise IOError('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000501
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000502 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000503 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000504 if not isinstance(url, str):
505 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000506 import mimetypes, mimetools
507 try:
508 from cStringIO import StringIO
509 except ImportError:
510 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000511 host, path = splithost(url)
512 if not host: raise IOError, ('ftp error', 'no host given')
513 host, port = splitport(host)
514 user, host = splituser(host)
515 if user: user, passwd = splitpasswd(user)
516 else: passwd = None
517 host = unquote(host)
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000518 user = user or ''
519 passwd = passwd or ''
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000520 host = socket.gethostbyname(host)
521 if not port:
522 import ftplib
523 port = ftplib.FTP_PORT
524 else:
525 port = int(port)
526 path, attrs = splitattr(path)
527 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000528 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000529 dirs, file = dirs[:-1], dirs[-1]
530 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000531 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000532 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000533 # XXX thread unsafe!
534 if len(self.ftpcache) > MAXFTPCACHE:
535 # Prune the cache, rather arbitrarily
536 for k in self.ftpcache.keys():
537 if k != key:
538 v = self.ftpcache[k]
539 del self.ftpcache[k]
540 v.close()
541 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000542 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000543 self.ftpcache[key] = \
544 ftpwrapper(user, passwd, host, port, dirs)
545 if not file: type = 'D'
546 else: type = 'I'
547 for attr in attrs:
548 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000549 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000550 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000551 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000552 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000553 mtype = mimetypes.guess_type("ftp:" + url)[0]
554 headers = ""
555 if mtype:
556 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000557 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000558 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000559 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000560 return addinfourl(fp, headers, "ftp:" + url)
561 except ftperrors(), msg:
562 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000563
def open_data(self, url, data=None):
    """Open a "data:" URL and return it as a file-like object.

    *url* is the part of the URL after the "data:" scheme prefix; any
    POSTed *data* is ignored.  The result wraps an in-memory message
    made of Date, Content-type and Content-Length headers followed by
    the decoded payload.

    Raises IOError if *url* is not a str or contains no comma.
    """
    if not isinstance(url, str):
        raise IOError('data error',
                      'proxy support for data protocol currently not implemented')
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    pieces = url.split(',', 1)
    if len(pieces) != 2:
        raise IOError('data error', 'bad data URL')
    mediatype, payload = pieces
    mediatype = mediatype or 'text/plain;charset=US-ASCII'

    # A trailing ";token" without "=" is the transfer encoding
    # (e.g. ";base64"), not a media type parameter.
    encoding = ''
    cut = mediatype.rfind(';')
    if cut >= 0 and '=' not in mediatype[cut:]:
        encoding = mediatype[cut + 1:]
        mediatype = mediatype[:cut]

    stamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                          time.gmtime(time.time()))
    if encoding == 'base64':
        payload = base64.decodestring(payload)
    else:
        payload = unquote(payload)

    lines = [
        'Date: %s' % stamp,
        'Content-type: %s' % mediatype,
        'Content-Length: %d' % len(payload),
        '',
        payload,
    ]
    f = StringIO('\n'.join(lines))
    # Parsing the headers leaves f positioned at the start of the body.
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000608
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000609
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds handling of the redirect codes 301, 302, 303 and 307 and of
    Basic authentication challenges (401 and 407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) tuple.
        self.auth_cache = {}
        # Number of redirects followed for the request in progress and
        # the limit at which redirect recursion is reported.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries is reached, in which case
        the response is handed to http_error_500 (if defined) or
        http_error_default as a 500 "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset the counter even when following the redirect raises;
            # otherwise a failed request would leave this opener with a
            # reduced redirect budget for every later request.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a "location" nor a "uri" header is
        present.  Raises IOError when the target is not an http, https
        or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): URLopener.http_error_default is defined outside
        # this section and presumably raises; the explicit returns below
        # keep control flow correct either way.
        if not 'www-authenticate' in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): same assumption about URLopener.http_error_default
        # as in http_error_401.
        if not 'proxy-authenticate' in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth /
        # retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to embed the quoted user and
        password, then reopens the URL.  Returns None when no
        credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # they evidently failed, so the cached entry is dropped below.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Same procedure as retry_proxy_http_basic_auth, applied to
        self.proxies['https'].
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd@ embedded in the host.

        Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        # Strip credentials already present in the host part; a non-zero
        # i also makes get_user_passwd discard its cached entry.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd@ embedded in the host.

        Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is deleted and the user is prompted again.
        A non-empty answer is stored in the cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Finish the interrupted prompt line and report "no answer".
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000805
806
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000807# Utility functions
808
# Cached result of localhost(); filled in on first use.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the answer is reused afterwards.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000816
# Cached result of thishost(); filled in on first use.
_thishost = None
def thishost():
    """Return the IP address of the current host.

    Falls back to the address of 'localhost' when the machine's own
    host name cannot be resolved.  The answer is cached.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        _thishost = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        _thishost = socket.gethostbyname('localhost')
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000827
# Cached result of ftperrors(); filled in on first use.
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only on the first call.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000836
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000837_noheaders = None
838def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000839 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000840 global _noheaders
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000841 if _noheaders is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000842 import mimetools
Raymond Hettingera6172712004-12-31 19:15:26 +0000843 try:
844 from cStringIO import StringIO
845 except ImportError:
846 from StringIO import StringIO
847 _noheaders = mimetools.Message(StringIO(), 0)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000848 _noheaders.fp.close() # Recycle file descriptor
849 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000850
851
852# Utility classes
853
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000854class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000855 """Class used by open_ftp() for cache of open FTP connections."""
856
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000857 def __init__(self, user, passwd, host, port, dirs,
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200858 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
Nadeem Vawdaa620fac2011-07-23 17:04:42 +0200859 persistent=True):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000860 self.user = user
861 self.passwd = passwd
862 self.host = host
863 self.port = port
864 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000865 self.timeout = timeout
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200866 self.refcount = 0
867 self.keepalive = persistent
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000868 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000869
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000870 def init(self):
871 import ftplib
872 self.busy = 0
873 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000874 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000875 self.ftp.login(self.user, self.passwd)
Senthil Kumaran243cb802013-06-01 08:24:31 -0700876 self.ftp.cwd(os.path.join(*self.dirs))
Guido van Rossume7b146f2000-02-04 15:28:42 +0000877
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000878 def retrfile(self, file, type):
879 import ftplib
880 self.endtransfer()
881 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
882 else: cmd = 'TYPE ' + type; isdir = 0
883 try:
884 self.ftp.voidcmd(cmd)
885 except ftplib.all_errors:
886 self.init()
887 self.ftp.voidcmd(cmd)
888 conn = None
889 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000890 # Try to retrieve as a file
891 try:
892 cmd = 'RETR ' + file
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200893 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000894 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000895 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000896 raise IOError, ('ftp error', reason), sys.exc_info()[2]
897 if not conn:
898 # Set transfer mode to ASCII!
899 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000900 # Try a directory listing. Verify that directory exists.
901 if file:
902 pwd = self.ftp.pwd()
903 try:
904 try:
905 self.ftp.cwd(file)
906 except ftplib.error_perm, reason:
907 raise IOError, ('ftp error', reason), sys.exc_info()[2]
908 finally:
909 self.ftp.cwd(pwd)
910 cmd = 'LIST ' + file
911 else:
912 cmd = 'LIST'
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200913 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000914 self.busy = 1
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200915 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
916 self.refcount += 1
917 conn.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000918 # Pass back both a suitably decorated object and a retrieval length
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200919 return (ftpobj, retrlen)
920
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000921 def endtransfer(self):
922 if not self.busy:
923 return
924 self.busy = 0
925 try:
926 self.ftp.voidresp()
927 except ftperrors():
928 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000929
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000930 def close(self):
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200931 self.keepalive = False
932 if self.refcount <= 0:
933 self.real_close()
934
935 def file_close(self):
936 self.endtransfer()
937 self.refcount -= 1
938 if self.refcount <= 0 and not self.keepalive:
939 self.real_close()
940
941 def real_close(self):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000942 self.endtransfer()
943 try:
944 self.ftp.close()
945 except ftperrors():
946 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000947
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        # read() and readline() are required of the wrapped object.
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Objects without a descriptor still get a callable fileno().
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        # Iteration support is forwarded only when the object has it.
        for optional in ("__iter__", "next"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000976
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook (at most once), then close the file.

        The hook is disarmed before it is called and the wrapped file
        is closed in a finally clause, so a hook that raises neither
        leaks the file nor runs again on a later close().
        """
        try:
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000991
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        # Set up the file wrapper first, then attach the headers.
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001001
class addinfourl(addbase):
    """Class to add info(), getcode() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the status code given to the constructor (or None)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +00001019
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001020
Guido van Rossum7c395db1994-07-04 22:14:49 +00001021# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +00001022# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001023# splittype('type:opaquestring') --> 'type', 'opaquestring'
1024# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001025# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1026# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001027# splitport('host:port') --> 'host', 'port'
1028# splitquery('/path?query') --> '/path', 'query'
1029# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001030# splitattr('/path;attr1=value1;attr2=value2;...') ->
1031# '/path', ['attr1=value1', 'attr2=value2', ...]
1032# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001033# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1035
# _is_unicode(x) tells whether x is a unicode string; on interpreters
# that have no unicode type nothing is.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Unicode input is encoded
    as ASCII; UnicodeError is raised if that is not possible.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1056
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one pair of enclosing angle brackets
    and a leading 'URL:' marker, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001064
# Compiled on first use by splittype().
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case; it is None when the URL does
    not start with a scheme.
    """
    global _typeprog
    if _typeprog is None:
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its colon.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001078
# Compiled on first use by splithost().
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.  A non-empty
    remainder that does not begin with '/' (a bare query or fragment)
    gets a '/' prepended.
    """
    global _hostprog
    if _hostprog is None:
        # The host ends at the first '/', '?' or '#'.  Stopping at '#'
        # keeps text such as '#@evil.com' out of the host part, where
        # it would later be mistaken for user info.  DOTALL lets the
        # remainder contain newlines instead of silently failing to
        # match or dropping a trailing newline.
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001095
# Compiled on first use by splituser().
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; the user part is None when
    there is no '@'.
    """
    global _userprog
    if _userprog is None:
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001107
# Compiled on first use by splitpasswd().
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the password is None when
    there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001119
# Compiled on first use by splitport().
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  It is None when host
    has no port or when the port after the colon is empty; in the
    latter case the trailing colon is removed from the host.
    """
    global _portprog
    if _portprog is None:
        # [0-9]* (not +) so that an empty port, as in 'host:', is
        # recognised instead of leaving the colon in the host name.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001132
# Compiled on first use by splitnport().
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    name, port_text = found.group(1, 2)
    # An empty port counts as "not a valid number" as well.
    number = None
    if port_text:
        try:
            number = int(port_text)
        except ValueError:
            number = None
    return name, number
Guido van Rossum53725a21996-06-13 19:12:35 +00001154
# Compiled on first use by splitquery().
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; the query is None when there
    is no '?'.
    """
    global _queryprog
    if _queryprog is None:
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001166
# Compiled on first use by splittag().
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; the tag is None when there is
    no '#'.
    """
    global _tagprog
    if _tagprog is None:
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001178
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'."""
    path, found, tail = url.partition(';')
    attrs = tail.split(';') if found else []
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001184
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Return (attr, None) if there is
    no '='.  Plain string partitioning is used so that a value containing
    a newline is kept intact instead of defeating the split.
    """
    name, delim, value = attr.partition('=')
    if delim:
        return name, value
    return attr, None
Guido van Rossum7c395db1994-07-04 22:14:49 +00001196
R. David Murraybfbdefe2010-05-25 15:20:46 +00001197# urlparse contains a duplicate of this method to avoid a circular import. If
1198# you update this method, also update the copy in urlparse. This code
1199# duplication does not exist in Python3.
1200
# Hex digits in both letter cases, so that '%2f' and '%2F' both decode.
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-character hex pair to the character with that ordinal;
# unquote() looks up the two characters following each '%' in this table.
_hextochr = {hi + lo: chr(int(hi + lo, 16))
             for hi in _hexdig
             for lo in _hexdig}
# Splits a string into alternating non-ASCII / ASCII runs (the ASCII runs
# are captured, so they appear at the odd indexes of the split result).
_asciire = re.compile('([\x00-\x7f]+)')
Raymond Hettinger803ce802005-09-10 06:49:04 +00001205
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left untouched.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Only the ASCII runs can hold %-escapes: decode each of them as a
        # byte string and map the resulting bytes back through latin1.
        chunks = _asciire.split(s)
        out = [chunks[0]]
        for ascii_run, other in zip(chunks[1::2], chunks[2::2]):
            out.append(unquote(str(ascii_run)).decode('latin1'))
            out.append(other)
        return ''.join(out)

    pieces = s.split('%')
    if len(pieces) == 1:
        # fastpath: nothing was escaped
        return s
    out = [pieces[0]]
    for piece in pieces[1:]:
        decoded = _hextochr.get(piece[:2])
        if decoded is None:
            # not a valid escape: keep the '%' and the text as they were
            out.append('%')
            out.append(piece)
        else:
            out.append(decoded)
            out.append(piece[2:])
    return ''.join(out)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001233
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space."""
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001238
# Characters that quote() never escapes.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Maps each of the 256 single-byte characters to itself when it is always
# safe, and to its '%XX' escape otherwise.
_safe_map = {}
for i in range(256):
    c = chr(i)
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{:02X}'.format(i)
# Cache filled by quote(): (safe, always_safe) -> (quoter, full safe string).
_safe_quoters = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001246
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Raises TypeError if s is None.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    if not s:
        # fastpath: nothing to quote in an empty value
        return s

    key = (safe, always_safe)
    cached = _safe_quoters.get(key)
    if cached is None:
        # First use of this 'safe' set: build its lookup table once and
        # remember it together with the complete string of safe characters.
        table = _safe_map.copy()
        table.update([(ch, ch) for ch in safe])
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[key] = cached
    quoter, safe = cached

    # If stripping safe characters from the right leaves nothing, there is
    # nothing to escape.
    if not s.rstrip(safe):
        return s
    return ''.join([quoter(ch) for ch in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001285
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces through quote() unescaped, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001292
Florent Xiclunae127e242010-05-17 10:39:07 +00001293def urlencode(query, doseq=0):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001294 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001295
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001296 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001297 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001298
1299 If the query arg is a sequence of two-element tuples, the order of the
1300 parameters in the output will match the order of parameters in the
1301 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001302 """
Tim Peters658cba62001-02-09 20:06:00 +00001303
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001304 if hasattr(query,"items"):
1305 # mapping objects
1306 query = query.items()
1307 else:
1308 # it's a bother at times that strings and string-like objects are
1309 # sequences...
1310 try:
1311 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001312 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001313 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001314 raise TypeError
1315 # zero-length sequences of all types will get here and succeed,
1316 # but that's a minor nit - since the original implementation
1317 # allowed empty dicts that type of behavior probably should be
1318 # preserved for consistency
1319 except TypeError:
1320 ty,va,tb = sys.exc_info()
1321 raise TypeError, "not a valid non-string sequence or mapping object", tb
1322
Guido van Rossume7b146f2000-02-04 15:28:42 +00001323 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001324 if not doseq:
1325 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001326 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001327 k = quote_plus(str(k))
1328 v = quote_plus(str(v))
1329 l.append(k + '=' + v)
1330 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001331 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001332 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001333 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001334 v = quote_plus(v)
1335 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001336 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001337 # is there a reasonable way to convert to ASCII?
1338 # encode generates a string, but "replace" or "ignore"
1339 # lose information and "strict" can raise UnicodeError
1340 v = quote_plus(v.encode("ASCII","replace"))
1341 l.append(k + '=' + v)
1342 else:
1343 try:
1344 # is this a sufficient test for sequence-ness?
Georg Brandl84fedf72010-02-06 22:59:15 +00001345 len(v)
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001346 except TypeError:
1347 # not a sequence
1348 v = quote_plus(str(v))
1349 l.append(k + '=' + v)
1350 else:
1351 # loop over the sequence
1352 for elt in v:
1353 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001354 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001355
Guido van Rossum442e7201996-03-20 15:33:11 +00001356# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy, in any
    letter case; this seems to be the standard convention.  When the
    same variable is set in several letter cases, the all-lowercase
    spelling wins.  If you need a different way, you can pass a proxies
    dictionary to the [Fancy]URLopener constructor.

    Variables with an empty value are ignored.
    """
    proxies = {}
    # First pass: accept the variable in any letter case.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110: when running as a CGI script the web server sets
    # HTTP_PROXY from a client-supplied "Proxy:" header, so a value that
    # may have come from the non-lowercase spelling must not be trusted.
    # A genuine lowercase http_proxy is restored by the second pass.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: all-lowercase names take precedence, which makes the
    # result independent of the iteration order of os.environ.
    for name, value in os.environ.items():
        if value and name[-6:] == '_proxy' and name == name.lower():
            proxies[name[:-6]] = value
    return proxies
1372
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    A suffix matches the host itself and any sub-domain of it; matching
    is case-insensitive and respects label boundaries, so 'example.com'
    matches 'www.example.com' but not 'notexample.com'.  A leading dot
    on a suffix is ignored.

    Returns 1 if the proxy should be bypassed, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # Match against the host both with and without its port, so that an
    # entry may be written either way.
    hostonly, port = splitport(host)
    candidates = (hostonly.lower(), host.lower())
    for name in no_proxy.split(','):
        # '.example.com' and 'example.com' mean the same thing
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        dotted = '.' + name
        for candidate in candidates:
            if candidate == name or candidate.endswith(dotted):
                return 1
    # otherwise, don't bypass
    return 0
1392
1393
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted address into one integer, padding with zero
            # octets on the right (or truncating) to exactly four parts.
            parts = [int(part) for part in ipAddr.split('.')]
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved lazily, only when an address-style exception is seen.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value:
                continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = ip2num(socket.gethostbyname(hostonly))
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # no explicit prefix length: 8 bits per octet given
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                if mask > 32:
                    # An invalid prefix length would turn into a negative
                    # shift count below; ignore such an entry.
                    continue
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return true if the proxy should be bypassed for host.

        The environment settings are used if any <scheme>_proxy variable
        is set, the MacOSX system configuration otherwise.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the MacOSX system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # close the key even when a value is missing or malformed
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry proxy settings exclude host, else 0.

        The host name, its IP address and its fully qualified name are
        each compared, ignoring case, against the ';'-separated glob
        patterns of the ProxyOverride registry value.  The special entry
        '<local>' matches any host name that contains no dot.
        """
        try:
            import _winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        from fnmatch import fnmatchcase
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # now check if we match one of the registry values.
        for test in proxyOverride.split(';'):
            test = test.strip()
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
                continue
            # Whole-string glob match: an empty entry (e.g. from a trailing
            # ';') matches nothing, and characters other than the glob
            # wildcards are taken literally.
            pattern = test.lower()
            for val in host:
                if fnmatchcase(val.lower(), pattern):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return true if the proxy should be bypassed for host.

        The environment settings are used if any <scheme>_proxy variable
        is set, the registry settings otherwise.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001597
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001598# Test and time quote() and unquote()
def test1():
    """Round-trip a string of all 256 byte values (repeated four times)
    through quote() and unquote(), then print the strings and the time
    the round trip took."""
    s = ''.join([chr(code) for code in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    elapsed = time.time() - t0
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s %s' % (round(elapsed, 3), 'sec'))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001613
1614
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a remote transfer."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)