blob: 33641a5700125f7f251210dc917b48f31adb618e [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080030import base64
31
Brett Cannon69200fa2004-03-23 21:26:39 +000032from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000033
Skip Montanaro40fc1602001-03-01 04:27:19 +000034__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
35 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000036 "urlencode", "url2pathname", "pathname2url", "splittag",
37 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
38 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
39 "splitnport", "splitquery", "splitattr", "splitvalue",
Brett Cannond75f0432007-05-16 22:42:29 +000040 "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000041
Martin v. Löwis3e865952006-01-24 15:51:21 +000042__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000043
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000044MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000045
Jack Jansendc3e3f61995-12-15 13:22:13 +000046# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Every other platform: the conversion is plain (un)quoting.
    def url2pathname(pathname):
        """Convert the path of a relative 'file' URL to a file system
        path (OS-specific; not recommended for general use).

        Here the conversion is just unquote(pathname)."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to the path of a relative 'file'
        URL (OS-specific; not recommended for general use).

        Here the conversion is just quote(pathname)."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000061
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000062# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64# (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional payload; handed to the opener's open() only when
               it is not None.
    proxies -- optional proxy mapping; when given, a fresh FancyURLopener
               is built for this call and the shared opener is left alone.
    """
    global _urlopener
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        # One-off opener; never stored in the module-level slot.
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: create the shared opener lazily.
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments are
    passed unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if there is one, then clear
    _safe_quoters and ftpcache."""
    opener = _urlopener
    if opener:
        opener.cleanup()
    for cache in (_safe_quoters, ftpcache):
        cache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000099
Bill Janssen426ea0a2007-08-29 22:35:05 +0000100# check for SSL
101try:
102 import ssl
103except:
104 _have_ssl = False
105else:
106 _have_ssl = True
107
Georg Brandlb9256022005-08-24 18:46:39 +0000108# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised when the downloaded size does not match the
    content-length.

    message -- text handed on to IOError.
    content -- kept unchanged on the instance as .content.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000114ftpcache = {}
115class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000116 """Class to open URLs.
117 This is a class rather than just a subroutine because we may need
118 more than one set of global protocol-specific options.
119 Note -- this is a base class for those who don't want the
120 automatic handling of errors type 302 (relocated) and 401
121 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000122
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000123 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000124
Guido van Rossumba311382000-08-24 16:18:04 +0000125 version = "Python-urllib/%s" % __version__
126
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000127 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000128 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000129 if proxies is None:
130 proxies = getproxies()
131 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
132 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000133 self.key_file = x509.get('key_file')
134 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000135 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000136 self.__tempfiles = []
137 self.__unlink = os.unlink # See cleanup()
138 self.tempcache = None
139 # Undocumented feature: if you assign {} to tempcache,
140 # it is used to cache files retrieved with
141 # self.retrieve(). This is not enabled by default
142 # since it does not work for changing documents (and I
143 # haven't got the logic to check expiration headers
144 # yet).
145 self.ftpcache = ftpcache
146 # Undocumented feature: you can use a different
147 # ftp cache by assigning to the .ftpcache member;
148 # in case you want logically independent URL openers
149 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000150
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000151 def __del__(self):
152 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000153
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000154 def close(self):
155 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000156
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 def cleanup(self):
158 # This code sometimes runs when the rest of this module
159 # has already been deleted, so it can't use any globals
160 # or import anything.
161 if self.__tempfiles:
162 for file in self.__tempfiles:
163 try:
164 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000165 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000166 pass
167 del self.__tempfiles[:]
168 if self.tempcache:
169 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000170
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000171 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000172 """Add a header to be used by the HTTP interface only
173 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000174 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000175
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000177 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000178 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000179 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000180 # percent encode url, fixing lame server errors for e.g, like space
181 # within url paths.
Senthil Kumaran18d5a692010-02-20 22:05:34 +0000182 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000183 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000184 filename, headers = self.tempcache[fullurl]
185 fp = open(filename, 'rb')
186 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000187 urltype, url = splittype(fullurl)
188 if not urltype:
189 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000190 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000191 proxy = self.proxies[urltype]
192 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000194 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000195 else:
196 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000197 name = 'open_' + urltype
198 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000199 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000200 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000201 if proxy:
202 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000203 else:
204 return self.open_unknown(fullurl, data)
205 try:
206 if data is None:
207 return getattr(self, name)(url)
208 else:
209 return getattr(self, name)(url, data)
210 except socket.error, msg:
211 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000212
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000213 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000214 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000215 type, url = splittype(fullurl)
216 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000217
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000218 def open_unknown_proxy(self, proxy, fullurl, data=None):
219 """Overridable interface to open unknown URL type."""
220 type, url = splittype(fullurl)
221 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000223 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000224 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000225 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000226 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000227 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000228 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000229 return self.tempcache[url]
230 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000231 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000232 try:
233 fp = self.open_local_file(url1)
234 hdrs = fp.info()
Philip Jenvey0299d0d2009-12-03 02:40:13 +0000235 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 return url2pathname(splithost(url1)[1]), hdrs
Georg Brandl84fedf72010-02-06 22:59:15 +0000237 except IOError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000238 pass
Fred Drake316a7932000-08-24 01:01:26 +0000239 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000240 try:
241 headers = fp.info()
242 if filename:
243 tfp = open(filename, 'wb')
244 else:
245 import tempfile
246 garbage, path = splittype(url)
247 garbage, path = splithost(path or "")
248 path, garbage = splitquery(path or "")
249 path, garbage = splitattr(path or "")
250 suffix = os.path.splitext(path)[1]
251 (fd, filename) = tempfile.mkstemp(suffix)
252 self.__tempfiles.append(filename)
253 tfp = os.fdopen(fd, 'wb')
254 try:
255 result = filename, headers
256 if self.tempcache is not None:
257 self.tempcache[url] = result
258 bs = 1024*8
259 size = -1
260 read = 0
261 blocknum = 0
Senthil Kumaran87e58552011-11-01 02:44:45 +0800262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000264 if reporthook:
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000265 reporthook(blocknum, bs, size)
266 while 1:
267 block = fp.read(bs)
268 if block == "":
269 break
270 read += len(block)
271 tfp.write(block)
272 blocknum += 1
273 if reporthook:
274 reporthook(blocknum, bs, size)
275 finally:
276 tfp.close()
277 finally:
278 fp.close()
Georg Brandlb9256022005-08-24 18:46:39 +0000279
280 # raise exception if actual size does not match content-length header
281 if size >= 0 and read < size:
282 raise ContentTooShortError("retrieval incomplete: got only %i out "
283 "of %i bytes" % (read, size), result)
284
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000285 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000286
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000287 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000288
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000289 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000290 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000291 import httplib
292 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000293 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000294 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000295 host, selector = splithost(url)
296 if host:
297 user_passwd, host = splituser(host)
298 host = unquote(host)
299 realhost = host
300 else:
301 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000302 # check whether the proxy contains authorization information
303 proxy_passwd, host = splituser(host)
304 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 urltype, rest = splittype(selector)
306 url = rest
307 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000308 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 realhost = None
310 else:
311 realhost, rest = splithost(rest)
312 if realhost:
313 user_passwd, realhost = splituser(realhost)
314 if user_passwd:
315 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000316 if proxy_bypass(realhost):
317 host = realhost
318
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000319 #print "proxy via http:", host, selector
320 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000321
Martin v. Löwis3e865952006-01-24 15:51:21 +0000322 if proxy_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800323 proxy_passwd = unquote(proxy_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000324 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000325 else:
326 proxy_auth = None
327
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000328 if user_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800329 user_passwd = unquote(user_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000330 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000331 else:
332 auth = None
333 h = httplib.HTTP(host)
334 if data is not None:
335 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000336 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
337 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000338 else:
339 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000340 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000341 if auth: h.putheader('Authorization', 'Basic %s' % auth)
342 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000343 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000344 h.endheaders(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000345 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000346 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000347 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000348 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000349 # something went wrong with the HTTP status line
350 raise IOError, ('http protocol error', 0,
351 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000352 # According to RFC 2616, "2xx" code indicates that the client's
353 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000354 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000355 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000356 else:
357 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000358 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000359 else:
360 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000361
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000362 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000363 """Handle http errors.
364 Derived class can override this, or provide specific handlers
365 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000366 # First check if there's a specific handler for this error
367 name = 'http_error_%d' % errcode
368 if hasattr(self, name):
369 method = getattr(self, name)
370 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000371 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000372 else:
373 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000374 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000375 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000376
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000377 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000378 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000379 fp.close()
380 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000381
Bill Janssen426ea0a2007-08-29 22:35:05 +0000382 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000383 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000384 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000385
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000386 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000387 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000388 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000389 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000390 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000391 if host:
392 user_passwd, host = splituser(host)
393 host = unquote(host)
394 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000395 else:
396 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000397 # here, we determine, whether the proxy contains authorization information
398 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000399 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000400 url = rest
401 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000402 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000403 realhost = None
404 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000405 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000406 if realhost:
407 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000408 if user_passwd:
409 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000410 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000411 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000412 if proxy_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800413 proxy_passwd = unquote(proxy_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000414 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000415 else:
416 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000417 if user_passwd:
Senthil Kumaranbcd833f2012-01-11 00:09:24 +0800418 user_passwd = unquote(user_passwd)
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000419 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000420 else:
421 auth = None
422 h = httplib.HTTPS(host, 0,
423 key_file=self.key_file,
424 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000425 if data is not None:
426 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000427 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000428 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000429 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000430 else:
431 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000432 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
433 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000434 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000435 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000436 h.endheaders(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000437 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000438 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000439 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000440 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000441 # something went wrong with the HTTP status line
442 raise IOError, ('http protocol error', 0,
443 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000444 # According to RFC 2616, "2xx" code indicates that the client's
445 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000446 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000447 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000448 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000449 if data is None:
450 return self.http_error(url, fp, errcode, errmsg, headers)
451 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000452 return self.http_error(url, fp, errcode, errmsg, headers,
453 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000454
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000455 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000456 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000457 if not isinstance(url, str):
458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000460 return self.open_ftp(url)
461 else:
462 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000463
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000464 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000465 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000466 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000467 try:
468 from cStringIO import StringIO
469 except ImportError:
470 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000471 host, file = splithost(url)
472 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000473 try:
474 stats = os.stat(localname)
475 except OSError, e:
476 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000477 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000478 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000479 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000480 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000481 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
482 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000483 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000484 urlfile = file
485 if file[:1] == '/':
486 urlfile = 'file://' + file
Senthil Kumaran58c60622012-01-21 11:43:02 +0800487 elif file[:2] == './':
488 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
Guido van Rossumf0713d32001-08-09 17:43:35 +0000489 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000490 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000491 host, port = splitport(host)
492 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000493 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000494 urlfile = file
495 if file[:1] == '/':
496 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000497 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000498 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000499 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000500
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000501 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000502 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000503 if not isinstance(url, str):
504 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000505 import mimetypes, mimetools
506 try:
507 from cStringIO import StringIO
508 except ImportError:
509 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000510 host, path = splithost(url)
511 if not host: raise IOError, ('ftp error', 'no host given')
512 host, port = splitport(host)
513 user, host = splituser(host)
514 if user: user, passwd = splitpasswd(user)
515 else: passwd = None
516 host = unquote(host)
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000517 user = user or ''
518 passwd = passwd or ''
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000519 host = socket.gethostbyname(host)
520 if not port:
521 import ftplib
522 port = ftplib.FTP_PORT
523 else:
524 port = int(port)
525 path, attrs = splitattr(path)
526 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000527 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000528 dirs, file = dirs[:-1], dirs[-1]
529 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000530 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000531 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000532 # XXX thread unsafe!
533 if len(self.ftpcache) > MAXFTPCACHE:
534 # Prune the cache, rather arbitrarily
535 for k in self.ftpcache.keys():
536 if k != key:
537 v = self.ftpcache[k]
538 del self.ftpcache[k]
539 v.close()
540 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000541 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000542 self.ftpcache[key] = \
543 ftpwrapper(user, passwd, host, port, dirs)
544 if not file: type = 'D'
545 else: type = 'I'
546 for attr in attrs:
547 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000548 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000549 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000550 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000551 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000552 mtype = mimetypes.guess_type("ftp:" + url)[0]
553 headers = ""
554 if mtype:
555 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000556 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000557 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000558 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000559 return addinfourl(fp, headers, "ftp:" + url)
560 except ftperrors(), msg:
561 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000562
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000563 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000564 """Use "data" URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000565 if not isinstance(url, str):
566 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000567 # ignore POSTed data
568 #
569 # syntax of data URLs:
570 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
571 # mediatype := [ type "/" subtype ] *( ";" parameter )
572 # data := *urlchar
573 # parameter := attribute "=" value
Raymond Hettingera6172712004-12-31 19:15:26 +0000574 import mimetools
575 try:
576 from cStringIO import StringIO
577 except ImportError:
578 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000579 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000580 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000581 except ValueError:
582 raise IOError, ('data error', 'bad data URL')
583 if not type:
584 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000585 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000586 if semi >= 0 and '=' not in type[semi:]:
587 encoding = type[semi+1:]
588 type = type[:semi]
589 else:
590 encoding = ''
591 msg = []
Senthil Kumaran1b7f9e52010-05-01 08:01:56 +0000592 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000593 time.gmtime(time.time())))
594 msg.append('Content-type: %s' % type)
595 if encoding == 'base64':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000596 data = base64.decodestring(data)
597 else:
598 data = unquote(data)
Georg Brandl0619a322006-07-26 07:40:17 +0000599 msg.append('Content-Length: %d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000600 msg.append('')
601 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000602 msg = '\n'.join(msg)
Raymond Hettingera6172712004-12-31 19:15:26 +0000603 f = StringIO(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000604 headers = mimetools.Message(f, 0)
Georg Brandl1f663572005-11-26 16:50:44 +0000605 #f.fileno = None # needed for addinfourl
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000606 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000607
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000608
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) tuple.
        self.auth_cache = {}
        # Redirect counter and its limit; see http_error_302().
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries nested redirects have been
        reached, in which case the response is handed to http_error_500
        (if defined) or http_error_default as a 500 error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Always reset the counter, including when the redirect raised;
            # otherwise later, unrelated opens would start from a stale
            # count and hit the recursion limit prematurely.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the Location (or URI) header.

        Returns None when neither header is present.  Raises IOError when
        the target is not an http, https or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): URLopener.http_error_default is defined outside this
        # section and presumably raises; returning its result keeps us from
        # falling through to the header lookups below if it ever does not.
        if not 'www-authenticate' in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # See http_error_401 for why the default handler's result is returned.
        if not 'proxy-authenticate' in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth / retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd added to the http proxy URL.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero iff the proxy URL already carried "user:passwd@";
        # it doubles as the clear_cache flag so cached credentials for this
        # realm are discarded (presumably they were just rejected).
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd added to the https proxy URL.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero i means credentials were already embedded; see
        # retry_proxy_http_basic_auth.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd embedded in the host part.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # Non-zero i means the URL already had credentials; it is passed on
        # as the clear_cache flag.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd embedded in the host part.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # Non-zero i means the URL already had credentials; it is passed on
        # as the clear_cache flag.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if not cached.

        A true clear_cache drops any cached entry first.  The result is
        cached only when at least one of user/passwd is non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Finish the interrupted prompt line before giving up.
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000804
805
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000806# Utility functions
807
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the result is cached in _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000815
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once and the result is cached in _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000823
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors tuple is cached
    in _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000832
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000833_noheaders = None
834def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000835 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000836 global _noheaders
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000837 if _noheaders is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000838 import mimetools
Raymond Hettingera6172712004-12-31 19:15:26 +0000839 try:
840 from cStringIO import StringIO
841 except ImportError:
842 from StringIO import StringIO
843 _noheaders = mimetools.Message(StringIO(), 0)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000844 _noheaders.fp.close() # Recycle file descriptor
845 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000846
847
848# Utility classes
849
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000850class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000851 """Class used by open_ftp() for cache of open FTP connections."""
852
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000853 def __init__(self, user, passwd, host, port, dirs,
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200854 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
Nadeem Vawdaa620fac2011-07-23 17:04:42 +0200855 persistent=True):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000856 self.user = user
857 self.passwd = passwd
858 self.host = host
859 self.port = port
860 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000861 self.timeout = timeout
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200862 self.refcount = 0
863 self.keepalive = persistent
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000864 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000865
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000866 def init(self):
867 import ftplib
868 self.busy = 0
869 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000870 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000871 self.ftp.login(self.user, self.passwd)
872 for dir in self.dirs:
873 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000874
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000875 def retrfile(self, file, type):
876 import ftplib
877 self.endtransfer()
878 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
879 else: cmd = 'TYPE ' + type; isdir = 0
880 try:
881 self.ftp.voidcmd(cmd)
882 except ftplib.all_errors:
883 self.init()
884 self.ftp.voidcmd(cmd)
885 conn = None
886 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000887 # Try to retrieve as a file
888 try:
889 cmd = 'RETR ' + file
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200890 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000891 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000892 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000893 raise IOError, ('ftp error', reason), sys.exc_info()[2]
894 if not conn:
895 # Set transfer mode to ASCII!
896 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000897 # Try a directory listing. Verify that directory exists.
898 if file:
899 pwd = self.ftp.pwd()
900 try:
901 try:
902 self.ftp.cwd(file)
903 except ftplib.error_perm, reason:
904 raise IOError, ('ftp error', reason), sys.exc_info()[2]
905 finally:
906 self.ftp.cwd(pwd)
907 cmd = 'LIST ' + file
908 else:
909 cmd = 'LIST'
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200910 conn, retrlen = self.ftp.ntransfercmd(cmd)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000911 self.busy = 1
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200912 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
913 self.refcount += 1
914 conn.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000915 # Pass back both a suitably decorated object and a retrieval length
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200916 return (ftpobj, retrlen)
917
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000918 def endtransfer(self):
919 if not self.busy:
920 return
921 self.busy = 0
922 try:
923 self.ftp.voidresp()
924 except ftperrors():
925 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000926
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000927 def close(self):
Nadeem Vawdab42c53e2011-07-23 15:51:16 +0200928 self.keepalive = False
929 if self.refcount <= 0:
930 self.real_close()
931
932 def file_close(self):
933 self.endtransfer()
934 self.refcount -= 1
935 if self.refcount <= 0 and not self.keepalive:
936 self.real_close()
937
938 def real_close(self):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000939 self.endtransfer()
940 try:
941 self.ftp.close()
942 except ftperrors():
943 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000944
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its reading methods directly on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            # No descriptor available: fileno() reports None.
            self.fileno = lambda: None
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the borrowed methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000973
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called once when closing."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook (at most once), then close the file.

        The hook is disarmed before it is called so a re-entrant or
        repeated close() cannot run it twice, and the underlying file is
        closed even if the hook raises.
        """
        try:
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000988
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember the headers object for info()."""
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000998
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap fp together with its headers, URL and optional status code."""
        self.headers = headers
        self.url = url
        self.code = code
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def getcode(self):
        """Return the status code passed to the constructor (or None)."""
        return self.code

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +00001016
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001017
Guido van Rossum7c395db1994-07-04 22:14:49 +00001018# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +00001019# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001020# splittype('type:opaquestring') --> 'type', 'opaquestring'
1021# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001022# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1023# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001024# splitport('host:port') --> 'host', 'port'
1025# splitquery('/path?query') --> '/path', 'query'
1026# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001027# splitattr('/path;attr1=value1;attr2=value2;...') ->
1028# '/path', ['attr1=value1', 'attr2=value2', ...]
1029# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001030# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1032
try:
    unicode
except NameError:
    # Interpreter built without a unicode type: nothing is unicode.
    def _is_unicode(x):
        """Return a false value; no unicode type is available."""
        return 0
else:
    def _is_unicode(x):
        """Return true if x is an instance of unicode."""
        return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001041
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Raises UnicodeError if a
    unicode URL contains non-ASCII characters.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1053
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing <...> pair and a leading
    'URL:' marker, in that order.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001061
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is lower-cased.  Returns (None, url) when url has no
    leading 'type:' part.
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # found.end() is just past the colon that terminates the scheme.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001075
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  A non-empty
    remainder that does not start with '/' gets one prepended.  Returns
    (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must terminate the host as well: otherwise
        # '//127.0.0.1#@evil.com/' would report '127.0.0.1#@evil.com'
        # as the host and a later user@host split would pick evil.com.
        _hostprog = re.compile('^//([^/#?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001092
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  Returns (None, host) when there
    is no '@'.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001104
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the password may contain
    further colons and newlines.  Returns (user, None) without a ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001116
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  An empty port
    ('host:') yields ('host', None) -- the dangling colon is dropped.
    Without a trailing ':digits' part the input is returned unchanged
    with a port of None.
    """
    global _portprog
    if _portprog is None:
        import re
        # [0-9]* rather than [0-9]+ so that 'host:' is recognised and
        # the colon is not left glued to the host name.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        hostname, port = match.groups()
        if port:
            return hostname, port
        return hostname, None
    return host, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001129
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    hostname, port = found.groups()
    try:
        # int('') raises ValueError too, so an empty port also gives None.
        nport = int(port)
    except ValueError:
        nport = None
    return hostname, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001151
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when there
    is no '?'.
    """
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001163
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when there
    is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001175
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    Everything before the first ';' is the path; each following
    ';'-separated piece becomes one entry of the attribute list.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001181
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '=' in attr.  If the pattern does
    not match (for instance because attr holds no '='), (attr, None)
    is returned.
    """
    global _valueprog
    # Compile lazily so that re is only imported on first use.
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001193
# urlparse contains a duplicate of the unquote() function below to avoid a
# circular import.  If you update this function, also update the copy in
# urlparse.  This code duplication does not exist in Python 3.
1197
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-character hex string (any mix of upper and lower case)
# to the character with that ordinal.
_hextochr = {a + b: chr(int(a + b, 16))
             for a in _hexdig for b in _hexdig}

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%' followed by two hex digits is replaced by the character it
    encodes; a '%' that is not followed by two hex digits is left in
    place unchanged.
    """
    pieces = s.split('%')
    # fastpath: nothing to decode
    if len(pieces) == 1:
        return s
    result = pieces[0]
    for chunk in pieces[1:]:
        code, tail = chunk[:2], chunk[2:]
        try:
            result += _hextochr[code] + tail
        except KeyError:
            # not a valid escape: put the '%' back untouched
            result += '%' + chunk
        except UnicodeDecodeError:
            # mixing unicode input with a non-ASCII byte: decode the
            # escape to a unicode character instead
            result += unichr(int(code, 16)) + tail
    return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001217
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001222
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Quoting table for the 256 single-byte characters: always-safe
# characters map to themselves, everything else to its '%XX' escape.
_safe_map = {}
for i in range(256):
    c = chr(i)
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{:02X}'.format(i)
# Cache of (quoter, safe characters) pairs, keyed by (safe, always_safe).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    the safe argument by its '%XX' escape.

    Different parts of a URL (path, query, ...) reserve different
    characters.  RFC 2396 (Uniform Resource Identifiers (URI): Generic
    Syntax) lists the reserved characters as

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    but each of them is reserved only in some components.  The default
    safe='/' targets the path component: existing slashes are assumed
    to be path separators and are therefore left alone.

    An empty s is returned as is; passing None raises TypeError.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First call with this safe set: derive a lookup table from the
        # shared one and remember it.
        table = dict(_safe_map)
        for ch in safe:
            table[ch] = ch
        quoter = table.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    else:
        quoter, safe = cached
    # If stripping safe characters leaves nothing, no quoting is needed.
    if not s.rstrip(safe):
        return s
    return ''.join([quoter(ch) for ch in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001269
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unquoted while quoting, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001276
Florent Xiclunae127e242010-05-17 10:39:07 +00001277def urlencode(query, doseq=0):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001278 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001279
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001280 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001281 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001282
1283 If the query arg is a sequence of two-element tuples, the order of the
1284 parameters in the output will match the order of parameters in the
1285 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001286 """
Tim Peters658cba62001-02-09 20:06:00 +00001287
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001288 if hasattr(query,"items"):
1289 # mapping objects
1290 query = query.items()
1291 else:
1292 # it's a bother at times that strings and string-like objects are
1293 # sequences...
1294 try:
1295 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001296 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001297 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001298 raise TypeError
1299 # zero-length sequences of all types will get here and succeed,
1300 # but that's a minor nit - since the original implementation
1301 # allowed empty dicts that type of behavior probably should be
1302 # preserved for consistency
1303 except TypeError:
1304 ty,va,tb = sys.exc_info()
1305 raise TypeError, "not a valid non-string sequence or mapping object", tb
1306
Guido van Rossume7b146f2000-02-04 15:28:42 +00001307 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001308 if not doseq:
1309 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001310 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001311 k = quote_plus(str(k))
1312 v = quote_plus(str(v))
1313 l.append(k + '=' + v)
1314 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001315 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001316 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001317 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001318 v = quote_plus(v)
1319 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001320 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001321 # is there a reasonable way to convert to ASCII?
1322 # encode generates a string, but "replace" or "ignore"
1323 # lose information and "strict" can raise UnicodeError
1324 v = quote_plus(v.encode("ASCII","replace"))
1325 l.append(k + '=' + v)
1326 else:
1327 try:
1328 # is this a sufficient test for sequence-ness?
Georg Brandl84fedf72010-02-06 22:59:15 +00001329 len(v)
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001330 except TypeError:
1331 # not a sequence
1332 v = quote_plus(str(v))
1333 l.append(k + '=' + v)
1334 else:
1335 # loop over the sequence
1336 for elt in v:
1337 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001338 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001339
Guido van Rossum442e7201996-03-20 15:33:11 +00001340# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but a variable whose
    '_proxy' suffix is spelled in lowercase takes precedence over any
    other spelling of the same name; if such a variable is set to an
    empty string the scheme is removed from the result.

    When REQUEST_METHOD is present in the environment (i.e. the process
    looks like a CGI script), HTTP_PROXY is ignored unless it is given
    with the lowercase suffix: a CGI server derives HTTP_PROXY from the
    client's "Proxy:" request header, so it cannot be trusted
    (CVE-2016-1000110).
    """
    suffix = '_proxy'
    proxies = {}
    # First pass: accept any spelling of <scheme>_proxy.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name.endswith(suffix):
            proxies[name[:-len(suffix)]] = value

    # CVE-2016-1000110: running as a CGI script, forget HTTP_PROXY since
    # it may have been injected through a client "Proxy:" header.  A
    # lowercase http_proxy is restored by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)

    # Second pass: variables with a lowercase suffix win, so the result
    # no longer depends on the iteration order of os.environ.
    for name, value in os.environ.items():
        if name.endswith(suffix):
            scheme = name[:-len(suffix)].lower()
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
1356
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    A suffix matches the host itself and any of its subdomains, compared
    case-insensitively; leading dots in a suffix are ignored.  A suffix
    never matches in the middle of a label, so 'example.com' does not
    bypass 'notexample.com'.  Both the host with and without its port
    are tried, so a suffix may carry a port.

    Returns 1 if the proxy should be bypassed, 0 otherwise.
    """
    # re is not imported at module level in this file.
    import re

    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.')  # ignore leading dots
        if not name:
            continue
        # Anchor the suffix at a label boundary: either the whole host,
        # or preceded by a '.'.
        pattern = r'(.+\.)?%s$' % re.escape(name)
        if (re.match(pattern, hostonly, re.I)
                or re.match(pattern, host, re.I)):
            return 1
    # otherwise, don't bypass
    return 0
1376
1377
Jack Jansen11d9b062004-07-16 11:45:00 +00001378if sys.platform == 'darwin':
Ronald Oussoren51f06332009-09-20 10:31:22 +00001379 from _scproxy import _get_proxy_settings, _get_proxies
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001380
    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.

        host may carry a ':port' suffix.  The settings obtained from
        _get_proxy_settings() are consulted in two ways: the
        'exclude_simple' flag (for host names without a dot) and the
        'exceptions' list, whose entries are either numeric network
        prefixes such as '169.254/16' or glob patterns such as
        '*.local'.
        """
        import re
        import socket
        from fnmatch import fnmatch

        # strip port off host
        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Convert a dotted numeric string to a 32-bit integer; an
            # address with fewer than four parts is padded with zeros on
            # the right (and one with more is truncated to four).
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        # NOTE(review): this tests host (port still attached), not
        # hostonly -- harmless as long as the port part holds no '.'.
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved lazily, and at most once, by the loop below.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            # NOTE(review): re.match only anchors at the start, so any
            # entry that merely begins with a digit is treated as a
            # numeric prefix and never reaches the fnmatch branch.
            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        # Resolution failed: skip this entry; hostIP is
                        # still None, so the next numeric entry retries.
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit '/bits': use 8 bits per dotted part
                    # that was given.
                    mask = 8 * (m.group(1).count('.') + 1)

                else:
                    mask = int(mask[1:])
                # From here on mask is the number of low-order bits to
                # ignore.
                # NOTE(review): a prefix length above 32 makes this
                # negative, and a negative shift count raises ValueError.
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            # NOTE(review): the glob is matched against host, i.e. with
            # any ':port' still attached.
            elif fnmatch(host, value):
                return True

        return False
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001439
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001440 def getproxies_macosx_sysconf():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001441 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001442
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001443 This function uses the MacOSX framework SystemConfiguration
1444 to fetch the proxy information.
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001445 """
Ronald Oussoren51f06332009-09-20 10:31:22 +00001446 return _get_proxies()
Mark Hammond4f570b92000-07-26 07:04:38 +00001447
Georg Brandl22350112008-01-20 12:05:43 +00001448 def proxy_bypass(host):
1449 if getproxies_environment():
1450 return proxy_bypass_environment(host)
1451 else:
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001452 return proxy_bypass_macosx_sysconf(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001453
Jack Jansen11d9b062004-07-16 11:45:00 +00001454 def getproxies():
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001455 return getproxies_environment() or getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001456
Mark Hammond4f570b92000-07-26 07:04:38 +00001457elif os.name == 'nt':
1458 def getproxies_registry():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001459 """Return a dictionary of scheme -> proxy server URL mappings.
Mark Hammond4f570b92000-07-26 07:04:38 +00001460
1461 Win32 uses the registry to store proxies.
1462
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001463 """
1464 proxies = {}
Mark Hammond4f570b92000-07-26 07:04:38 +00001465 try:
1466 import _winreg
1467 except ImportError:
1468 # Std module, so should be around - but you never know!
1469 return proxies
1470 try:
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001471 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1472 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Mark Hammond4f570b92000-07-26 07:04:38 +00001473 proxyEnable = _winreg.QueryValueEx(internetSettings,
1474 'ProxyEnable')[0]
1475 if proxyEnable:
1476 # Returned as Unicode but problems if not converted to ASCII
1477 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1478 'ProxyServer')[0])
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001479 if '=' in proxyServer:
1480 # Per-protocol settings
Mark Hammond4f570b92000-07-26 07:04:38 +00001481 for p in proxyServer.split(';'):
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001482 protocol, address = p.split('=', 1)
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001483 # See if address has a type:// prefix
Guido van Rossum64e5aa92002-04-02 14:38:16 +00001484 import re
1485 if not re.match('^([^/:]+)://', address):
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001486 address = '%s://%s' % (protocol, address)
1487 proxies[protocol] = address
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001488 else:
1489 # Use one setting for all protocols
1490 if proxyServer[:5] == 'http:':
1491 proxies['http'] = proxyServer
1492 else:
1493 proxies['http'] = 'http://%s' % proxyServer
Senthil Kumaran0fdd3852010-07-14 20:22:17 +00001494 proxies['https'] = 'https://%s' % proxyServer
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001495 proxies['ftp'] = 'ftp://%s' % proxyServer
Mark Hammond4f570b92000-07-26 07:04:38 +00001496 internetSettings.Close()
1497 except (WindowsError, ValueError, TypeError):
1498 # Either registry key not found etc, or the value in an
1499 # unexpected format.
1500 # proxies already set up to be empty so nothing to do
1501 pass
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001502 return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001503
Mark Hammond4f570b92000-07-26 07:04:38 +00001504 def getproxies():
1505 """Return a dictionary of scheme -> proxy server URL mappings.
1506
1507 Returns settings gathered from the environment, if specified,
1508 or the registry.
1509
1510 """
1511 return getproxies_environment() or getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001512
Georg Brandl22350112008-01-20 12:05:43 +00001513 def proxy_bypass_registry(host):
Tim Peters55c12d42001-08-09 18:04:14 +00001514 try:
1515 import _winreg
1516 import re
Tim Peters55c12d42001-08-09 18:04:14 +00001517 except ImportError:
1518 # Std modules, so should be around - but you never know!
1519 return 0
1520 try:
1521 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1522 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1523 proxyEnable = _winreg.QueryValueEx(internetSettings,
1524 'ProxyEnable')[0]
1525 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1526 'ProxyOverride')[0])
1527 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1528 except WindowsError:
1529 return 0
1530 if not proxyEnable or not proxyOverride:
1531 return 0
1532 # try to make a host list from name and IP address.
Georg Brandl1f636702006-02-18 23:10:23 +00001533 rawHost, port = splitport(host)
1534 host = [rawHost]
Tim Peters55c12d42001-08-09 18:04:14 +00001535 try:
Georg Brandl1f636702006-02-18 23:10:23 +00001536 addr = socket.gethostbyname(rawHost)
1537 if addr != rawHost:
Tim Peters55c12d42001-08-09 18:04:14 +00001538 host.append(addr)
1539 except socket.error:
1540 pass
Georg Brandl1f636702006-02-18 23:10:23 +00001541 try:
1542 fqdn = socket.getfqdn(rawHost)
1543 if fqdn != rawHost:
1544 host.append(fqdn)
1545 except socket.error:
1546 pass
Tim Peters55c12d42001-08-09 18:04:14 +00001547 # make a check value list from the registry entry: replace the
1548 # '<local>' string by the localhost entry and the corresponding
1549 # canonical entry.
1550 proxyOverride = proxyOverride.split(';')
Tim Peters55c12d42001-08-09 18:04:14 +00001551 # now check if we match one of the registry values.
1552 for test in proxyOverride:
Senthil Kumaran4af40d22009-05-01 05:59:52 +00001553 if test == '<local>':
1554 if '.' not in rawHost:
1555 return 1
Tim Petersab9ba272001-08-09 21:40:30 +00001556 test = test.replace(".", r"\.") # mask dots
1557 test = test.replace("*", r".*") # change glob sequence
1558 test = test.replace("?", r".") # change glob char
Tim Peters55c12d42001-08-09 18:04:14 +00001559 for val in host:
1560 # print "%s <--> %s" %( test, val )
1561 if re.match(test, val, re.I):
1562 return 1
1563 return 0
1564
Georg Brandl22350112008-01-20 12:05:43 +00001565 def proxy_bypass(host):
1566 """Return a dictionary of scheme -> proxy server URL mappings.
1567
1568 Returns settings gathered from the environment, if specified,
1569 or the registry.
1570
1571 """
1572 if getproxies_environment():
1573 return proxy_bypass_environment(host)
1574 else:
1575 return proxy_bypass_registry(host)
1576
Mark Hammond4f570b92000-07-26 07:04:38 +00001577else:
1578 # By default use environment variables
1579 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001580 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001581
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001582# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 byte values through quote() and unquote().

    Prints 'Wrong!' if the round trip does not give back the input,
    then the input, the quoted form, the unquoted form and the elapsed
    time.
    """
    s = ''.join(map(chr, range(256))) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(t1 - t0, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001597
1598
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print one line of progress."""
    message = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(message)