blob: 7b0a81cade78515f8efd149f266fd3a4e8cf717f [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
Brett Cannond75f0432007-05-16 22:42:29 +000038 "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Martin v. Löwis3e865952006-01-24 15:51:21 +000040__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Helper for non-unix systems: Windows and RISC OS ship their own
# conversion modules; every other platform uses the plain
# quote()/unquote() based versions defined below.
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL to a file system path.

        OS-specific; not recommended for general use.  On this
        platform the conversion is just unquote().
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative 'file' scheme URL.

        OS-specific; not recommended for general use.  On this
        platform the conversion is just quote().
        """
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000059
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000060# This really consists of two pieces:
61# (1) a class which handles opening of all sorts of URLs
62# (plus assorted utilities etc.)
63# (2) a set of functions for parsing URLs
64# XXX Should these be separated out into different modules?
65
66
67# Shortcut for basic usage
# Lazily created FancyURLopener shared by urlopen(), urlretrieve() and
# urlcleanup().
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    When proxies is given, a fresh FancyURLopener using that mapping is
    created for this call only; otherwise the module-wide opener is
    used (and created on first use).  data, when not None, is passed on
    to the opener's open() method.
    """
    global _urlopener

    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener

    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the module-wide opener's retrieve() method.

    The shared FancyURLopener is created on first use; all arguments
    are forwarded unchanged and retrieve()'s result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Reset module-level state.

    Runs cleanup() on the shared opener if one has been created, then
    empties the _safe_quoters and ftpcache mappings.
    """
    opener = _urlopener
    if opener:
        opener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000097
Bill Janssen426ea0a2007-08-29 22:35:05 +000098# check for SSL
99try:
100 import ssl
101except:
102 _have_ssl = False
103else:
104 _have_ssl = True
105
class ContentTooShortError(IOError):
    """Raised when the downloaded size does not match content-length.

    message -- text handed to IOError.
    content -- stored unchanged on the instance as .content.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000111
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112ftpcache = {}
113class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000114 """Class to open URLs.
115 This is a class rather than just a subroutine because we may need
116 more than one set of global protocol-specific options.
117 Note -- this is a base class for those who don't want the
118 automatic handling of errors type 302 (relocated) and 401
119 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000120
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000121 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000122
Guido van Rossumba311382000-08-24 16:18:04 +0000123 version = "Python-urllib/%s" % __version__
124
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000125 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000126 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000127 if proxies is None:
128 proxies = getproxies()
129 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
130 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000131 self.key_file = x509.get('key_file')
132 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000133 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000134 self.__tempfiles = []
135 self.__unlink = os.unlink # See cleanup()
136 self.tempcache = None
137 # Undocumented feature: if you assign {} to tempcache,
138 # it is used to cache files retrieved with
139 # self.retrieve(). This is not enabled by default
140 # since it does not work for changing documents (and I
141 # haven't got the logic to check expiration headers
142 # yet).
143 self.ftpcache = ftpcache
144 # Undocumented feature: you can use a different
145 # ftp cache by assigning to the .ftpcache member;
146 # in case you want logically independent URL openers
147 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000148
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000149 def __del__(self):
150 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 def close(self):
153 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000154
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000155 def cleanup(self):
156 # This code sometimes runs when the rest of this module
157 # has already been deleted, so it can't use any globals
158 # or import anything.
159 if self.__tempfiles:
160 for file in self.__tempfiles:
161 try:
162 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000163 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000164 pass
165 del self.__tempfiles[:]
166 if self.tempcache:
167 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000168
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000169 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000170 """Add a header to be used by the HTTP interface only
171 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000173
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000174 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000176 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000177 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000178 # percent encode url, fixing lame server errors for e.g, like space
179 # within url paths.
Senthil Kumaran18d5a692010-02-20 22:05:34 +0000180 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000181 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000182 filename, headers = self.tempcache[fullurl]
183 fp = open(filename, 'rb')
184 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000185 urltype, url = splittype(fullurl)
186 if not urltype:
187 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000188 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000189 proxy = self.proxies[urltype]
190 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000191 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000192 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 else:
194 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000195 name = 'open_' + urltype
196 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000197 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000198 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000199 if proxy:
200 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000201 else:
202 return self.open_unknown(fullurl, data)
203 try:
204 if data is None:
205 return getattr(self, name)(url)
206 else:
207 return getattr(self, name)(url, data)
208 except socket.error, msg:
209 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000210
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000211 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000212 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000213 type, url = splittype(fullurl)
214 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000215
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000216 def open_unknown_proxy(self, proxy, fullurl, data=None):
217 """Overridable interface to open unknown URL type."""
218 type, url = splittype(fullurl)
219 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000221 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000222 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000223 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000224 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000225 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000226 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000227 return self.tempcache[url]
228 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000229 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000230 try:
231 fp = self.open_local_file(url1)
232 hdrs = fp.info()
Philip Jenvey0299d0d2009-12-03 02:40:13 +0000233 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000234 return url2pathname(splithost(url1)[1]), hdrs
Georg Brandl84fedf72010-02-06 22:59:15 +0000235 except IOError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 pass
Fred Drake316a7932000-08-24 01:01:26 +0000237 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000238 try:
239 headers = fp.info()
240 if filename:
241 tfp = open(filename, 'wb')
242 else:
243 import tempfile
244 garbage, path = splittype(url)
245 garbage, path = splithost(path or "")
246 path, garbage = splitquery(path or "")
247 path, garbage = splitattr(path or "")
248 suffix = os.path.splitext(path)[1]
249 (fd, filename) = tempfile.mkstemp(suffix)
250 self.__tempfiles.append(filename)
251 tfp = os.fdopen(fd, 'wb')
252 try:
253 result = filename, headers
254 if self.tempcache is not None:
255 self.tempcache[url] = result
256 bs = 1024*8
257 size = -1
258 read = 0
259 blocknum = 0
Senthil Kumaran87e58552011-11-01 02:44:45 +0800260 if "content-length" in headers:
261 size = int(headers["Content-Length"])
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000262 if reporthook:
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000263 reporthook(blocknum, bs, size)
264 while 1:
265 block = fp.read(bs)
266 if block == "":
267 break
268 read += len(block)
269 tfp.write(block)
270 blocknum += 1
271 if reporthook:
272 reporthook(blocknum, bs, size)
273 finally:
274 tfp.close()
275 finally:
276 fp.close()
Georg Brandlb9256022005-08-24 18:46:39 +0000277
278 # raise exception if actual size does not match content-length header
279 if size >= 0 and read < size:
280 raise ContentTooShortError("retrieval incomplete: got only %i out "
281 "of %i bytes" % (read, size), result)
282
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000283 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000284
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000285 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000286
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000287 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000288 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000289 import httplib
290 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000291 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000292 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000293 host, selector = splithost(url)
294 if host:
295 user_passwd, host = splituser(host)
296 host = unquote(host)
297 realhost = host
298 else:
299 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000300 # check whether the proxy contains authorization information
301 proxy_passwd, host = splituser(host)
302 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000303 urltype, rest = splittype(selector)
304 url = rest
305 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000306 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000307 realhost = None
308 else:
309 realhost, rest = splithost(rest)
310 if realhost:
311 user_passwd, realhost = splituser(realhost)
312 if user_passwd:
313 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000314 if proxy_bypass(realhost):
315 host = realhost
316
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000317 #print "proxy via http:", host, selector
318 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000319
Martin v. Löwis3e865952006-01-24 15:51:21 +0000320 if proxy_passwd:
321 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000322 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000323 else:
324 proxy_auth = None
325
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000326 if user_passwd:
327 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000328 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000329 else:
330 auth = None
331 h = httplib.HTTP(host)
332 if data is not None:
333 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000334 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
335 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000336 else:
337 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000338 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000339 if auth: h.putheader('Authorization', 'Basic %s' % auth)
340 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000341 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000342 h.endheaders(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000343 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000344 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000345 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000346 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000347 # something went wrong with the HTTP status line
348 raise IOError, ('http protocol error', 0,
349 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000350 # According to RFC 2616, "2xx" code indicates that the client's
351 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000352 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000353 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000354 else:
355 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000356 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000357 else:
358 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000359
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000360 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000361 """Handle http errors.
362 Derived class can override this, or provide specific handlers
363 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000364 # First check if there's a specific handler for this error
365 name = 'http_error_%d' % errcode
366 if hasattr(self, name):
367 method = getattr(self, name)
368 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000369 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000370 else:
371 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000372 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000373 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000374
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000375 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000376 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000377 fp.close()
378 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000379
Bill Janssen426ea0a2007-08-29 22:35:05 +0000380 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000381 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000382 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000383
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000384 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000385 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000386 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000387 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000388 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000389 if host:
390 user_passwd, host = splituser(host)
391 host = unquote(host)
392 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000393 else:
394 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000395 # here, we determine, whether the proxy contains authorization information
396 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000397 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000398 url = rest
399 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000400 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000401 realhost = None
402 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000403 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000404 if realhost:
405 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000406 if user_passwd:
407 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000408 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000409 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000410 if proxy_passwd:
411 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000412 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000413 else:
414 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000415 if user_passwd:
416 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000417 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000418 else:
419 auth = None
420 h = httplib.HTTPS(host, 0,
421 key_file=self.key_file,
422 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000423 if data is not None:
424 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000425 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000426 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000427 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000428 else:
429 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000430 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
431 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000432 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000433 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000434 h.endheaders(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000435 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000436 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000437 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000438 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000439 # something went wrong with the HTTP status line
440 raise IOError, ('http protocol error', 0,
441 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000442 # According to RFC 2616, "2xx" code indicates that the client's
443 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000444 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000445 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000446 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000447 if data is None:
448 return self.http_error(url, fp, errcode, errmsg, headers)
449 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000450 return self.http_error(url, fp, errcode, errmsg, headers,
451 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000452
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000453 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000454 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000455 if not isinstance(url, str):
456 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000457 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000458 return self.open_ftp(url)
459 else:
460 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000461
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000462 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000463 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000464 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000465 try:
466 from cStringIO import StringIO
467 except ImportError:
468 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000469 host, file = splithost(url)
470 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000471 try:
472 stats = os.stat(localname)
473 except OSError, e:
474 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000475 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000476 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000477 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000478 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000479 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
480 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000481 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000482 urlfile = file
483 if file[:1] == '/':
484 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000485 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000486 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000487 host, port = splitport(host)
488 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000489 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000490 urlfile = file
491 if file[:1] == '/':
492 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000493 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000494 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000495 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000496
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000497 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000498 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000499 if not isinstance(url, str):
500 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000501 import mimetypes, mimetools
502 try:
503 from cStringIO import StringIO
504 except ImportError:
505 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000506 host, path = splithost(url)
507 if not host: raise IOError, ('ftp error', 'no host given')
508 host, port = splitport(host)
509 user, host = splituser(host)
510 if user: user, passwd = splitpasswd(user)
511 else: passwd = None
512 host = unquote(host)
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000513 user = user or ''
514 passwd = passwd or ''
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000515 host = socket.gethostbyname(host)
516 if not port:
517 import ftplib
518 port = ftplib.FTP_PORT
519 else:
520 port = int(port)
521 path, attrs = splitattr(path)
522 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000523 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000524 dirs, file = dirs[:-1], dirs[-1]
525 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000526 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000527 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000528 # XXX thread unsafe!
529 if len(self.ftpcache) > MAXFTPCACHE:
530 # Prune the cache, rather arbitrarily
531 for k in self.ftpcache.keys():
532 if k != key:
533 v = self.ftpcache[k]
534 del self.ftpcache[k]
535 v.close()
536 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000537 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000538 self.ftpcache[key] = \
539 ftpwrapper(user, passwd, host, port, dirs)
540 if not file: type = 'D'
541 else: type = 'I'
542 for attr in attrs:
543 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000544 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000545 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000546 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000547 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000548 mtype = mimetypes.guess_type("ftp:" + url)[0]
549 headers = ""
550 if mtype:
551 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000552 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000553 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000554 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000555 return addinfourl(fp, headers, "ftp:" + url)
556 except ftperrors(), msg:
557 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000558
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000559 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000560 """Use "data" URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000561 if not isinstance(url, str):
562 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000563 # ignore POSTed data
564 #
565 # syntax of data URLs:
566 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
567 # mediatype := [ type "/" subtype ] *( ";" parameter )
568 # data := *urlchar
569 # parameter := attribute "=" value
Raymond Hettingera6172712004-12-31 19:15:26 +0000570 import mimetools
571 try:
572 from cStringIO import StringIO
573 except ImportError:
574 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000575 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000576 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000577 except ValueError:
578 raise IOError, ('data error', 'bad data URL')
579 if not type:
580 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000581 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000582 if semi >= 0 and '=' not in type[semi:]:
583 encoding = type[semi+1:]
584 type = type[:semi]
585 else:
586 encoding = ''
587 msg = []
Senthil Kumaran1b7f9e52010-05-01 08:01:56 +0000588 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000589 time.gmtime(time.time())))
590 msg.append('Content-type: %s' % type)
591 if encoding == 'base64':
592 import base64
593 data = base64.decodestring(data)
594 else:
595 data = unquote(data)
Georg Brandl0619a322006-07-26 07:40:17 +0000596 msg.append('Content-Length: %d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000597 msg.append('')
598 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000599 msg = '\n'.join(msg)
Raymond Hettingera6172712004-12-31 19:15:26 +0000600 f = StringIO(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000601 headers = mimetools.Message(f, 0)
Georg Brandl1f663572005-11-26 16:50:44 +0000602 #f.fileno = None # needed for addinfourl
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000603 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000604
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000605
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    On top of URLopener this follows 301/302/303/307 redirects (up to
    ``maxtries`` per request) and retries 401/407 responses with HTTP
    Basic credentials obtained from prompt_user_passwd().
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, passwd); see get_user_passwd().
        self.auth_cache = {}
        # Redirects followed so far for the request in progress.
        self.tries = 0
        # Redirect limit; a false value disables the check.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless ``maxtries`` redirects have already
        been followed, in which case the response is handed to
        http_error_500 (if defined) or http_error_default with code 500.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset on every exit path.  Without this an exception raised
            # while following a redirect would leave the counter elevated
            # and make later, unrelated requests on this opener trip the
            # recursion limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a "location" nor a "uri" header is
        present.  Raises IOError if the target is not an http, https or
        ftp URL.  *data* is not forwarded to the new request.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): URLopener.http_error_default is defined outside
        # this class; the calls below rely on it raising rather than
        # returning -- confirm against the base class.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the base-class handler is
        # expected to raise -- confirm against URLopener.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth / ..._https_...
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to carry "user:passwd@".  Returns
        None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # passing it as clear_cache drops any cached entry for the realm.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Rewrites self.proxies['https'] to carry "user:passwd@".  Returns
        None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with "user:passwd@" placed in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; passing
        # it as clear_cache drops any cached entry for the realm.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with "user:passwd@" placed in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*, using the cache.

        With a true *clear_cache* an existing cache entry is discarded
        and the user is prompted again.  Only non-empty answers are
        cached.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal and returns (user, passwd), or
        (None, None) if interrupted with KeyboardInterrupt.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Python 2 print statement: emit a newline after the ^C.
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000801
802
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000803# Utility functions
804
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the cached value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000812
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the cached value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000820
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only and its all_errors value is
    cached for later calls.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000829
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000830_noheaders = None
831def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000832 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000833 global _noheaders
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000834 if _noheaders is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000835 import mimetools
Raymond Hettingera6172712004-12-31 19:15:26 +0000836 try:
837 from cStringIO import StringIO
838 except ImportError:
839 from StringIO import StringIO
840 _noheaders = mimetools.Message(StringIO(), 0)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000841 _noheaders.fp.close() # Recycle file descriptor
842 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000843
844
845# Utility classes
846
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Holds one ftplib.FTP control connection plus the parameters needed
    to re-establish it.  Each file object handed out by retrfile() is
    counted in ``refcount``; the connection is really closed only when
    no such file is outstanding and ``keepalive`` is false.
    """

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=True):
        """Store the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects from retrfile() not yet closed.
        self.refcount = 0
        # While true, file_close() will not tear the connection down.
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect: open the control connection, log in and cwd
        through each entry of self.dirs in order."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a listing) and return (fp, retrlen).

        *type* is the FTP transfer type letter; 'd'/'D' requests a
        directory listing.  If RETR is refused with a reply starting with
        '550', a LIST of *file* is tried instead.  Other permission
        errors are re-raised as IOError('ftp error', reason).  *retrlen*
        is the second item returned by ftplib's ntransfercmd().
        """
        import ftplib
        # Finish any transfer still pending on the control connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection failed; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A '550' reply falls through to the listing attempt.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """If a transfer is marked busy, clear the flag and read the
        pending response, ignoring FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Stop keeping the connection alive; close it now if no file
        objects are outstanding."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000941
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Always provide fileno(); fall back to one that returns None.
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        # Forward iteration only when the wrapped object supports it.
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        wrapped = self.fp
        if wrapped:
            wrapped.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000970
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is called with *hookargs* after the wrapped file has been
    closed, and at most once.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000985
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given at construction."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000995
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file.

    Also carries an optional status *code*, available via getcode().
    """

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given at construction."""
        return self.headers

    def getcode(self):
        """Return the status code given at construction (may be None)."""
        return self.code

    def geturl(self):
        """Return the URL given at construction."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +00001013
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001014
Guido van Rossum7c395db1994-07-04 22:14:49 +00001015# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +00001016# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001017# splittype('type:opaquestring') --> 'type', 'opaquestring'
1018# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001019# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1020# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001021# splitport('host:port') --> 'host', 'port'
1022# splitquery('/path?query') --> '/path', 'query'
1023# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001024# splitattr('/path;attr1=value1;attr2=value2;...') ->
1025# '/path', ['attr1=value1', 'attr2=value2', ...]
1026# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001027# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1029
# Pick an implementation of _is_unicode() depending on whether this
# interpreter has a ``unicode`` type at all.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Unicode input is encoded
    as ASCII; UnicodeError is raised if that is not possible.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
Martin v. Löwis1d994332000-12-03 18:30:10 +00001049 return url
1050
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing <...> pair and a leading
    'URL:' prefix, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001057 return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001058
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  Returns (None, url) when *url*
    does not start with a scheme.
    """
    global _typeprog
    if _typeprog is None:
        # Compile the pattern on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match covers the scheme plus its trailing colon.
    return found.group(1).lower(), url[found.end():]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001071 return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001072
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    A non-empty remainder that does not begin with '/' gets one
    prepended.  Returns (None, url) when *url* does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile the pattern on first use only.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    host_port, rest = found.groups()
    if rest and rest[:1] != '/':
        rest = '/' + rest
    return host_port, rest
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001088 return None, url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001089
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@'.  Returns (None, host) when there
    is no user part.
    """
    global _userprog
    if _userprog is None:
        # Compile the pattern on first use only.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.group(1, 2)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001100 return None, host
Guido van Rossum7c395db1994-07-04 22:14:49 +00001101
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'.  Returns (user, None) when there
    is no password part.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile the pattern on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
1112 return user, None
Guido van Rossum7c395db1994-07-04 22:14:49 +00001113
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  Returns (host, None)
    when *host* does not end in ':' followed by digits.
    """
    global _portprog
    if _portprog is None:
        # Compile the pattern on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
1125 return host, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001126
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port (-1 unless overridden) if no ':' is
    found.  Return the port as an int if a valid number follows the last
    ':'.  Return None as the port if there is a ':' but no valid number
    after it.
    """
    global _nportprog
    if _nportprog is None:
        # Compile the pattern on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            # Not a number: report the port as None.
            pass
    return host, nport
1147 return host, defport
Guido van Rossum53725a21996-06-13 19:12:35 +00001148
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'.  Returns (url, None) when there
    is no query part.
    """
    global _queryprog
    if _queryprog is None:
        # Compile the pattern on first use only.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1159 return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001160
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'.  Returns (url, None) when there
    is no tag part.
    """
    global _tagprog
    if _tagprog is None:
        # Compile the pattern on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1171 return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001172
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001177 return words[0], words[1:]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001178
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='.  Returns (attr, None) when there
    is no value part.
    """
    global _valueprog
    if _valueprog is None:
        # Compile the pattern on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
1189 return attr, None
Guido van Rossum7c395db1994-07-04 22:14:49 +00001190
R. David Murraybfbdefe2010-05-25 15:20:46 +00001191# urlparse contains a duplicate of this method to avoid a circular import. If
1192# you update this method, also update the copy in urlparse. This code
1193# duplication does not exist in Python3.
1194
_hexdig = '0123456789ABCDEFabcdef'
# Every two-hex-digit spelling (any mix of upper and lower case) mapped to
# the character it encodes.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left in place.
    """
    chunks = s.split('%')
    # fastpath: no '%' anywhere, so there is nothing to decode
    if len(chunks) == 1:
        return s
    decoded = [chunks[0]]
    for chunk in chunks[1:]:
        try:
            piece = _hextochr[chunk[:2]] + chunk[2:]
        except KeyError:
            # not a valid escape: keep the text, including its '%'
            piece = '%' + chunk
        except UnicodeDecodeError:
            # unicode input with a non-ASCII escape: build the character
            # as unicode instead of as a byte string
            piece = unichr(int(chunk[:2], 16)) + chunk[2:]
        decoded.append(piece)
    return ''.join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001214
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' is turned into a space first, then the result is passed
    through unquote().
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001219
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001220always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001221 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001222 '0123456789' '_.-')
Florent Xiclunaaf87f9f2010-05-17 13:35:09 +00001223_safe_map = {}
1224for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1225 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1226_safe_quoters = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001227
Senthil Kumaran880685f2010-07-22 01:47:30 +00001228def quote(s, safe='/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001229 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001230
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001231 Each part of a URL, e.g. the path info, the query, etc., has a
1232 different set of reserved characters that must be quoted.
1233
1234 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1235 the following reserved characters.
1236
1237 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1238 "$" | ","
1239
1240 Each of these characters is reserved in some component of a URL,
1241 but not necessarily in all of them.
1242
1243 By default, the quote function is intended for quoting the path
1244 section of a URL. Thus, it will not encode '/'. This character
1245 is reserved, but in typical usage the quote function is being
1246 called on a path where the existing slash characters are used as
1247 reserved characters.
1248 """
Florent Xiclunaaf87f9f2010-05-17 13:35:09 +00001249 # fastpath
1250 if not s:
Senthil Kumaranc7743aa2010-07-19 17:35:50 +00001251 if s is None:
1252 raise TypeError('None object cannot be quoted')
Florent Xiclunaaf87f9f2010-05-17 13:35:09 +00001253 return s
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001254 cachekey = (safe, always_safe)
1255 try:
Florent Xiclunaaf87f9f2010-05-17 13:35:09 +00001256 (quoter, safe) = _safe_quoters[cachekey]
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001257 except KeyError:
Florent Xiclunaaf87f9f2010-05-17 13:35:09 +00001258 safe_map = _safe_map.copy()
1259 safe_map.update([(c, c) for c in safe])
1260 quoter = safe_map.__getitem__
1261 safe = always_safe + safe
1262 _safe_quoters[cachekey] = (quoter, safe)
1263 if not s.rstrip(safe):
1264 return s
1265 return ''.join(map(quoter, s))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001266
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unquoted through quote() so they can be swapped for '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001273
Florent Xiclunae127e242010-05-17 10:39:07 +00001274def urlencode(query, doseq=0):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001275 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001276
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001277 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001278 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001279
1280 If the query arg is a sequence of two-element tuples, the order of the
1281 parameters in the output will match the order of parameters in the
1282 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001283 """
Tim Peters658cba62001-02-09 20:06:00 +00001284
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001285 if hasattr(query,"items"):
1286 # mapping objects
1287 query = query.items()
1288 else:
1289 # it's a bother at times that strings and string-like objects are
1290 # sequences...
1291 try:
1292 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001293 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001294 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001295 raise TypeError
1296 # zero-length sequences of all types will get here and succeed,
1297 # but that's a minor nit - since the original implementation
1298 # allowed empty dicts that type of behavior probably should be
1299 # preserved for consistency
1300 except TypeError:
1301 ty,va,tb = sys.exc_info()
1302 raise TypeError, "not a valid non-string sequence or mapping object", tb
1303
Guido van Rossume7b146f2000-02-04 15:28:42 +00001304 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001305 if not doseq:
1306 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001307 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001308 k = quote_plus(str(k))
1309 v = quote_plus(str(v))
1310 l.append(k + '=' + v)
1311 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001312 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001313 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001314 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001315 v = quote_plus(v)
1316 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001317 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001318 # is there a reasonable way to convert to ASCII?
1319 # encode generates a string, but "replace" or "ignore"
1320 # lose information and "strict" can raise UnicodeError
1321 v = quote_plus(v.encode("ASCII","replace"))
1322 l.append(k + '=' + v)
1323 else:
1324 try:
1325 # is this a sufficient test for sequence-ness?
Georg Brandl84fedf72010-02-06 22:59:15 +00001326 len(v)
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001327 except TypeError:
1328 # not a sequence
1329 v = quote_plus(str(v))
1330 l.append(k + '=' + v)
1331 else:
1332 # loop over the sequence
1333 for elt in v:
1334 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001335 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001336
Guido van Rossum442e7201996-03-20 15:33:11 +00001337# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively and variables with an
    empty value are ignored.
    """
    suffix = '_proxy'
    return dict((name.lower()[:-len(suffix)], value)
                for name, value in os.environ.items()
                if value and name.lower().endswith(suffix))
1353
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.

    Returns 1 when the proxy should be bypassed and 0 otherwise.
    """
    env = os.environ
    no_proxy = env.get('no_proxy', '') or env.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for entry in no_proxy.split(','):
        suffix = entry.strip()
        if not suffix:
            # blank entries (e.g. from ',,' or an unset variable) match nothing
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1373
1374
Jack Jansen11d9b062004-07-16 11:45:00 +00001375if sys.platform == 'darwin':
Ronald Oussoren51f06332009-09-20 10:31:22 +00001376 from _scproxy import _get_proxy_settings, _get_proxies
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001377
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    Each entry of the 'exceptions' setting is either a dotted numeric
    prefix with an optional '/bits' length (compared against the resolved
    address of host) or a glob pattern (compared against host itself).
    Numeric entries whose prefix length exceeds 32 bits are ignored.
    """
    import re
    import socket
    from fnmatch import fnmatch

    def ip2num(ipAddr):
        # Pack up to four dotted decimal groups into one 32-bit integer,
        # padding missing trailing groups with zero.
        octets = [int(piece) for piece in ipAddr.split('.')]
        if len(octets) != 4:
            octets = (octets + [0, 0, 0, 0])[:4]
        return (octets[0] << 24) | (octets[1] << 16) | (octets[2] << 8) | octets[3]

    hostonly, port = splitport(host)
    proxy_settings = _get_proxy_settings()

    # Check for simple host names:
    if '.' not in host and proxy_settings['exclude_simple']:
        return True

    hostIP = None   # resolved lazily, at most once per successful lookup

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is None:
            if fnmatch(host, value):
                return True
            continue

        if hostIP is None:
            try:
                hostIP = ip2num(socket.gethostbyname(hostonly))
            except socket.error:
                # host cannot be resolved: numeric entries cannot match
                continue

        base = ip2num(m.group(1))
        bits = m.group(2)
        if bits is None:
            # no explicit length: 8 bits per dotted group given
            prefix = 8 * (m.group(1).count('.') + 1)
        else:
            prefix = int(bits[1:])
        if prefix > 32:
            # A prefix longer than an IPv4 address would make the shift
            # count negative (ValueError); treat the entry as invalid.
            continue
        shift = 32 - prefix

        if (hostIP >> shift) == (base >> shift):
            return True

    return False
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001436
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.
    """
    proxies = _get_proxies()
    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001444
def proxy_bypass(host):
    """Test whether the proxy should be bypassed for host.

    The environment settings decide when any <scheme>_proxy variable is
    set; otherwise the MacOSX system configuration is consulted.
    """
    if not getproxies_environment():
        return proxy_bypass_macosx_sysconf(host)
    return proxy_bypass_environment(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001450
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment win; the MacOSX system
    configuration is used only when the environment defines none.
    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001453
Mark Hammond4f570b92000-07-26 07:04:38 +00001454elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    An empty dictionary is returned when _winreg cannot be imported or
    proxies are disabled.  When the registry cannot be read or the
    ProxyServer value is malformed, whatever was collected before the
    failure is returned.
    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    import re
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Release the key even when a query or the parsing above fails.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already holds everything gathered so far, nothing to do
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001500
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified,
    or the registry.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001509
def proxy_bypass_registry(host):
    """Test host against the ProxyOverride list stored in the registry.

    Returns 1 when the proxy should be bypassed for host and 0 otherwise,
    including when _winreg or re cannot be imported, the registry cannot
    be read, or proxies are disabled.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            # Release the key whether or not both values could be read.
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass
    # make a check value list from the registry entry; the '<local>'
    # entry stands for every host name that contains no dot.
    proxyOverride = proxyOverride.split(';')
    # now check if we match one of the registry values.
    for test in proxyOverride:
        if test == '<local>':
            if '.' not in rawHost:
                return 1
        # turn the glob into a regular expression
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            if re.match(test, val, re.I):
                return 1
    return 0
1561
def proxy_bypass(host):
    """Test whether the proxy should be bypassed for host.

    The environment settings decide when any <scheme>_proxy variable is
    set; otherwise the registry is consulted.

    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    else:
        return proxy_bypass_registry(host)
1573
Mark Hammond4f570b92000-07-26 07:04:38 +00001574else:
1575 # By default use environment variables
1576 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001577 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001578
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001579# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 character codes through quote() and unquote().

    Prints 'Wrong!' when the round trip does not reproduce the input,
    then the three strings involved and the elapsed time in seconds.
    """
    s = ''.join([chr(i) for i in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    # Each print gets a single parenthesised value, so the output is the
    # same whether print is a statement or a function.
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s %s' % (round(t1 - t0, 3), 'sec'))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001594
1595
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers by printing the three counters."""
    progress = (blocknum, blocksize, totalsize)
    # A single parenthesised value prints identically under the print
    # statement and the print function.
    print("Block number: %d, Block size: %d, Total size: %d" % progress)