blob: a56f16273ab122d906f1c0ef85e484ef4a8ad215 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Brett Cannon8bb8fa52008-07-02 01:57:08 +000031import warnings
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
# Names exported by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-Agent header.
__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
# Helper for non-unix systems: choose the converters between relative
# 'file' URLs and file system paths that match the running platform.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # On every other platform the conversion is plain percent
    # (un)quoting of the path.
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional data; when not None it is passed as the second
               argument to the opener's open() method.
    proxies -- optional mapping handed to FancyURLopener.  When given, a
               fresh opener is built for this call only; otherwise one
               shared module-level opener is created on first use and
               reused by later calls.

    Returns whatever the opener's open() method returns.
    """
    # The module-level "warnings" import is used directly; the former
    # local "from warnings import warnpy3k" bound a name nobody read.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments
    are forwarded to its retrieve() method, whose result is returned.
    """
    global _urlopener
    _urlopener = _urlopener or FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if there is one."""
    shared = _urlopener
    if not shared:
        return
    shared.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
# check for SSL
try:
    import ssl
except ImportError:
    # Only a missing ssl module means "no HTTPS support".  A bare
    # "except:" here would also swallow KeyboardInterrupt/SystemExit
    # and hide genuine failures inside the ssl module.
    _have_ssl = False
else:
    _have_ssl = True
106
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying an extra ``content`` attribute.

    message -- passed on to IOError.
    content -- stored unchanged on the instance as ``content``.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112
# Module-wide FTP cache; every URLopener points its .ftpcache at this
# dictionary unless the attribute is reassigned on the instance.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default; __init__ replaces it with a per-instance list.
    # NOTE(review): presumably here so cleanup() still finds the
    # attribute when __init__ did not run to completion -- confirm.
    __tempfiles = None

    # Used by __init__ as the value of the default User-Agent header.
    version = "Python-urllib/%s" % __version__
125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000127 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000134 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000149
    def __del__(self):
        """Finalizer: delegates to close()."""
        self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000152
    def close(self):
        """Close the opener; this simply runs cleanup()."""
        self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000155
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000164 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000169
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')

        The positional arguments are appended to self.addheaders as one
        tuple; open_http()/open_https() later pass each stored tuple to
        putheader()."""
        self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000174
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000177 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000178 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000179 # percent encode url, fixing lame server errors for e.g, like space
180 # within url paths.
181 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000182 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000183 filename, headers = self.tempcache[fullurl]
184 fp = open(filename, 'rb')
185 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000186 urltype, url = splittype(fullurl)
187 if not urltype:
188 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000189 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000190 proxy = self.proxies[urltype]
191 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000192 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000193 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000194 else:
195 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000196 name = 'open_' + urltype
197 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000198 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000200 if proxy:
201 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000202 else:
203 return self.open_unknown(fullurl, data)
204 try:
205 if data is None:
206 return getattr(self, name)(url)
207 else:
208 return getattr(self, name)(url, data)
209 except socket.error, msg:
210 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000211
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000212 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000213 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000214 type, url = splittype(fullurl)
215 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000216
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000217 def open_unknown_proxy(self, proxy, fullurl, data=None):
218 """Overridable interface to open unknown URL type."""
219 type, url = splittype(fullurl)
220 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
221
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000222 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000223 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000224 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000225 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000226 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000227 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 return self.tempcache[url]
229 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000230 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 try:
232 fp = self.open_local_file(url1)
233 hdrs = fp.info()
234 del fp
235 return url2pathname(splithost(url1)[1]), hdrs
236 except IOError, msg:
237 pass
Fred Drake316a7932000-08-24 01:01:26 +0000238 fp = self.open(url, data)
Benjamin Petersonb364bfe2009-03-22 17:45:11 +0000239 try:
240 headers = fp.info()
241 if filename:
242 tfp = open(filename, 'wb')
243 else:
244 import tempfile
245 garbage, path = splittype(url)
246 garbage, path = splithost(path or "")
247 path, garbage = splitquery(path or "")
248 path, garbage = splitattr(path or "")
249 suffix = os.path.splitext(path)[1]
250 (fd, filename) = tempfile.mkstemp(suffix)
251 self.__tempfiles.append(filename)
252 tfp = os.fdopen(fd, 'wb')
253 try:
254 result = filename, headers
255 if self.tempcache is not None:
256 self.tempcache[url] = result
257 bs = 1024*8
258 size = -1
259 read = 0
260 blocknum = 0
261 if reporthook:
262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
264 reporthook(blocknum, bs, size)
265 while 1:
266 block = fp.read(bs)
267 if block == "":
268 break
269 read += len(block)
270 tfp.write(block)
271 blocknum += 1
272 if reporthook:
273 reporthook(blocknum, bs, size)
274 finally:
275 tfp.close()
276 finally:
277 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000278 del fp
279 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000280
281 # raise exception if actual size does not match content-length header
282 if size >= 0 and read < size:
283 raise ContentTooShortError("retrieval incomplete: got only %i out "
284 "of %i bytes" % (read, size), result)
285
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000287
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000288 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000289
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000291 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000292 import httplib
293 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000294 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000295 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000296 host, selector = splithost(url)
297 if host:
298 user_passwd, host = splituser(host)
299 host = unquote(host)
300 realhost = host
301 else:
302 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000303 # check whether the proxy contains authorization information
304 proxy_passwd, host = splituser(host)
305 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000306 urltype, rest = splittype(selector)
307 url = rest
308 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000309 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000310 realhost = None
311 else:
312 realhost, rest = splithost(rest)
313 if realhost:
314 user_passwd, realhost = splituser(realhost)
315 if user_passwd:
316 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000317 if proxy_bypass(realhost):
318 host = realhost
319
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000320 #print "proxy via http:", host, selector
321 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000322
Martin v. Löwis3e865952006-01-24 15:51:21 +0000323 if proxy_passwd:
324 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000325 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000326 else:
327 proxy_auth = None
328
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000329 if user_passwd:
330 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000331 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 else:
333 auth = None
334 h = httplib.HTTP(host)
335 if data is not None:
336 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000337 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
338 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000339 else:
340 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000341 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000342 if auth: h.putheader('Authorization', 'Basic %s' % auth)
343 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000344 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000345 h.endheaders(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000346 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000347 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000348 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000349 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000350 # something went wrong with the HTTP status line
351 raise IOError, ('http protocol error', 0,
352 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000353 # According to RFC 2616, "2xx" code indicates that the client's
354 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000355 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000356 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000357 else:
358 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000359 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000360 else:
361 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000362
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000363 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000364 """Handle http errors.
365 Derived class can override this, or provide specific handlers
366 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000367 # First check if there's a specific handler for this error
368 name = 'http_error_%d' % errcode
369 if hasattr(self, name):
370 method = getattr(self, name)
371 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000372 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000373 else:
374 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000375 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000376 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000377
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000378 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000379 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000380 void = fp.read()
381 fp.close()
382 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000383
Bill Janssen426ea0a2007-08-29 22:35:05 +0000384 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000385 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000386 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000387
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000388 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000389 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000390 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000391 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000392 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000393 if host:
394 user_passwd, host = splituser(host)
395 host = unquote(host)
396 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000397 else:
398 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000399 # here, we determine, whether the proxy contains authorization information
400 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000401 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000402 url = rest
403 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000404 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000405 realhost = None
406 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000407 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000408 if realhost:
409 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000410 if user_passwd:
411 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000412 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000413 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000414 if proxy_passwd:
415 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000416 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000417 else:
418 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000419 if user_passwd:
420 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000421 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000422 else:
423 auth = None
424 h = httplib.HTTPS(host, 0,
425 key_file=self.key_file,
426 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000427 if data is not None:
428 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000429 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000430 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000431 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000432 else:
433 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000434 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
435 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000436 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000437 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000438 h.endheaders(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000439 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000440 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000441 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000442 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000443 # something went wrong with the HTTP status line
444 raise IOError, ('http protocol error', 0,
445 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000446 # According to RFC 2616, "2xx" code indicates that the client's
447 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000448 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000449 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000450 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000451 if data is None:
452 return self.http_error(url, fp, errcode, errmsg, headers)
453 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000454 return self.http_error(url, fp, errcode, errmsg, headers,
455 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000456
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000457 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000458 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000459 if not isinstance(url, str):
460 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000461 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000462 return self.open_ftp(url)
463 else:
464 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000465
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000466 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000467 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000468 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000469 try:
470 from cStringIO import StringIO
471 except ImportError:
472 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000473 host, file = splithost(url)
474 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000475 try:
476 stats = os.stat(localname)
477 except OSError, e:
478 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000479 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000480 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000481 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000482 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000483 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
484 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000486 urlfile = file
487 if file[:1] == '/':
488 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000489 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000490 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000491 host, port = splitport(host)
492 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000493 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000494 urlfile = file
495 if file[:1] == '/':
496 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000497 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000498 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000499 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000500
    def open_ftp(self, url):
        """Use FTP protocol.

        url is the part of the URL after the scheme, of the form
        //[user[:passwd]@]host[:port]/path[;attr=value...].

        Returns an addinfourl object whose headers carry Content-Type
        (when it can be guessed from the URL) and Content-Length (when
        the transfer reports a non-negative length).  Raises IOError
        when url is not a string (proxy case), when no host is given, or
        when one of the ftperrors() exceptions occurs.
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        # Cache connections by resolved address rather than by name.
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        # Split the path into its directory components and the final
        # file name.
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the leading '/'.
        if dirs and not dirs[0]: dirs = dirs[1:]
        # A second empty component (path began with '//') becomes '/'.
        if dirs and not dirs[0]: dirs[0] = '/'
        # One cached connection per (user, host, port, directory).
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' when no file name was given,
            # 'I' otherwise; a ";type=" attribute overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError, keeping the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000562
def open_data(self, url, data=None):
    """Use "data" URL.

    url is the part of the URL after "data:".  The optional POST
    payload passed as data is ignored.

    Returns an addinfourl object wrapping an in-memory file that holds
    synthesized Date, Content-type and Content-Length headers followed
    by the decoded payload.

    Raises IOError if url is not a str or contains no ',' separator.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        [mediatype, data] = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ";token" without '=' is an encoding marker (e.g. base64),
    # not a mediatype parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    msg = []
    # Spell the time out as %H:%M:%S; the %T shorthand is not a portable
    # strftime directive.
    msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                          time.gmtime(time.time())))
    msg.append('Content-type: %s' % mediatype)
    if encoding == 'base64':
        import base64
        data = base64.decodestring(data)
    else:
        data = unquote(data)
    msg.append('Content-Length: %d' % len(data))
    msg.append('')
    msg.append(data)
    msg = '\n'.join(msg)
    f = StringIO(msg)
    # Parsing the headers leaves f positioned at the start of the payload.
    headers = mimetools.Message(f, 0)
    #f.fileno = None     # needed for addinfourl
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000608
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000609
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds handlers for HTTP redirects (301, 302, 303, 307) and for
    Basic authentication challenges (401, 407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) pair; see get_user_passwd().
        self.auth_cache = {}
        # Redirects followed so far for the request in progress.
        self.tries = 0
        # Redirect limit; a false value disables the check.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries redirects have already been
        followed, in which case the response is handed to http_error_500
        (if defined) or http_error_default as a 500 error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                self.tries = 0
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when following the redirect raises; otherwise the
            # stale count leaks into later requests on this opener and
            # eventually triggers a bogus "Redirect Recursion" error.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the 'location' (or 'uri') header.

        Returns None when neither header is present.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the redirect response before opening the target.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # NOTE(review): newurl is taken from a server-supplied header and is
        # passed to self.open() without any restriction on its scheme.
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the three URLopener.http_error_default() calls below
        # are presumably expected to raise; if they returned, execution
        # would continue with a missing header or a failed match.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch by URL type so subclasses can override a single retry.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the URLopener.http_error_default()
        # calls are presumably expected to raise.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to embed user:passwd and reopens
        the URL.  Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # it doubles as the clear_cache flag so stale entries are dropped.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Rewrites self.proxies['https'] to embed user:passwd and reopens
        the URL.  Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the role of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd embedded in the URL.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed on as the clear_cache flag.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd embedded in the URL.

        Returns None if no credentials were obtained.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth for the role of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is discarded and the user is prompted
        again.  A non-empty answer is stored in the cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal and returns (user, passwd), or
        (None, None) if interrupted with Ctrl-C.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Python 2 print statement: end the interrupted prompt line.
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000794
795
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000796# Utility functions
797
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is looked up on the first call and cached in the
    module-level _localhost for later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000805
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The address is looked up on the first call and cached in the
    module-level _thishost for later calls.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000813
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on the first call only; its all_errors value is
    cached in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000822
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built on the first call from an empty in-memory file
    and cached in the module-level _noheaders; every later call returns
    that same instance.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000836
837
838# Utility classes
839
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000840class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000841 """Class used by open_ftp() for cache of open FTP connections."""
842
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000843 def __init__(self, user, passwd, host, port, dirs,
844 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000845 self.user = user
846 self.passwd = passwd
847 self.host = host
848 self.port = port
849 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000850 self.timeout = timeout
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000851 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000852
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000853 def init(self):
854 import ftplib
855 self.busy = 0
856 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000857 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000858 self.ftp.login(self.user, self.passwd)
859 for dir in self.dirs:
860 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000861
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000862 def retrfile(self, file, type):
863 import ftplib
864 self.endtransfer()
865 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
866 else: cmd = 'TYPE ' + type; isdir = 0
867 try:
868 self.ftp.voidcmd(cmd)
869 except ftplib.all_errors:
870 self.init()
871 self.ftp.voidcmd(cmd)
872 conn = None
873 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000874 # Try to retrieve as a file
875 try:
876 cmd = 'RETR ' + file
877 conn = self.ftp.ntransfercmd(cmd)
878 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000879 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000880 raise IOError, ('ftp error', reason), sys.exc_info()[2]
881 if not conn:
882 # Set transfer mode to ASCII!
883 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000884 # Try a directory listing. Verify that directory exists.
885 if file:
886 pwd = self.ftp.pwd()
887 try:
888 try:
889 self.ftp.cwd(file)
890 except ftplib.error_perm, reason:
891 raise IOError, ('ftp error', reason), sys.exc_info()[2]
892 finally:
893 self.ftp.cwd(pwd)
894 cmd = 'LIST ' + file
895 else:
896 cmd = 'LIST'
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000897 conn = self.ftp.ntransfercmd(cmd)
898 self.busy = 1
899 # Pass back both a suitably decorated object and a retrieval length
900 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000901 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000902 def endtransfer(self):
903 if not self.busy:
904 return
905 self.busy = 0
906 try:
907 self.ftp.voidresp()
908 except ftperrors():
909 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000910
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000911 def close(self):
912 self.endtransfer()
913 try:
914 self.ftp.close()
915 except ftperrors():
916 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000917
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its bound read methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Always provide fileno(); it answers None when fp has none.
        if not hasattr(fp, "fileno"):
            self.fileno = lambda: None
        else:
            self.fileno = fp.fileno
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000946
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook is called with hookargs once, after the wrapped file has
    been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000961
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        # headers is stored as given and handed back by info().
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000971
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        # The three values are stored as given and returned by the
        # accessor methods below.
        self.headers = headers
        self.url = url
        self.code = code
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code passed to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the url passed to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000989
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000990
Guido van Rossum7c395db1994-07-04 22:14:49 +0000991# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000992# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000993# splittype('type:opaquestring') --> 'type', 'opaquestring'
994# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000995# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
996# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000997# splitport('host:port') --> 'host', 'port'
998# splitquery('/path?query') --> '/path', 'query'
999# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001000# splitattr('/path;attr1=value1;attr2=value2;...') ->
1001# '/path', ['attr1=value1', 'attr2=value2', ...]
1002# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001003# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1005
# _is_unicode(x) tells whether x is a unicode object; on interpreters
# without a `unicode` builtin it always answers 0.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Raises UnicodeError when
    a unicode URL cannot be encoded as ASCII.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
Martin v. Löwis1d994332000-12-03 18:30:10 +00001025 return url
1026
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001027def unwrap(url):
Guido van Rossume7b146f2000-02-04 15:28:42 +00001028 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
Guido van Rossumb2493f82000-12-15 15:01:37 +00001029 url = url.strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001030 if url[:1] == '<' and url[-1:] == '>':
Guido van Rossumb2493f82000-12-15 15:01:37 +00001031 url = url[1:-1].strip()
1032 if url[:4] == 'URL:': url = url[4:].strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001033 return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001034
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when the URL has no
    leading 'type:' part.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001048
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001060
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and returned as a list;
    without an '@' the result is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001072
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    Splits at the first ':'; the password is None when there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily on first use; re.S lets the password part
        # contain newlines.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001084
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None (and host is
    returned whole) when host does not end in ':' followed by digits.
    """
    global _portprog
    if _portprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001097
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    try:
        # int() rejects an empty port as well as a non-numeric one.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001119
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits at the last '?'; the query is None when there is no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001131
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits at the last '#'; the tag is None when there is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001143
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001149
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the first '='; the value is None when there is no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001161
# Lookup table from every two-character hex pair, in any mix of upper
# and lower case, to the character it encodes.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape whose XX is a pair of hex digits (upper, lower
    or mixed case) is replaced by the character it encodes.  A '%' not
    followed by two hex digits is left untouched.
    """
    res = s.split('%')
    # res[0] precedes the first '%'; every later piece started with one.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put the '%' back.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Presumably reached when a non-ASCII decoded byte is joined
            # to a unicode remainder; build a unicode character instead.
            res[i] = unichr(int(item[:2], 16)) + item[2:]
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001177
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001182
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in safe or in always_safe are copied through; all others
    become '%XX' with upper-case hex digits.
    """
    # One translation table per (safe, always_safe) pair is built on
    # demand and kept in _safemaps.
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        unquoted = safe + always_safe
        safe_map = {}
        for code in range(256):
            char = chr(code)
            if char in unquoted:
                safe_map[char] = char
            else:
                safe_map[char] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001221
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let blanks through quote() untouched so that they, and only they,
    # can be turned into '+' afterwards (a literal '+' is escaped).
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001228
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001229def urlencode(query,doseq=0):
1230 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001231
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001232 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001233 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001234
1235 If the query arg is a sequence of two-element tuples, the order of the
1236 parameters in the output will match the order of parameters in the
1237 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001238 """
Tim Peters658cba62001-02-09 20:06:00 +00001239
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001240 if hasattr(query,"items"):
1241 # mapping objects
1242 query = query.items()
1243 else:
1244 # it's a bother at times that strings and string-like objects are
1245 # sequences...
1246 try:
1247 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001248 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001249 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001250 raise TypeError
1251 # zero-length sequences of all types will get here and succeed,
1252 # but that's a minor nit - since the original implementation
1253 # allowed empty dicts that type of behavior probably should be
1254 # preserved for consistency
1255 except TypeError:
1256 ty,va,tb = sys.exc_info()
1257 raise TypeError, "not a valid non-string sequence or mapping object", tb
1258
Guido van Rossume7b146f2000-02-04 15:28:42 +00001259 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001260 if not doseq:
1261 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001262 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001263 k = quote_plus(str(k))
1264 v = quote_plus(str(v))
1265 l.append(k + '=' + v)
1266 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001267 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001268 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001269 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001270 v = quote_plus(v)
1271 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001272 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001273 # is there a reasonable way to convert to ASCII?
1274 # encode generates a string, but "replace" or "ignore"
1275 # lose information and "strict" can raise UnicodeError
1276 v = quote_plus(v.encode("ASCII","replace"))
1277 l.append(k + '=' + v)
1278 else:
1279 try:
1280 # is this a sufficient test for sequence-ness?
1281 x = len(v)
1282 except TypeError:
1283 # not a sequence
1284 v = quote_plus(str(v))
1285 l.append(k + '=' + v)
1286 else:
1287 # loop over the sequence
1288 for elt in v:
1289 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001290 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001291
Guido van Rossum442e7201996-03-20 15:33:11 +00001292# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  Variables
    with an empty value are ignored.  If you need a different way, you
    can pass a proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    found = {}
    for key, url in os.environ.items():
        if not url:
            continue
        key = key.lower()
        if key.endswith(suffix):
            # The part in front of '_proxy' is the scheme.
            found[key[:-len(suffix)]] = url
    return found
1308
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY
    if that is unset or empty), which should be a list of DNS suffixes
    separated by commas, or '*' for all hosts.  Whitespace around the
    individual suffixes is ignored.

    Returns 1 if the proxy should be bypassed for host, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # "a.com, b.com" must match b.com too, so drop the blanks that
        # commonly follow the commas.
        name = name.strip()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1327
1328
Jack Jansen11d9b062004-07-16 11:45:00 +00001329if sys.platform == 'darwin':
Ronald Oussoren099646f2008-05-18 20:09:54 +00001330
1331 def _CFSetup(sc):
1332 from ctypes import c_int32, c_void_p, c_char_p, c_int
1333 sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
1334 sc.CFStringCreateWithCString.restype = c_void_p
1335 sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
1336 sc.SCDynamicStoreCopyProxies.restype = c_void_p
1337 sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
1338 sc.CFDictionaryGetValue.restype = c_void_p
1339 sc.CFStringGetLength.argtypes = [ c_void_p ]
1340 sc.CFStringGetLength.restype = c_int32
1341 sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
1342 sc.CFStringGetCString.restype = c_int32
1343 sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
1344 sc.CFNumberGetValue.restype = c_int32
1345 sc.CFRelease.argtypes = [ c_void_p ]
1346 sc.CFRelease.restype = None
1347
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001348 def _CStringFromCFString(sc, value):
1349 from ctypes import create_string_buffer
1350 length = sc.CFStringGetLength(value) + 1
1351 buff = create_string_buffer(length)
1352 sc.CFStringGetCString(value, buff, length, 0)
1353 return buff.value
1354
1355 def _CFNumberToInt32(sc, cfnum):
1356 from ctypes import byref, c_int
1357 val = c_int()
1358 kCFNumberSInt32Type = 3
1359 sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
1360 return val.value
1361
1362
1363 def proxy_bypass_macosx_sysconf(host):
1364 """
1365 Return True iff this host shouldn't be accessed using a proxy
1366
1367 This function uses the MacOSX framework SystemConfiguration
1368 to fetch the proxy information.
1369 """
1370 from ctypes import cdll
1371 from ctypes.util import find_library
1372 import re
1373 import socket
1374 from fnmatch import fnmatch
1375
1376 def ip2num(ipAddr):
1377 parts = ipAddr.split('.')
1378 parts = map(int, parts)
1379 if len(parts) != 4:
1380 parts = (parts + [0, 0, 0, 0])[:4]
1381 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1382
1383 sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
Ronald Oussoren099646f2008-05-18 20:09:54 +00001384 _CFSetup(sc)
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001385
1386 hostIP = None
1387
1388 if not sc:
1389 return False
1390
1391 kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
1392 kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
1393 "ExcludeSimpleHostnames", 0)
1394
1395
1396 proxyDict = sc.SCDynamicStoreCopyProxies(None)
Ronald Oussoren099646f2008-05-18 20:09:54 +00001397 if proxyDict is None:
1398 return False
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001399
1400 try:
1401 # Check for simple host names:
1402 if '.' not in host:
1403 exclude_simple = sc.CFDictionaryGetValue(proxyDict,
1404 kSCPropNetProxiesExcludeSimpleHostnames)
1405 if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
1406 return True
1407
1408
1409 # Check the exceptions list:
1410 exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
1411 if exceptions:
1412 # Items in the list are strings like these: *.local, 169.254/16
1413 for index in xrange(sc.CFArrayGetCount(exceptions)):
1414 value = sc.CFArrayGetValueAtIndex(exceptions, index)
1415 if not value: continue
1416 value = _CStringFromCFString(sc, value)
1417
1418 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1419 if m is not None:
1420 if hostIP is None:
1421 hostIP = socket.gethostbyname(host)
1422 hostIP = ip2num(hostIP)
1423
1424 base = ip2num(m.group(1))
1425 mask = int(m.group(2)[1:])
1426 mask = 32 - mask
1427
1428 if (hostIP >> mask) == (base >> mask):
1429 return True
1430
1431 elif fnmatch(host, value):
1432 return True
1433
1434 return False
1435
1436 finally:
1437 sc.CFRelease(kSCPropNetProxiesExceptionsList)
1438 sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
1439
1440
1441
1442 def getproxies_macosx_sysconf():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001443 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001444
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001445 This function uses the MacOSX framework SystemConfiguration
1446 to fetch the proxy information.
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001447 """
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001448 from ctypes import cdll
1449 from ctypes.util import find_library
1450
1451 sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
Ronald Oussoren099646f2008-05-18 20:09:54 +00001452 _CFSetup(sc)
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001453
1454 if not sc:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001455 return {}
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001456
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001457 kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, "HTTPEnable", 0)
1458 kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, "HTTPProxy", 0)
1459 kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, "HTTPPort", 0)
1460
1461 kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, "HTTPSEnable", 0)
1462 kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, "HTTPSProxy", 0)
1463 kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, "HTTPSPort", 0)
1464
1465 kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, "FTPEnable", 0)
1466 kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, "FTPPassive", 0)
1467 kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, "FTPPort", 0)
1468 kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, "FTPProxy", 0)
1469
1470 kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, "GopherEnable", 0)
1471 kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, "GopherPort", 0)
1472 kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, "GopherProxy", 0)
1473
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001474 proxies = {}
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001475 proxyDict = sc.SCDynamicStoreCopyProxies(None)
1476
1477 try:
1478 # HTTP:
1479 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
1480 if enabled and _CFNumberToInt32(sc, enabled):
1481 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
1482 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)
1483
1484 if proxy:
1485 proxy = _CStringFromCFString(sc, proxy)
1486 if port:
1487 port = _CFNumberToInt32(sc, port)
1488 proxies["http"] = "http://%s:%i" % (proxy, port)
1489 else:
1490 proxies["http"] = "http://%s" % (proxy, )
1491
1492 # HTTPS:
1493 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
1494 if enabled and _CFNumberToInt32(sc, enabled):
1495 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
1496 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)
1497
1498 if proxy:
1499 proxy = _CStringFromCFString(sc, proxy)
1500 if port:
1501 port = _CFNumberToInt32(sc, port)
1502 proxies["https"] = "http://%s:%i" % (proxy, port)
1503 else:
1504 proxies["https"] = "http://%s" % (proxy, )
1505
1506 # FTP:
1507 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
1508 if enabled and _CFNumberToInt32(sc, enabled):
1509 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
1510 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)
1511
1512 if proxy:
1513 proxy = _CStringFromCFString(sc, proxy)
1514 if port:
1515 port = _CFNumberToInt32(sc, port)
1516 proxies["ftp"] = "http://%s:%i" % (proxy, port)
1517 else:
1518 proxies["ftp"] = "http://%s" % (proxy, )
1519
1520 # Gopher:
1521 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
1522 if enabled and _CFNumberToInt32(sc, enabled):
1523 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
1524 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)
1525
1526 if proxy:
1527 proxy = _CStringFromCFString(sc, proxy)
1528 if port:
1529 port = _CFNumberToInt32(sc, port)
1530 proxies["gopher"] = "http://%s:%i" % (proxy, port)
1531 else:
1532 proxies["gopher"] = "http://%s" % (proxy, )
1533 finally:
1534 sc.CFRelease(proxyDict)
1535
1536 sc.CFRelease(kSCPropNetProxiesHTTPEnable)
1537 sc.CFRelease(kSCPropNetProxiesHTTPProxy)
1538 sc.CFRelease(kSCPropNetProxiesHTTPPort)
1539 sc.CFRelease(kSCPropNetProxiesFTPEnable)
1540 sc.CFRelease(kSCPropNetProxiesFTPPassive)
1541 sc.CFRelease(kSCPropNetProxiesFTPPort)
1542 sc.CFRelease(kSCPropNetProxiesFTPProxy)
1543 sc.CFRelease(kSCPropNetProxiesGopherEnable)
1544 sc.CFRelease(kSCPropNetProxiesGopherPort)
1545 sc.CFRelease(kSCPropNetProxiesGopherProxy)
1546
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001547 return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001548
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001549
1550
Georg Brandl22350112008-01-20 12:05:43 +00001551 def proxy_bypass(host):
1552 if getproxies_environment():
1553 return proxy_bypass_environment(host)
1554 else:
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001555 return proxy_bypass_macosx_sysconf(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001556
Jack Jansen11d9b062004-07-16 11:45:00 +00001557 def getproxies():
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001558 return getproxies_environment() or getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001559
Mark Hammond4f570b92000-07-26 07:04:38 +00001560elif os.name == 'nt':
1561 def getproxies_registry():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001562 """Return a dictionary of scheme -> proxy server URL mappings.
Mark Hammond4f570b92000-07-26 07:04:38 +00001563
1564 Win32 uses the registry to store proxies.
1565
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001566 """
1567 proxies = {}
Mark Hammond4f570b92000-07-26 07:04:38 +00001568 try:
1569 import _winreg
1570 except ImportError:
1571 # Std module, so should be around - but you never know!
1572 return proxies
1573 try:
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001574 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1575 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Mark Hammond4f570b92000-07-26 07:04:38 +00001576 proxyEnable = _winreg.QueryValueEx(internetSettings,
1577 'ProxyEnable')[0]
1578 if proxyEnable:
1579 # Returned as Unicode but problems if not converted to ASCII
1580 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1581 'ProxyServer')[0])
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001582 if '=' in proxyServer:
1583 # Per-protocol settings
Mark Hammond4f570b92000-07-26 07:04:38 +00001584 for p in proxyServer.split(';'):
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001585 protocol, address = p.split('=', 1)
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001586 # See if address has a type:// prefix
Guido van Rossum64e5aa92002-04-02 14:38:16 +00001587 import re
1588 if not re.match('^([^/:]+)://', address):
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001589 address = '%s://%s' % (protocol, address)
1590 proxies[protocol] = address
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001591 else:
1592 # Use one setting for all protocols
1593 if proxyServer[:5] == 'http:':
1594 proxies['http'] = proxyServer
1595 else:
1596 proxies['http'] = 'http://%s' % proxyServer
1597 proxies['ftp'] = 'ftp://%s' % proxyServer
Mark Hammond4f570b92000-07-26 07:04:38 +00001598 internetSettings.Close()
1599 except (WindowsError, ValueError, TypeError):
1600 # Either registry key not found etc, or the value in an
1601 # unexpected format.
1602 # proxies already set up to be empty so nothing to do
1603 pass
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001604 return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001605
Mark Hammond4f570b92000-07-26 07:04:38 +00001606 def getproxies():
1607 """Return a dictionary of scheme -> proxy server URL mappings.
1608
1609 Returns settings gathered from the environment, if specified,
1610 or the registry.
1611
1612 """
1613 return getproxies_environment() or getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001614
Georg Brandl22350112008-01-20 12:05:43 +00001615 def proxy_bypass_registry(host):
Tim Peters55c12d42001-08-09 18:04:14 +00001616 try:
1617 import _winreg
1618 import re
Tim Peters55c12d42001-08-09 18:04:14 +00001619 except ImportError:
1620 # Std modules, so should be around - but you never know!
1621 return 0
1622 try:
1623 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1624 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1625 proxyEnable = _winreg.QueryValueEx(internetSettings,
1626 'ProxyEnable')[0]
1627 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1628 'ProxyOverride')[0])
1629 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1630 except WindowsError:
1631 return 0
1632 if not proxyEnable or not proxyOverride:
1633 return 0
1634 # try to make a host list from name and IP address.
Georg Brandl1f636702006-02-18 23:10:23 +00001635 rawHost, port = splitport(host)
1636 host = [rawHost]
Tim Peters55c12d42001-08-09 18:04:14 +00001637 try:
Georg Brandl1f636702006-02-18 23:10:23 +00001638 addr = socket.gethostbyname(rawHost)
1639 if addr != rawHost:
Tim Peters55c12d42001-08-09 18:04:14 +00001640 host.append(addr)
1641 except socket.error:
1642 pass
Georg Brandl1f636702006-02-18 23:10:23 +00001643 try:
1644 fqdn = socket.getfqdn(rawHost)
1645 if fqdn != rawHost:
1646 host.append(fqdn)
1647 except socket.error:
1648 pass
Tim Peters55c12d42001-08-09 18:04:14 +00001649 # make a check value list from the registry entry: replace the
1650 # '<local>' string by the localhost entry and the corresponding
1651 # canonical entry.
1652 proxyOverride = proxyOverride.split(';')
Tim Peters55c12d42001-08-09 18:04:14 +00001653 # now check if we match one of the registry values.
1654 for test in proxyOverride:
Senthil Kumaran4af40d22009-05-01 05:59:52 +00001655 if test == '<local>':
1656 if '.' not in rawHost:
1657 return 1
Tim Petersab9ba272001-08-09 21:40:30 +00001658 test = test.replace(".", r"\.") # mask dots
1659 test = test.replace("*", r".*") # change glob sequence
1660 test = test.replace("?", r".") # change glob char
Tim Peters55c12d42001-08-09 18:04:14 +00001661 for val in host:
1662 # print "%s <--> %s" %( test, val )
1663 if re.match(test, val, re.I):
1664 return 1
1665 return 0
1666
Georg Brandl22350112008-01-20 12:05:43 +00001667 def proxy_bypass(host):
1668 """Return a dictionary of scheme -> proxy server URL mappings.
1669
1670 Returns settings gathered from the environment, if specified,
1671 or the registry.
1672
1673 """
1674 if getproxies_environment():
1675 return proxy_bypass_environment(host)
1676 else:
1677 return proxy_bypass_registry(host)
1678
Mark Hammond4f570b92000-07-26 07:04:38 +00001679else:
1680 # By default use environment variables
1681 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001682 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001683
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001684# Test and time quote() and unquote()
1685def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001686 s = ''
1687 for i in range(256): s = s + chr(i)
1688 s = s*4
1689 t0 = time.time()
1690 qs = quote(s)
1691 uqs = unquote(qs)
1692 t1 = time.time()
1693 if uqs != s:
1694 print 'Wrong!'
Walter Dörwald70a6b492004-02-12 17:35:32 +00001695 print repr(s)
1696 print repr(qs)
1697 print repr(uqs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001698 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001699
1700
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001701def reporthook(blocknum, blocksize, totalsize):
1702 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001703 print "Block number: %d, Block size: %d, Total size: %d" % (
1704 blocknum, blocksize, totalsize)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001705
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001706# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001707def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001708 if not args:
1709 args = [
1710 '/etc/passwd',
1711 'file:/etc/passwd',
1712 'file://localhost/etc/passwd',
Collin Winter071d1ae2007-03-12 01:55:54 +00001713 'ftp://ftp.gnu.org/pub/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001714 'http://www.python.org/index.html',
1715 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001716 if hasattr(URLopener, "open_https"):
1717 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001718 try:
1719 for url in args:
1720 print '-'*10, url, '-'*10
1721 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001722 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001723 if h:
1724 print '======'
1725 for k in h.keys(): print k + ':', h[k]
1726 print '======'
1727 fp = open(fn, 'rb')
1728 data = fp.read()
1729 del fp
1730 if '\r' in data:
1731 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001732 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001733 print data
1734 fn, h = None, None
1735 print '-'*40
1736 finally:
1737 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001738
Guido van Rossum23490151998-06-25 02:39:00 +00001739def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001740 import getopt, sys
1741 try:
1742 opts, args = getopt.getopt(sys.argv[1:], "th")
1743 except getopt.error, msg:
1744 print msg
1745 print "Use -h for help"
1746 return
1747 t = 0
1748 for o, a in opts:
1749 if o == '-t':
1750 t = t + 1
1751 if o == '-h':
1752 print "Usage: python urllib.py [-t] [url ...]"
1753 print "-t runs self-test;",
1754 print "otherwise, contents of urls are printed"
1755 return
1756 if t:
1757 if t > 1:
1758 test1()
1759 test(args)
1760 else:
1761 if not args:
1762 print "Use -h for help"
1763 for url in args:
1764 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001765
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001766# Run test program when run as a script
1767if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001768 main()