blob: f69ec63692c7ba4d6558f0f81c51057cb4fd543b [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Brett Cannon8bb8fa52008-07-02 01:57:08 +000031import warnings
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
# Public names exported by "from urllib import *".
__all__ = [
    "urlopen", "URLopener", "FancyURLopener", "urlretrieve",
    "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
    "urlencode", "url2pathname", "pathname2url", "splittag",
    "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
    "splittype", "splithost", "splituser", "splitpasswd", "splitport",
    "splitnport", "splitquery", "splitattr", "splitvalue",
    "getproxies",
]

# XXX This version is not always updated :-(
__version__ = '1.17'

# Trim the ftp cache beyond this size
MAXFTPCACHE = 10
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
# Helper for non-unix systems: pick the platform-specific converters
# between 'file' URL paths and file system paths, falling back to plain
# percent-(un)quoting everywhere else.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL to a file system path.

        OS-specific conversion; not recommended for general use.
        On this platform it simply percent-decodes the argument.
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative 'file' scheme URL.

        OS-specific conversion; not recommended for general use.
        On this platform it simply percent-encodes the argument.
        """
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage
_urlopener = None   # lazily created FancyURLopener shared by the shortcuts


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request body; when given it is forwarded to the
               opener's open() method, otherwise open() is called with
               the URL only.
    proxies -- optional mapping handed to FancyURLopener(proxies=...).
               When supplied, a fresh opener is built for this call and
               the shared module-level opener is left untouched.

    Returns whatever the opener's open() method returns.
    """
    global _urlopener
    # The module-level "warnings" import is used directly; the former
    # function-local "from warnings import warnpy3k" was never referenced.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First call without explicit proxies: create and remember the
        # shared opener.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* through the shared module-level opener.

    The shared FancyURLopener is created on first use; all arguments are
    passed straight through to its retrieve() method, whose result is
    returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
# check for SSL
# Only a failed import means "no SSL support"; a bare except here would
# also swallow KeyboardInterrupt and SystemExit raised during the import.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
106
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying the partial result of a truncated download.

    message -- the error text, passed on to IOError.
    content -- stored unchanged on the instance as the .content attribute.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113ftpcache = {}
114class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors type 302 (relocated) and 401
120 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000121
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000122 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000123
Guido van Rossumba311382000-08-24 16:18:04 +0000124 version = "Python-urllib/%s" % __version__
125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000127 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000134 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000149
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 def __del__(self):
151 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000152
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def close(self):
154 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000155
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000164 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000169
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000170 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000174
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000177 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000178 fullurl = unwrap(toBytes(fullurl))
Senthil Kumaranb52c6f82009-08-15 17:49:55 +0000179 # percent encode url. fixing lame server errors like space within url
180 # parts
181 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
Raymond Hettinger54f02222002-06-01 14:18:47 +0000182 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000183 filename, headers = self.tempcache[fullurl]
184 fp = open(filename, 'rb')
185 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000186 urltype, url = splittype(fullurl)
187 if not urltype:
188 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000189 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000190 proxy = self.proxies[urltype]
191 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000192 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000193 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000194 else:
195 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000196 name = 'open_' + urltype
197 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000198 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000200 if proxy:
201 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000202 else:
203 return self.open_unknown(fullurl, data)
204 try:
205 if data is None:
206 return getattr(self, name)(url)
207 else:
208 return getattr(self, name)(url, data)
209 except socket.error, msg:
210 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000211
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000212 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000213 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000214 type, url = splittype(fullurl)
215 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000216
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000217 def open_unknown_proxy(self, proxy, fullurl, data=None):
218 """Overridable interface to open unknown URL type."""
219 type, url = splittype(fullurl)
220 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
221
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000222 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000223 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000224 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000225 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000226 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000227 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 return self.tempcache[url]
229 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000230 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 try:
232 fp = self.open_local_file(url1)
233 hdrs = fp.info()
234 del fp
235 return url2pathname(splithost(url1)[1]), hdrs
236 except IOError, msg:
237 pass
Fred Drake316a7932000-08-24 01:01:26 +0000238 fp = self.open(url, data)
Benjamin Peterson373498f2009-03-22 17:49:21 +0000239 try:
240 headers = fp.info()
241 if filename:
242 tfp = open(filename, 'wb')
243 else:
244 import tempfile
245 garbage, path = splittype(url)
246 garbage, path = splithost(path or "")
247 path, garbage = splitquery(path or "")
248 path, garbage = splitattr(path or "")
249 suffix = os.path.splitext(path)[1]
250 (fd, filename) = tempfile.mkstemp(suffix)
251 self.__tempfiles.append(filename)
252 tfp = os.fdopen(fd, 'wb')
253 try:
254 result = filename, headers
255 if self.tempcache is not None:
256 self.tempcache[url] = result
257 bs = 1024*8
258 size = -1
259 read = 0
260 blocknum = 0
261 if reporthook:
262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
264 reporthook(blocknum, bs, size)
265 while 1:
266 block = fp.read(bs)
267 if block == "":
268 break
269 read += len(block)
270 tfp.write(block)
271 blocknum += 1
272 if reporthook:
273 reporthook(blocknum, bs, size)
274 finally:
275 tfp.close()
276 finally:
277 fp.close()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000278 del fp
279 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000280
281 # raise exception if actual size does not match content-length header
282 if size >= 0 and read < size:
283 raise ContentTooShortError("retrieval incomplete: got only %i out "
284 "of %i bytes" % (read, size), result)
285
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000287
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000288 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000289
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000291 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000292 import httplib
293 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000294 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000295 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000296 host, selector = splithost(url)
297 if host:
298 user_passwd, host = splituser(host)
299 host = unquote(host)
300 realhost = host
301 else:
302 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000303 # check whether the proxy contains authorization information
304 proxy_passwd, host = splituser(host)
305 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000306 urltype, rest = splittype(selector)
307 url = rest
308 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000309 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000310 realhost = None
311 else:
312 realhost, rest = splithost(rest)
313 if realhost:
314 user_passwd, realhost = splituser(realhost)
315 if user_passwd:
316 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000317 if proxy_bypass(realhost):
318 host = realhost
319
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000320 #print "proxy via http:", host, selector
321 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000322
Martin v. Löwis3e865952006-01-24 15:51:21 +0000323 if proxy_passwd:
324 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000325 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000326 else:
327 proxy_auth = None
328
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000329 if user_passwd:
330 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000331 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 else:
333 auth = None
334 h = httplib.HTTP(host)
335 if data is not None:
336 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000337 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
338 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000339 else:
340 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000341 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000342 if auth: h.putheader('Authorization', 'Basic %s' % auth)
343 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000344 for args in self.addheaders: h.putheader(*args)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000345 h.endheaders()
346 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000347 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000348 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000349 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000350 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000351 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000352 # something went wrong with the HTTP status line
353 raise IOError, ('http protocol error', 0,
354 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000355 # According to RFC 2616, "2xx" code indicates that the client's
356 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000357 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000358 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000359 else:
360 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000361 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000362 else:
363 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000364
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000365 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000366 """Handle http errors.
367 Derived class can override this, or provide specific handlers
368 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000369 # First check if there's a specific handler for this error
370 name = 'http_error_%d' % errcode
371 if hasattr(self, name):
372 method = getattr(self, name)
373 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000374 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000375 else:
376 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000377 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000378 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000379
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000380 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000381 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000382 void = fp.read()
383 fp.close()
384 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000385
Bill Janssen426ea0a2007-08-29 22:35:05 +0000386 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000387 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000388 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000389
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000390 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000391 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000392 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000393 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000394 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000395 if host:
396 user_passwd, host = splituser(host)
397 host = unquote(host)
398 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000399 else:
400 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000401 # here, we determine, whether the proxy contains authorization information
402 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000403 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000404 url = rest
405 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000406 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000407 realhost = None
408 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000409 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000410 if realhost:
411 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000412 if user_passwd:
413 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000414 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000415 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000416 if proxy_passwd:
417 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000418 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000419 else:
420 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000421 if user_passwd:
422 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000423 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000424 else:
425 auth = None
426 h = httplib.HTTPS(host, 0,
427 key_file=self.key_file,
428 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000429 if data is not None:
430 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000431 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000432 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000433 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000434 else:
435 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000436 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
437 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000438 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000439 for args in self.addheaders: h.putheader(*args)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000440 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000441 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000442 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000443 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000444 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000445 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000446 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000447 # something went wrong with the HTTP status line
448 raise IOError, ('http protocol error', 0,
449 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000450 # According to RFC 2616, "2xx" code indicates that the client's
451 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000452 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000453 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000454 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000455 if data is None:
456 return self.http_error(url, fp, errcode, errmsg, headers)
457 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000458 return self.http_error(url, fp, errcode, errmsg, headers,
459 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000460
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000461 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000462 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000463 if not isinstance(url, str):
464 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000465 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000466 return self.open_ftp(url)
467 else:
468 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000469
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000470 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000471 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000472 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000473 try:
474 from cStringIO import StringIO
475 except ImportError:
476 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000477 host, file = splithost(url)
478 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000479 try:
480 stats = os.stat(localname)
481 except OSError, e:
482 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000483 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000484 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000486 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000487 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
488 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000489 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000490 urlfile = file
491 if file[:1] == '/':
492 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000493 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000494 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000495 host, port = splitport(host)
496 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000497 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000498 urlfile = file
499 if file[:1] == '/':
500 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000501 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000502 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000503 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000504
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000505 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000506 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000507 if not isinstance(url, str):
508 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000509 import mimetypes, mimetools
510 try:
511 from cStringIO import StringIO
512 except ImportError:
513 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000514 host, path = splithost(url)
515 if not host: raise IOError, ('ftp error', 'no host given')
516 host, port = splitport(host)
517 user, host = splituser(host)
518 if user: user, passwd = splitpasswd(user)
519 else: passwd = None
520 host = unquote(host)
521 user = unquote(user or '')
522 passwd = unquote(passwd or '')
523 host = socket.gethostbyname(host)
524 if not port:
525 import ftplib
526 port = ftplib.FTP_PORT
527 else:
528 port = int(port)
529 path, attrs = splitattr(path)
530 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000531 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000532 dirs, file = dirs[:-1], dirs[-1]
533 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000534 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000535 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000536 # XXX thread unsafe!
537 if len(self.ftpcache) > MAXFTPCACHE:
538 # Prune the cache, rather arbitrarily
539 for k in self.ftpcache.keys():
540 if k != key:
541 v = self.ftpcache[k]
542 del self.ftpcache[k]
543 v.close()
544 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000545 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000546 self.ftpcache[key] = \
547 ftpwrapper(user, passwd, host, port, dirs)
548 if not file: type = 'D'
549 else: type = 'I'
550 for attr in attrs:
551 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000552 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000553 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000554 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000555 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000556 mtype = mimetypes.guess_type("ftp:" + url)[0]
557 headers = ""
558 if mtype:
559 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000560 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000561 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000562 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000563 return addinfourl(fp, headers, "ftp:" + url)
564 except ftperrors(), msg:
565 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000566
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000567 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000568 """Use "data" URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000569 if not isinstance(url, str):
570 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000571 # ignore POSTed data
572 #
573 # syntax of data URLs:
574 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
575 # mediatype := [ type "/" subtype ] *( ";" parameter )
576 # data := *urlchar
577 # parameter := attribute "=" value
Raymond Hettingera6172712004-12-31 19:15:26 +0000578 import mimetools
579 try:
580 from cStringIO import StringIO
581 except ImportError:
582 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000583 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000584 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000585 except ValueError:
586 raise IOError, ('data error', 'bad data URL')
587 if not type:
588 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000589 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000590 if semi >= 0 and '=' not in type[semi:]:
591 encoding = type[semi+1:]
592 type = type[:semi]
593 else:
594 encoding = ''
595 msg = []
596 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
597 time.gmtime(time.time())))
598 msg.append('Content-type: %s' % type)
599 if encoding == 'base64':
600 import base64
601 data = base64.decodestring(data)
602 else:
603 data = unquote(data)
Georg Brandl0619a322006-07-26 07:40:17 +0000604 msg.append('Content-Length: %d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000605 msg.append('')
606 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000607 msg = '\n'.join(msg)
Raymond Hettingera6172712004-12-31 19:15:26 +0000608 f = StringIO(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000609 headers = mimetools.Message(f, 0)
Georg Brandl1f663572005-11-26 16:50:44 +0000610 #f.fileno = None # needed for addinfourl
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000611 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000612
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000613
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect handling (301, 302, 303, 307) and Basic
    authentication retries (401, 407) on top of URLopener.  Credentials
    are obtained through prompt_user_passwd() and cached per
    realm and host in self.auth_cache.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless self.maxtries redirects have already
        been followed, in which case the response is reported as a 500
        "Redirect Recursion" error through http_error_500 (if defined)
        or http_error_default.
        """
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        try:
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when following the redirect raises; otherwise a
            # failed redirect would count against later requests made
            # through this opener.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the target named by the Location (or URI) header.

        Returns None when neither header is present.  The new URL is
        opened without data.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Consume and discard the old response before closing it.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _basic_realm(self, header, url, fp, errcode, errmsg, headers):
        """Return the realm of the Basic challenge found in headers[header]."""
        # NOTE(review): each URLopener.http_error_default call below is
        # expected to raise and so end the request; it is defined outside
        # this class -- confirm.  If it returned, execution would simply
        # continue with the next statement, as it always has.
        if header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers[header]
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        return realm

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_realm('www-authenticate', url, fp,
                                  errcode, errmsg, headers)
        # Dispatch on the current scheme: retry_http_basic_auth or
        # retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_realm('proxy-authenticate', url, fp,
                                  errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def _reopen(self, newurl, data):
        """Call self.open(), passing data only when there is some."""
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Put credentials into self.proxies[scheme] and reopen url."""
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # passing it as clear_cache drops the cached pair for this realm
        # so that the user is asked again.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        return self._reopen(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Reopen url with user:passwd@ credentials embedded in the host."""
        host, selector = splithost(url)
        # See _retry_proxy_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        return self._reopen(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407, adding proxy credentials.

        Returns None if no credentials could be obtained."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407, adding proxy credentials.

        Returns None if no credentials could be obtained."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401, adding credentials.

        Returns None if no credentials could be obtained."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401, adding credentials.

        Returns None if no credentials could be obtained."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is dropped and the user is prompted again.
        Only non-empty answers are cached.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for a user name and password on the terminal and returns
        them as a tuple; returns (None, None) if interrupted with
        KeyboardInterrupt."""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Finish the interrupted prompt line.
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000798
799
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000800# Utility functions
801
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is resolved on the first call and cached in the module
    global _localhost."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000809
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The host name is looked up and resolved on the first call; the
    result is cached in the module global _thishost."""
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000817
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on the first call only; its all_errors value is
    then cached in the module global _ftperrors."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000826
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built from an empty StringIO on the first call and
    the same instance is returned by every later call."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    empty = mimetools.Message(StringIO(), 0)
    _noheaders = empty
    empty.fp.close()    # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000840
841
842# Utility classes
843
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Remember the connection parameters and connect immediately.

        dirs is the sequence of directories to change into, in order,
        after logging in.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in and change into each of self.dirs."""
        import ftplib
        # busy is set while a transfer started by retrfile() is pending.
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a directory listing.

        type is the FTP transfer type letter; 'd' or 'D' asks for a
        directory listing.  Returns a tuple of (file-like object,
        retrieval length); closing the file-like object calls
        endtransfer().

        Raises IOError('ftp error', reason) on a permission error other
        than a 550 reply to RETR, or when file cannot be entered as a
        directory.
        """
        import ftplib
        # Finish any transfer still pending from a previous call.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed (NOTE(review): presumably because the
            # cached connection went stale -- confirm); reconnect and
            # try once more.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with 550 falls through to the
                # directory listing attempt below; anything else is
                # reported to the caller.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])

    def endtransfer(self):
        """Complete a pending transfer, if any, ignoring FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            # Read the server's response that ends the transfer.
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the connection.

        FTP errors raised while closing are ignored."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000921
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes, so subclasses only have to add their extras.
    """

    def __init__(self, fp):
        self.fp = fp
        # read and readline are required; everything else is optional.
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if not hasattr(fp, "fileno"):
            # No descriptor available: fileno() then reports None.
            self.fileno = lambda: None
        else:
            self.fileno = fp.fileno
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000950
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is called with hookargs after the wrapped file has been
    closed, and is then forgotten.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so that a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000965
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        # headers is stored as given and handed back by info().
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000975
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file.

    Also records an optional status code, available through getcode().
    """

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code passed to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000993
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000994
Guido van Rossum7c395db1994-07-04 22:14:49 +0000995# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000996# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000997# splittype('type:opaquestring') --> 'type', 'opaquestring'
998# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000999# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1000# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001001# splitport('host:port') --> 'host', 'port'
1002# splitquery('/path?query') --> '/path', 'query'
1003# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +00001004# splitattr('/path;attr1=value1;attr2=value2;...') ->
1005# '/path', ['attr1=value1', 'attr2=value2', ...]
1006# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001007# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1009
Walter Dörwald65230a22002-06-03 15:58:32 +00001010try:
1011 unicode
1012except NameError:
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001013 def _is_unicode(x):
1014 return 0
Walter Dörwald65230a22002-06-03 15:58:32 +00001015else:
1016 def _is_unicode(x):
1017 return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001018
Martin v. Löwis1d994332000-12-03 18:30:10 +00001019def toBytes(url):
1020 """toBytes(u"URL") --> 'URL'."""
1021 # Most URL schemes require ASCII. If that changes, the conversion
1022 # can be relaxed
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001023 if _is_unicode(url):
Martin v. Löwis1d994332000-12-03 18:30:10 +00001024 try:
1025 url = url.encode("ASCII")
1026 except UnicodeError:
Guido van Rossumb2493f82000-12-15 15:01:37 +00001027 raise UnicodeError("URL " + repr(url) +
1028 " contains non-ASCII characters")
Martin v. Löwis1d994332000-12-03 18:30:10 +00001029 return url
1030
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one pair of enclosing angle brackets
    and a leading 'URL:' marker, in that order."""
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001038
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url does not
    start with a scheme."""
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001052
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'."""
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001064
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are unquoted.  Without an '@' the user part is None and
    host is returned untouched."""
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001076
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the password is
    None."""
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001088
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When host does not end
    in ':' followed by digits it is returned whole, with None as port."""
    global _portprog
    if _portprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001101
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    # An empty or non-numeric port yields None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001123
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the query is None."""
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001135
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the tag is None."""
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001147
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # Whatever precedes the first ';' is the path; the rest are attributes.
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001153
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the value is None."""
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001165
# Maps every two-digit hex string to its character.  Hex digits may be
# upper or lower case in any combination ('ab', 'AB', 'aB' and 'Ab' all
# name the same character), so every combination gets an entry.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in '0123456789ABCDEFabcdef'
                 for b in '0123456789ABCDEFabcdef')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replaces each '%' followed by two hex digits (in either case) with
    the character they encode.  A '%' that is not followed by two hex
    digits is left in place."""
    pieces = s.split('%')
    decoded = [pieces[0]]
    # Every piece after the first one followed a '%' in the input.
    for item in pieces[1:]:
        try:
            decoded.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            decoded.append('%' + item)
        except UnicodeDecodeError:
            # Joining a non-ASCII byte with a unicode remainder failed;
            # use the corresponding unicode character instead.
            decoded.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001181
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space."""
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001186
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001187always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001188 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001189 '0123456789' '_.-')
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001190_safemaps = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001191
Guido van Rossum7c395db1994-07-04 22:14:49 +00001192def quote(s, safe = '/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001193 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001194
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001195 Each part of a URL, e.g. the path info, the query, etc., has a
1196 different set of reserved characters that must be quoted.
1197
1198 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1199 the following reserved characters.
1200
1201 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1202 "$" | ","
1203
1204 Each of these characters is reserved in some component of a URL,
1205 but not necessarily in all of them.
1206
1207 By default, the quote function is intended for quoting the path
1208 section of a URL. Thus, it will not encode '/'. This character
1209 is reserved, but in typical usage the quote function is being
1210 called on a path where the existing slash characters are used as
1211 reserved characters.
1212 """
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001213 cachekey = (safe, always_safe)
1214 try:
1215 safe_map = _safemaps[cachekey]
1216 except KeyError:
1217 safe += always_safe
1218 safe_map = {}
1219 for i in range(256):
1220 c = chr(i)
1221 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1222 _safemaps[cachekey] = safe_map
1223 res = map(safe_map.__getitem__, s)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001224 return ''.join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001225
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces pass through quote() untouched so they can become '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001232
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001233def urlencode(query,doseq=0):
1234 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001235
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001236 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001237 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001238
1239 If the query arg is a sequence of two-element tuples, the order of the
1240 parameters in the output will match the order of parameters in the
1241 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001242 """
Tim Peters658cba62001-02-09 20:06:00 +00001243
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001244 if hasattr(query,"items"):
1245 # mapping objects
1246 query = query.items()
1247 else:
1248 # it's a bother at times that strings and string-like objects are
1249 # sequences...
1250 try:
1251 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001252 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001253 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001254 raise TypeError
1255 # zero-length sequences of all types will get here and succeed,
1256 # but that's a minor nit - since the original implementation
1257 # allowed empty dicts that type of behavior probably should be
1258 # preserved for consistency
1259 except TypeError:
1260 ty,va,tb = sys.exc_info()
1261 raise TypeError, "not a valid non-string sequence or mapping object", tb
1262
Guido van Rossume7b146f2000-02-04 15:28:42 +00001263 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001264 if not doseq:
1265 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001266 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001267 k = quote_plus(str(k))
1268 v = quote_plus(str(v))
1269 l.append(k + '=' + v)
1270 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001271 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001272 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001273 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001274 v = quote_plus(v)
1275 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001276 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001277 # is there a reasonable way to convert to ASCII?
1278 # encode generates a string, but "replace" or "ignore"
1279 # lose information and "strict" can raise UnicodeError
1280 v = quote_plus(v.encode("ASCII","replace"))
1281 l.append(k + '=' + v)
1282 else:
1283 try:
1284 # is this a sufficient test for sequence-ness?
1285 x = len(v)
1286 except TypeError:
1287 # not a sequence
1288 v = quote_plus(str(v))
1289 l.append(k + '=' + v)
1290 else:
1291 # loop over the sequence
1292 for elt in v:
1293 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001294 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001295
Guido van Rossum442e7201996-03-20 15:33:11 +00001296# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (compared
    case-insensitively) that have a non-empty value; this seems to be the
    standard convention.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    return dict((name.lower()[:-len(suffix)], value)
                for name, value in os.environ.items()
                if value and name.lower().endswith(suffix))
1312
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (falling back to
    NO_PROXY), which should be a list of DNS suffixes separated by commas,
    or '*' for all hosts.  Whitespace around the individual suffixes is
    ignored, so "a.com, b.com" works as expected.

    Returns 1 if the proxy should be bypassed for host, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip()
        # Empty entries (e.g. from a trailing comma) would match every host.
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1331
1332
Jack Jansen11d9b062004-07-16 11:45:00 +00001333if sys.platform == 'darwin':
Ronald Oussoren099646f2008-05-18 20:09:54 +00001334
1335 def _CFSetup(sc):
1336 from ctypes import c_int32, c_void_p, c_char_p, c_int
1337 sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
1338 sc.CFStringCreateWithCString.restype = c_void_p
1339 sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
1340 sc.SCDynamicStoreCopyProxies.restype = c_void_p
1341 sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
1342 sc.CFDictionaryGetValue.restype = c_void_p
1343 sc.CFStringGetLength.argtypes = [ c_void_p ]
1344 sc.CFStringGetLength.restype = c_int32
1345 sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
1346 sc.CFStringGetCString.restype = c_int32
1347 sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
1348 sc.CFNumberGetValue.restype = c_int32
1349 sc.CFRelease.argtypes = [ c_void_p ]
1350 sc.CFRelease.restype = None
1351
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001352 def _CStringFromCFString(sc, value):
1353 from ctypes import create_string_buffer
1354 length = sc.CFStringGetLength(value) + 1
1355 buff = create_string_buffer(length)
1356 sc.CFStringGetCString(value, buff, length, 0)
1357 return buff.value
1358
1359 def _CFNumberToInt32(sc, cfnum):
1360 from ctypes import byref, c_int
1361 val = c_int()
1362 kCFNumberSInt32Type = 3
1363 sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
1364 return val.value
1365
1366
1367 def proxy_bypass_macosx_sysconf(host):
1368 """
1369 Return True iff this host shouldn't be accessed using a proxy
1370
1371 This function uses the MacOSX framework SystemConfiguration
1372 to fetch the proxy information.
1373 """
1374 from ctypes import cdll
1375 from ctypes.util import find_library
1376 import re
1377 import socket
1378 from fnmatch import fnmatch
1379
1380 def ip2num(ipAddr):
1381 parts = ipAddr.split('.')
1382 parts = map(int, parts)
1383 if len(parts) != 4:
1384 parts = (parts + [0, 0, 0, 0])[:4]
1385 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1386
1387 sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
Ronald Oussoren099646f2008-05-18 20:09:54 +00001388 _CFSetup(sc)
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001389
1390 hostIP = None
1391
1392 if not sc:
1393 return False
1394
1395 kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
1396 kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
1397 "ExcludeSimpleHostnames", 0)
1398
1399
1400 proxyDict = sc.SCDynamicStoreCopyProxies(None)
Ronald Oussoren099646f2008-05-18 20:09:54 +00001401 if proxyDict is None:
1402 return False
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001403
1404 try:
1405 # Check for simple host names:
1406 if '.' not in host:
1407 exclude_simple = sc.CFDictionaryGetValue(proxyDict,
1408 kSCPropNetProxiesExcludeSimpleHostnames)
1409 if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
1410 return True
1411
1412
1413 # Check the exceptions list:
1414 exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
1415 if exceptions:
1416 # Items in the list are strings like these: *.local, 169.254/16
1417 for index in xrange(sc.CFArrayGetCount(exceptions)):
1418 value = sc.CFArrayGetValueAtIndex(exceptions, index)
1419 if not value: continue
1420 value = _CStringFromCFString(sc, value)
1421
1422 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1423 if m is not None:
1424 if hostIP is None:
1425 hostIP = socket.gethostbyname(host)
1426 hostIP = ip2num(hostIP)
1427
1428 base = ip2num(m.group(1))
1429 mask = int(m.group(2)[1:])
1430 mask = 32 - mask
1431
1432 if (hostIP >> mask) == (base >> mask):
1433 return True
1434
1435 elif fnmatch(host, value):
1436 return True
1437
1438 return False
1439
1440 finally:
1441 sc.CFRelease(kSCPropNetProxiesExceptionsList)
1442 sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
1443
1444
1445
1446 def getproxies_macosx_sysconf():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001447 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001448
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001449 This function uses the MacOSX framework SystemConfiguration
1450 to fetch the proxy information.
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001451 """
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001452 from ctypes import cdll
1453 from ctypes.util import find_library
1454
1455 sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
Ronald Oussoren099646f2008-05-18 20:09:54 +00001456 _CFSetup(sc)
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001457
1458 if not sc:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001459 return {}
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001460
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001461 kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, "HTTPEnable", 0)
1462 kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, "HTTPProxy", 0)
1463 kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, "HTTPPort", 0)
1464
1465 kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, "HTTPSEnable", 0)
1466 kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, "HTTPSProxy", 0)
1467 kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, "HTTPSPort", 0)
1468
1469 kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, "FTPEnable", 0)
1470 kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, "FTPPassive", 0)
1471 kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, "FTPPort", 0)
1472 kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, "FTPProxy", 0)
1473
1474 kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, "GopherEnable", 0)
1475 kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, "GopherPort", 0)
1476 kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, "GopherProxy", 0)
1477
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001478 proxies = {}
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001479 proxyDict = sc.SCDynamicStoreCopyProxies(None)
1480
1481 try:
1482 # HTTP:
1483 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
1484 if enabled and _CFNumberToInt32(sc, enabled):
1485 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
1486 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)
1487
1488 if proxy:
1489 proxy = _CStringFromCFString(sc, proxy)
1490 if port:
1491 port = _CFNumberToInt32(sc, port)
1492 proxies["http"] = "http://%s:%i" % (proxy, port)
1493 else:
1494 proxies["http"] = "http://%s" % (proxy, )
1495
1496 # HTTPS:
1497 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
1498 if enabled and _CFNumberToInt32(sc, enabled):
1499 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
1500 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)
1501
1502 if proxy:
1503 proxy = _CStringFromCFString(sc, proxy)
1504 if port:
1505 port = _CFNumberToInt32(sc, port)
1506 proxies["https"] = "http://%s:%i" % (proxy, port)
1507 else:
1508 proxies["https"] = "http://%s" % (proxy, )
1509
1510 # FTP:
1511 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
1512 if enabled and _CFNumberToInt32(sc, enabled):
1513 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
1514 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)
1515
1516 if proxy:
1517 proxy = _CStringFromCFString(sc, proxy)
1518 if port:
1519 port = _CFNumberToInt32(sc, port)
1520 proxies["ftp"] = "http://%s:%i" % (proxy, port)
1521 else:
1522 proxies["ftp"] = "http://%s" % (proxy, )
1523
1524 # Gopher:
1525 enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
1526 if enabled and _CFNumberToInt32(sc, enabled):
1527 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
1528 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)
1529
1530 if proxy:
1531 proxy = _CStringFromCFString(sc, proxy)
1532 if port:
1533 port = _CFNumberToInt32(sc, port)
1534 proxies["gopher"] = "http://%s:%i" % (proxy, port)
1535 else:
1536 proxies["gopher"] = "http://%s" % (proxy, )
1537 finally:
1538 sc.CFRelease(proxyDict)
1539
1540 sc.CFRelease(kSCPropNetProxiesHTTPEnable)
1541 sc.CFRelease(kSCPropNetProxiesHTTPProxy)
1542 sc.CFRelease(kSCPropNetProxiesHTTPPort)
1543 sc.CFRelease(kSCPropNetProxiesFTPEnable)
1544 sc.CFRelease(kSCPropNetProxiesFTPPassive)
1545 sc.CFRelease(kSCPropNetProxiesFTPPort)
1546 sc.CFRelease(kSCPropNetProxiesFTPProxy)
1547 sc.CFRelease(kSCPropNetProxiesGopherEnable)
1548 sc.CFRelease(kSCPropNetProxiesGopherPort)
1549 sc.CFRelease(kSCPropNetProxiesGopherProxy)
1550
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001551 return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001552
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001553
1554
Georg Brandl22350112008-01-20 12:05:43 +00001555 def proxy_bypass(host):
1556 if getproxies_environment():
1557 return proxy_bypass_environment(host)
1558 else:
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001559 return proxy_bypass_macosx_sysconf(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001560
Jack Jansen11d9b062004-07-16 11:45:00 +00001561 def getproxies():
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001562 return getproxies_environment() or getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001563
Mark Hammond4f570b92000-07-26 07:04:38 +00001564elif os.name == 'nt':
1565 def getproxies_registry():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001566 """Return a dictionary of scheme -> proxy server URL mappings.
Mark Hammond4f570b92000-07-26 07:04:38 +00001567
1568 Win32 uses the registry to store proxies.
1569
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001570 """
1571 proxies = {}
Mark Hammond4f570b92000-07-26 07:04:38 +00001572 try:
1573 import _winreg
1574 except ImportError:
1575 # Std module, so should be around - but you never know!
1576 return proxies
1577 try:
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001578 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1579 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Mark Hammond4f570b92000-07-26 07:04:38 +00001580 proxyEnable = _winreg.QueryValueEx(internetSettings,
1581 'ProxyEnable')[0]
1582 if proxyEnable:
1583 # Returned as Unicode but problems if not converted to ASCII
1584 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1585 'ProxyServer')[0])
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001586 if '=' in proxyServer:
1587 # Per-protocol settings
Mark Hammond4f570b92000-07-26 07:04:38 +00001588 for p in proxyServer.split(';'):
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001589 protocol, address = p.split('=', 1)
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001590 # See if address has a type:// prefix
Guido van Rossum64e5aa92002-04-02 14:38:16 +00001591 import re
1592 if not re.match('^([^/:]+)://', address):
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001593 address = '%s://%s' % (protocol, address)
1594 proxies[protocol] = address
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001595 else:
1596 # Use one setting for all protocols
1597 if proxyServer[:5] == 'http:':
1598 proxies['http'] = proxyServer
1599 else:
1600 proxies['http'] = 'http://%s' % proxyServer
1601 proxies['ftp'] = 'ftp://%s' % proxyServer
Mark Hammond4f570b92000-07-26 07:04:38 +00001602 internetSettings.Close()
1603 except (WindowsError, ValueError, TypeError):
1604 # Either registry key not found etc, or the value in an
1605 # unexpected format.
1606 # proxies already set up to be empty so nothing to do
1607 pass
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001608 return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001609
Mark Hammond4f570b92000-07-26 07:04:38 +00001610 def getproxies():
1611 """Return a dictionary of scheme -> proxy server URL mappings.
1612
1613 Returns settings gathered from the environment, if specified,
1614 or the registry.
1615
1616 """
1617 return getproxies_environment() or getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001618
Georg Brandl22350112008-01-20 12:05:43 +00001619 def proxy_bypass_registry(host):
Tim Peters55c12d42001-08-09 18:04:14 +00001620 try:
1621 import _winreg
1622 import re
Tim Peters55c12d42001-08-09 18:04:14 +00001623 except ImportError:
1624 # Std modules, so should be around - but you never know!
1625 return 0
1626 try:
1627 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1628 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1629 proxyEnable = _winreg.QueryValueEx(internetSettings,
1630 'ProxyEnable')[0]
1631 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1632 'ProxyOverride')[0])
1633 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1634 except WindowsError:
1635 return 0
1636 if not proxyEnable or not proxyOverride:
1637 return 0
1638 # try to make a host list from name and IP address.
Georg Brandl1f636702006-02-18 23:10:23 +00001639 rawHost, port = splitport(host)
1640 host = [rawHost]
Tim Peters55c12d42001-08-09 18:04:14 +00001641 try:
Georg Brandl1f636702006-02-18 23:10:23 +00001642 addr = socket.gethostbyname(rawHost)
1643 if addr != rawHost:
Tim Peters55c12d42001-08-09 18:04:14 +00001644 host.append(addr)
1645 except socket.error:
1646 pass
Georg Brandl1f636702006-02-18 23:10:23 +00001647 try:
1648 fqdn = socket.getfqdn(rawHost)
1649 if fqdn != rawHost:
1650 host.append(fqdn)
1651 except socket.error:
1652 pass
Tim Peters55c12d42001-08-09 18:04:14 +00001653 # make a check value list from the registry entry: replace the
1654 # '<local>' string by the localhost entry and the corresponding
1655 # canonical entry.
1656 proxyOverride = proxyOverride.split(';')
1657 i = 0
1658 while i < len(proxyOverride):
1659 if proxyOverride[i] == '<local>':
1660 proxyOverride[i:i+1] = ['localhost',
1661 '127.0.0.1',
1662 socket.gethostname(),
1663 socket.gethostbyname(
1664 socket.gethostname())]
1665 i += 1
1666 # print proxyOverride
1667 # now check if we match one of the registry values.
1668 for test in proxyOverride:
Tim Petersab9ba272001-08-09 21:40:30 +00001669 test = test.replace(".", r"\.") # mask dots
1670 test = test.replace("*", r".*") # change glob sequence
1671 test = test.replace("?", r".") # change glob char
Tim Peters55c12d42001-08-09 18:04:14 +00001672 for val in host:
1673 # print "%s <--> %s" %( test, val )
1674 if re.match(test, val, re.I):
1675 return 1
1676 return 0
1677
Georg Brandl22350112008-01-20 12:05:43 +00001678 def proxy_bypass(host):
1679 """Return a dictionary of scheme -> proxy server URL mappings.
1680
1681 Returns settings gathered from the environment, if specified,
1682 or the registry.
1683
1684 """
1685 if getproxies_environment():
1686 return proxy_bypass_environment(host)
1687 else:
1688 return proxy_bypass_registry(host)
1689
Mark Hammond4f570b92000-07-26 07:04:38 +00001690else:
    # By default use environment variables: on platforms without a
    # system-specific lookup both public names are plain aliases of the
    # environment-based implementations.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001694
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001695# Test and time quote() and unquote()
1696def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001697 s = ''
1698 for i in range(256): s = s + chr(i)
1699 s = s*4
1700 t0 = time.time()
1701 qs = quote(s)
1702 uqs = unquote(qs)
1703 t1 = time.time()
1704 if uqs != s:
1705 print 'Wrong!'
Walter Dörwald70a6b492004-02-12 17:35:32 +00001706 print repr(s)
1707 print repr(qs)
1708 print repr(uqs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001709 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001710
1711
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers.

    Prints the three values it is given on one line.
    """
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001716
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001717# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001718def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001719 if not args:
1720 args = [
1721 '/etc/passwd',
1722 'file:/etc/passwd',
1723 'file://localhost/etc/passwd',
Collin Winter071d1ae2007-03-12 01:55:54 +00001724 'ftp://ftp.gnu.org/pub/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001725 'http://www.python.org/index.html',
1726 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001727 if hasattr(URLopener, "open_https"):
1728 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001729 try:
1730 for url in args:
1731 print '-'*10, url, '-'*10
1732 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001733 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001734 if h:
1735 print '======'
1736 for k in h.keys(): print k + ':', h[k]
1737 print '======'
1738 fp = open(fn, 'rb')
1739 data = fp.read()
1740 del fp
1741 if '\r' in data:
1742 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001743 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001744 print data
1745 fn, h = None, None
1746 print '-'*40
1747 finally:
1748 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001749
Guido van Rossum23490151998-06-25 02:39:00 +00001750def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001751 import getopt, sys
1752 try:
1753 opts, args = getopt.getopt(sys.argv[1:], "th")
1754 except getopt.error, msg:
1755 print msg
1756 print "Use -h for help"
1757 return
1758 t = 0
1759 for o, a in opts:
1760 if o == '-t':
1761 t = t + 1
1762 if o == '-h':
1763 print "Usage: python urllib.py [-t] [url ...]"
1764 print "-t runs self-test;",
1765 print "otherwise, contents of urls are printed"
1766 return
1767 if t:
1768 if t > 1:
1769 test1()
1770 test(args)
1771 else:
1772 if not args:
1773 print "Use -h for help"
1774 for url in args:
1775 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001776
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001777# Run test program when run as a script
if __name__ == '__main__':
    # Only reached when the file is executed directly, not when imported.
    main()