blob: 38c5ee49f11754ab5d9b50f2524bfcea29a09fa7 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Brett Cannon8bb8fa52008-07-02 01:57:08 +000031import warnings
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
Skip Montanaro40fc1602001-03-01 04:27:19 +000033__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
34 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000035 "urlencode", "url2pathname", "pathname2url", "splittag",
36 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
37 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
38 "splitnport", "splitquery", "splitattr", "splitvalue",
Brett Cannond75f0432007-05-16 22:42:29 +000039 "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
Martin v. Löwis3e865952006-01-24 15:51:21 +000041__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000042
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000043MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
Jack Jansendc3e3f61995-12-15 13:22:13 +000045# Helper for non-unix systems
if os.name not in ('mac', 'nt', 'riscos'):
    # No platform-specific converter module applies: fall back to plain
    # percent-decoding / percent-encoding of the path.
    def url2pathname(pathname):
        """Convert a relative 'file'-scheme URL into a file system path.

        OS-specific helper; not recommended for general use.  On this
        platform the conversion is simply unquote()."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path into a relative 'file'-scheme URL.

        OS-specific helper; not recommended for general use.  On this
        platform the conversion is simply quote()."""
        return quote(pathname)
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # Only 'riscos' is left at this point.
    from rourl2path import url2pathname, pathname2url
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
70# Shortcut for basic usage
71_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request data; when not None it is passed on to
               the opener's open() method.
    proxies -- optional proxy mapping.  When given, a fresh
               FancyURLopener is built for this call only; otherwise a
               module-wide FancyURLopener is created on first use and
               reused afterwards.

    Returns whatever the opener's open() method returns.  Emits a py3k
    deprecation warning on every call.
    """
    global _urlopener
    # The module-level 'warnings' import is used directly; no local
    # re-import of warnpy3k is needed.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        # Caller-specific proxies: never stored in the shared opener.
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments
    are forwarded unchanged to its retrieve() method, whose result is
    returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if not opener:
        # Nothing has been opened through the module-level helpers yet.
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Bill Janssen426ea0a2007-08-29 22:35:05 +000099# check for SSL
# Feature probe: _have_ssl records whether the ssl module can be imported.
# Only ImportError is treated as "no SSL"; a bare except here would also
# swallow KeyboardInterrupt / SystemExit raised while importing.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
106
Georg Brandlb9256022005-08-24 18:46:39 +0000107# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised when the downloaded size does not match content-length.

    message -- text handed to the IOError base class.
    content -- stored unchanged on the instance as .content.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113ftpcache = {}
114class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors type 302 (relocated) and 401
120 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000121
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000122 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000123
Guido van Rossumba311382000-08-24 16:18:04 +0000124 version = "Python-urllib/%s" % __version__
125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000127 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000134 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000149
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 def __del__(self):
151 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000152
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def close(self):
154 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000155
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000164 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000169
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000170 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000174
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000177 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000178 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000179 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000180 filename, headers = self.tempcache[fullurl]
181 fp = open(filename, 'rb')
182 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000183 urltype, url = splittype(fullurl)
184 if not urltype:
185 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000186 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000187 proxy = self.proxies[urltype]
188 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000189 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000190 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000191 else:
192 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000193 name = 'open_' + urltype
194 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000195 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000196 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000197 if proxy:
198 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 else:
200 return self.open_unknown(fullurl, data)
201 try:
202 if data is None:
203 return getattr(self, name)(url)
204 else:
205 return getattr(self, name)(url, data)
206 except socket.error, msg:
207 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000208
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000209 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000210 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000211 type, url = splittype(fullurl)
212 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000213
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000214 def open_unknown_proxy(self, proxy, fullurl, data=None):
215 """Overridable interface to open unknown URL type."""
216 type, url = splittype(fullurl)
217 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
218
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000219 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000220 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000221 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000222 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000223 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000224 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000225 return self.tempcache[url]
226 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000227 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 try:
229 fp = self.open_local_file(url1)
230 hdrs = fp.info()
231 del fp
232 return url2pathname(splithost(url1)[1]), hdrs
233 except IOError, msg:
234 pass
Fred Drake316a7932000-08-24 01:01:26 +0000235 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000237 if filename:
238 tfp = open(filename, 'wb')
239 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000240 import tempfile
241 garbage, path = splittype(url)
242 garbage, path = splithost(path or "")
243 path, garbage = splitquery(path or "")
244 path, garbage = splitattr(path or "")
245 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000246 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000247 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000248 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000249 result = filename, headers
250 if self.tempcache is not None:
251 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000252 bs = 1024*8
253 size = -1
Georg Brandlb9256022005-08-24 18:46:39 +0000254 read = 0
Georg Brandl5a650a22005-08-26 08:51:34 +0000255 blocknum = 0
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000256 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000257 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000258 size = int(headers["Content-Length"])
Georg Brandl5a650a22005-08-26 08:51:34 +0000259 reporthook(blocknum, bs, size)
260 while 1:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000261 block = fp.read(bs)
Georg Brandl5a650a22005-08-26 08:51:34 +0000262 if block == "":
263 break
Georg Brandlb9256022005-08-24 18:46:39 +0000264 read += len(block)
Georg Brandl5a650a22005-08-26 08:51:34 +0000265 tfp.write(block)
Georg Brandlb9256022005-08-24 18:46:39 +0000266 blocknum += 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000267 if reporthook:
268 reporthook(blocknum, bs, size)
269 fp.close()
270 tfp.close()
271 del fp
272 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000273
274 # raise exception if actual size does not match content-length header
275 if size >= 0 and read < size:
276 raise ContentTooShortError("retrieval incomplete: got only %i out "
277 "of %i bytes" % (read, size), result)
278
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000279 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000280
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000281 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000282
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000283 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000284 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000285 import httplib
286 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000287 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000288 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000289 host, selector = splithost(url)
290 if host:
291 user_passwd, host = splituser(host)
292 host = unquote(host)
293 realhost = host
294 else:
295 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000296 # check whether the proxy contains authorization information
297 proxy_passwd, host = splituser(host)
298 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 urltype, rest = splittype(selector)
300 url = rest
301 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000302 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000303 realhost = None
304 else:
305 realhost, rest = splithost(rest)
306 if realhost:
307 user_passwd, realhost = splituser(realhost)
308 if user_passwd:
309 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000310 if proxy_bypass(realhost):
311 host = realhost
312
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 #print "proxy via http:", host, selector
314 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000315
Martin v. Löwis3e865952006-01-24 15:51:21 +0000316 if proxy_passwd:
317 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000318 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000319 else:
320 proxy_auth = None
321
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000322 if user_passwd:
323 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000324 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000325 else:
326 auth = None
327 h = httplib.HTTP(host)
328 if data is not None:
329 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000330 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
331 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 else:
333 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000334 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000335 if auth: h.putheader('Authorization', 'Basic %s' % auth)
336 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000337 for args in self.addheaders: h.putheader(*args)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000338 h.endheaders()
339 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000340 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000341 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000342 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000343 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000344 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000345 # something went wrong with the HTTP status line
346 raise IOError, ('http protocol error', 0,
347 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000348 # According to RFC 2616, "2xx" code indicates that the client's
349 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000350 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000351 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000352 else:
353 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000354 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000355 else:
356 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000357
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000358 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000359 """Handle http errors.
360 Derived class can override this, or provide specific handlers
361 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000362 # First check if there's a specific handler for this error
363 name = 'http_error_%d' % errcode
364 if hasattr(self, name):
365 method = getattr(self, name)
366 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000367 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000368 else:
369 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000370 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000371 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000372
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000373 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000374 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000375 void = fp.read()
376 fp.close()
377 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000378
Bill Janssen426ea0a2007-08-29 22:35:05 +0000379 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000380 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000381 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000382
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000383 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000384 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000385 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000386 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000387 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000388 if host:
389 user_passwd, host = splituser(host)
390 host = unquote(host)
391 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000392 else:
393 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000394 # here, we determine, whether the proxy contains authorization information
395 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000396 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000397 url = rest
398 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000399 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000400 realhost = None
401 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000402 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000403 if realhost:
404 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000405 if user_passwd:
406 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000407 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000408 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000409 if proxy_passwd:
410 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000411 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000412 else:
413 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000414 if user_passwd:
415 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000416 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000417 else:
418 auth = None
419 h = httplib.HTTPS(host, 0,
420 key_file=self.key_file,
421 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000422 if data is not None:
423 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000424 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000425 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000426 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000427 else:
428 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000429 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
430 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000431 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000432 for args in self.addheaders: h.putheader(*args)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000433 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000434 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000435 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000436 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000437 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000438 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000439 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000440 # something went wrong with the HTTP status line
441 raise IOError, ('http protocol error', 0,
442 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000443 # According to RFC 2616, "2xx" code indicates that the client's
444 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000445 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000446 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000447 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000448 if data is None:
449 return self.http_error(url, fp, errcode, errmsg, headers)
450 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000451 return self.http_error(url, fp, errcode, errmsg, headers,
452 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000453
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000454 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000455 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000456 if not isinstance(url, str):
457 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000458 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000459 return self.open_ftp(url)
460 else:
461 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000462
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000463 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000464 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000465 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000466 try:
467 from cStringIO import StringIO
468 except ImportError:
469 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000470 host, file = splithost(url)
471 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000472 try:
473 stats = os.stat(localname)
474 except OSError, e:
475 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000476 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000477 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000478 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000479 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000480 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
481 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000482 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000483 urlfile = file
484 if file[:1] == '/':
485 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000486 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000487 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000488 host, port = splitport(host)
489 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000490 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000491 urlfile = file
492 if file[:1] == '/':
493 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000494 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000495 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000496 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000497
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000498 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000499 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000500 if not isinstance(url, str):
501 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000502 import mimetypes, mimetools
503 try:
504 from cStringIO import StringIO
505 except ImportError:
506 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000507 host, path = splithost(url)
508 if not host: raise IOError, ('ftp error', 'no host given')
509 host, port = splitport(host)
510 user, host = splituser(host)
511 if user: user, passwd = splitpasswd(user)
512 else: passwd = None
513 host = unquote(host)
514 user = unquote(user or '')
515 passwd = unquote(passwd or '')
516 host = socket.gethostbyname(host)
517 if not port:
518 import ftplib
519 port = ftplib.FTP_PORT
520 else:
521 port = int(port)
522 path, attrs = splitattr(path)
523 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000524 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000525 dirs, file = dirs[:-1], dirs[-1]
526 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000527 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000528 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000529 # XXX thread unsafe!
530 if len(self.ftpcache) > MAXFTPCACHE:
531 # Prune the cache, rather arbitrarily
532 for k in self.ftpcache.keys():
533 if k != key:
534 v = self.ftpcache[k]
535 del self.ftpcache[k]
536 v.close()
537 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000538 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000539 self.ftpcache[key] = \
540 ftpwrapper(user, passwd, host, port, dirs)
541 if not file: type = 'D'
542 else: type = 'I'
543 for attr in attrs:
544 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000545 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000546 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000547 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000548 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000549 mtype = mimetypes.guess_type("ftp:" + url)[0]
550 headers = ""
551 if mtype:
552 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000553 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000554 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000555 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000556 return addinfourl(fp, headers, "ftp:" + url)
557 except ftperrors(), msg:
558 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000559
def open_data(self, url, data=None):
    """Use "data" URL.

    url is parsed as '[mediatype][;base64],payload' (RFC 2397) --
    presumably the caller has already removed the "data:" scheme prefix.
    data (POSTed data) is accepted for interface symmetry and ignored.

    Returns an addinfourl built over an in-memory message consisting of
    Date, Content-type and Content-Length headers followed by the
    decoded payload.

    Raises IOError('data error', ...) when url is not a str, or when it
    contains no comma separating the media type from the payload.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        mediatype, data = url.split(',', 1)
    except ValueError:
        # No comma at all: there is no payload to hand back.
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ";token" without '=' is the transfer encoding (e.g.
    # ";base64"); a trailing ";name=value" is an ordinary parameter and
    # stays part of the media type.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    msg = []
    # Spell the time out as %H:%M:%S: the %T shorthand is not one of the
    # directives strftime() is documented to support on every platform.
    msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                          time.gmtime(time.time())))
    msg.append('Content-type: %s' % mediatype)
    if encoding == 'base64':
        import base64
        data = base64.decodestring(data)
    else:
        data = unquote(data)
    msg.append('Content-Length: %d' % len(data))
    msg.append('')          # blank line ends the header section
    msg.append(data)
    msg = '\n'.join(msg)
    f = StringIO(msg)
    headers = mimetools.Message(f, 0)
    #f.fileno = None     # needed for addinfourl
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000605
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000606
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    On top of URLopener this follows redirects (301, 302, 303, 307) and
    retries requests that were refused with 401/407 using HTTP Basic
    authentication, prompting for credentials when none are cached.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps 'realm@host' -> (user, passwd); filled by get_user_passwd().
        self.auth_cache = {}
        # Number of redirects followed for the request in progress, and
        # the count at which http_error_302() gives up (a false value
        # disables the limit).
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect via redirect_internal().  Once self.maxtries
        nested redirects have been seen, the request is instead reported
        as a 500 "Redirect Recursion" error through http_error_500 (if the
        instance has one) or http_error_default.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset on every exit path, including an exception raised
            # while opening the redirect target; otherwise the stale
            # count would carry over into the next, unrelated request.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        The target is taken from the 'location' header, falling back to
        'uri'; returns None when neither is present.  The redirected
        request is opened without data.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Consume and discard the body of the redirect response.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the URLopener.http_error_default() calls below are
        # not returned, so they are presumably expected to raise -- confirm
        # against URLopener.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the unreturned
        # URLopener.http_error_default() calls are presumably expected to
        # raise -- confirm against URLopener.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth /
        # retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after storing proxy credentials.

        Rewrites self.proxies['http'] to embed 'user:passwd@' and reopens
        the URL.  Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is nonzero when the proxy URL already carried 'user@'; those
        # credentials are stripped and any cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after storing proxy credentials.

        Rewrites self.proxies['https'] to embed 'user:passwd@' and reopens
        the URL.  Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is nonzero when the proxy URL already carried 'user@'; those
        # credentials are stripped and any cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with 'user:passwd@' embedded in the host.

        Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        # i is nonzero when the URL already carried 'user@'; those
        # credentials are stripped and any cached entry is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with 'user:passwd@' embedded in the host.

        Returns None when no credentials were obtained.
        """
        host, selector = splithost(url)
        # i is nonzero when the URL already carried 'user@'; those
        # credentials are stripped and any cached entry is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Results are cached under 'realm@host' (host lower-cased).  With a
        true clear_cache an existing entry is dropped and the user is
        prompted again.  An answer where both values are false is returned
        but not cached.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for user name and password on the terminal; returns
        (None, None) if interrupted with KeyboardInterrupt.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Python 2 print statement: finish the interrupted prompt line.
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000791
792
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000793# Utility functions
794
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is resolved on the first call and remembered in the
    module-level _localhost for all later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000802
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The address is resolved from socket.gethostname() on the first call
    and remembered in the module-level _thishost afterwards.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000810
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only on the first call; its all_errors value is
    then kept in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000819
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and shared by all
    callers through the module-level _noheaders.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000833
834
835# Utility classes
836
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000837class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000838 """Class used by open_ftp() for cache of open FTP connections."""
839
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000840 def __init__(self, user, passwd, host, port, dirs,
841 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000842 self.user = user
843 self.passwd = passwd
844 self.host = host
845 self.port = port
846 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000847 self.timeout = timeout
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000848 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000849
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000850 def init(self):
851 import ftplib
852 self.busy = 0
853 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000854 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000855 self.ftp.login(self.user, self.passwd)
856 for dir in self.dirs:
857 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000858
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000859 def retrfile(self, file, type):
860 import ftplib
861 self.endtransfer()
862 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
863 else: cmd = 'TYPE ' + type; isdir = 0
864 try:
865 self.ftp.voidcmd(cmd)
866 except ftplib.all_errors:
867 self.init()
868 self.ftp.voidcmd(cmd)
869 conn = None
870 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000871 # Try to retrieve as a file
872 try:
873 cmd = 'RETR ' + file
874 conn = self.ftp.ntransfercmd(cmd)
875 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000876 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000877 raise IOError, ('ftp error', reason), sys.exc_info()[2]
878 if not conn:
879 # Set transfer mode to ASCII!
880 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000881 # Try a directory listing. Verify that directory exists.
882 if file:
883 pwd = self.ftp.pwd()
884 try:
885 try:
886 self.ftp.cwd(file)
887 except ftplib.error_perm, reason:
888 raise IOError, ('ftp error', reason), sys.exc_info()[2]
889 finally:
890 self.ftp.cwd(pwd)
891 cmd = 'LIST ' + file
892 else:
893 cmd = 'LIST'
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000894 conn = self.ftp.ntransfercmd(cmd)
895 self.busy = 1
896 # Pass back both a suitably decorated object and a retrieval length
897 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000898 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000899 def endtransfer(self):
900 if not self.busy:
901 return
902 self.busy = 0
903 try:
904 self.ftp.voidresp()
905 except ftperrors():
906 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000907
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000908 def close(self):
909 self.endtransfer()
910 try:
911 self.ftp.close()
912 except ftperrors():
913 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000914
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if not hasattr(fp, "fileno"):
            # Stand-in so callers can always call fileno().
            self.fileno = lambda: None
        else:
            self.fileno = fp.fileno
        # Iteration support is forwarded only when the wrapped object has it.
        for name in ("__iter__", "next"):
            if hasattr(fp, name):
                setattr(self, name, getattr(fp, name))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000943
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has been
    closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000958
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        # Returned unchanged by info().
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000968
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        # Stored as given; exposed through info(), geturl() and getcode().
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code given to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000986
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000987
Guido van Rossum7c395db1994-07-04 22:14:49 +0000988# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000989# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000990# splittype('type:opaquestring') --> 'type', 'opaquestring'
991# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000992# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
993# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000994# splitport('host:port') --> 'host', 'port'
995# splitquery('/path?query') --> '/path', 'query'
996# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000997# splitattr('/path;attr1=value1;attr2=value2;...') ->
998# '/path', ['attr1=value1', 'attr2=value2', ...]
999# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001000# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1002
def _is_unicode(x):
    """Tell whether x is a unicode object.

    Returns 0 when the interpreter has no 'unicode' type at all.
    """
    try:
        return isinstance(x, unicode)
    except NameError:
        return 0
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001011
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned untouched.  Unicode input is encoded
    as ASCII; UnicodeError is raised if that is not possible.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1023
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading
    'URL:' marker are removed, in that order.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001031
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased; (None, url) is returned when url
    does not start with 'type:'.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001045
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001057
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  Returns (None, host) when
    there is no '@'.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001069
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; (user, None) is returned when
    there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001081
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; (host, None) is returned
    when host does not end in ':<digits>'.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001094
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.groups()
    if not port:
        # A trailing ':' with nothing after it.
        return host, None
    try:
        return host, int(port)
    except ValueError:
        return host, None
Guido van Rossum53725a21996-06-13 19:12:35 +00001116
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; (url, None) is returned when
    there is none.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001128
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; (url, None) is returned when
    there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001140
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001146
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; (attr, None) is returned when
    there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001158
# Lookup table from two-digit hex strings to characters, holding both
# the lower-case ('7e') and upper-case ('7E') spelling of every byte.
_hextochr = {}
_hextochr.update((fmt % code, chr(code))
                 for fmt in ('%02x', '%02X')
                 for code in range(256))
1161
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left in place.
    """
    chunks = s.split('%')
    # Everything before the first '%' is copied through verbatim.
    decoded = chunks[:1]
    for item in chunks[1:]:
        try:
            decoded.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back.
            decoded.append('%' + item)
        except UnicodeDecodeError:
            # Joining the decoded byte with the rest of the item failed;
            # build the character with unichr() instead.
            decoded.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001174
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001179
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001180always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001181 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001182 '0123456789' '_.-')
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001183_safemaps = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001184
Guido van Rossum7c395db1994-07-04 22:14:49 +00001185def quote(s, safe = '/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001186 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001187
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001188 Each part of a URL, e.g. the path info, the query, etc., has a
1189 different set of reserved characters that must be quoted.
1190
1191 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1192 the following reserved characters.
1193
1194 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1195 "$" | ","
1196
1197 Each of these characters is reserved in some component of a URL,
1198 but not necessarily in all of them.
1199
1200 By default, the quote function is intended for quoting the path
1201 section of a URL. Thus, it will not encode '/'. This character
1202 is reserved, but in typical usage the quote function is being
1203 called on a path where the existing slash characters are used as
1204 reserved characters.
1205 """
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001206 cachekey = (safe, always_safe)
1207 try:
1208 safe_map = _safemaps[cachekey]
1209 except KeyError:
1210 safe += always_safe
1211 safe_map = {}
1212 for i in range(256):
1213 c = chr(i)
1214 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1215 _safemaps[cachekey] = safe_map
1216 res = map(safe_map.__getitem__, s)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001217 return ''.join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001218
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces survive quote() untouched, then turn them into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001225
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001226def urlencode(query,doseq=0):
1227 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001228
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001229 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001230 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001231
1232 If the query arg is a sequence of two-element tuples, the order of the
1233 parameters in the output will match the order of parameters in the
1234 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001235 """
Tim Peters658cba62001-02-09 20:06:00 +00001236
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001237 if hasattr(query,"items"):
1238 # mapping objects
1239 query = query.items()
1240 else:
1241 # it's a bother at times that strings and string-like objects are
1242 # sequences...
1243 try:
1244 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001245 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001246 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001247 raise TypeError
1248 # zero-length sequences of all types will get here and succeed,
1249 # but that's a minor nit - since the original implementation
1250 # allowed empty dicts that type of behavior probably should be
1251 # preserved for consistency
1252 except TypeError:
1253 ty,va,tb = sys.exc_info()
1254 raise TypeError, "not a valid non-string sequence or mapping object", tb
1255
Guido van Rossume7b146f2000-02-04 15:28:42 +00001256 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001257 if not doseq:
1258 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001259 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001260 k = quote_plus(str(k))
1261 v = quote_plus(str(v))
1262 l.append(k + '=' + v)
1263 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001264 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001265 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001266 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001267 v = quote_plus(v)
1268 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001269 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001270 # is there a reasonable way to convert to ASCII?
1271 # encode generates a string, but "replace" or "ignore"
1272 # lose information and "strict" can raise UnicodeError
1273 v = quote_plus(v.encode("ASCII","replace"))
1274 l.append(k + '=' + v)
1275 else:
1276 try:
1277 # is this a sufficient test for sequence-ness?
1278 x = len(v)
1279 except TypeError:
1280 # not a sequence
1281 v = quote_plus(str(v))
1282 l.append(k + '=' + v)
1283 else:
1284 # loop over the sequence
1285 for elt in v:
1286 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001287 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001288
Guido van Rossum442e7201996-03-20 15:33:11 +00001289# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lowercase and variables with an
    empty value are ignored.

    """
    suffix = '_proxy'
    found = {}
    for key, setting in os.environ.items():
        if not setting:
            continue
        key = key.lower()
        if key.endswith(suffix):
            # strip the suffix to obtain the scheme name
            found[key[:-len(suffix)]] = setting
    return found
1305
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    NO_PROXY is consulted when no_proxy is unset or empty.

    Returns 1 when the proxy should be bypassed and 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '')
    if not no_proxy:
        no_proxy = os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # A suffix may match the host either with or without its port.
    for suffix in no_proxy.split(','):
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1324
1325
Jack Jansen11d9b062004-07-16 11:45:00 +00001326if sys.platform == 'darwin':
Ronald Oussoren099646f2008-05-18 20:09:54 +00001327
1328 def _CFSetup(sc):
1329 from ctypes import c_int32, c_void_p, c_char_p, c_int
1330 sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
1331 sc.CFStringCreateWithCString.restype = c_void_p
1332 sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
1333 sc.SCDynamicStoreCopyProxies.restype = c_void_p
1334 sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
1335 sc.CFDictionaryGetValue.restype = c_void_p
1336 sc.CFStringGetLength.argtypes = [ c_void_p ]
1337 sc.CFStringGetLength.restype = c_int32
1338 sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
1339 sc.CFStringGetCString.restype = c_int32
1340 sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
1341 sc.CFNumberGetValue.restype = c_int32
1342 sc.CFRelease.argtypes = [ c_void_p ]
1343 sc.CFRelease.restype = None
1344
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001345 def _CStringFromCFString(sc, value):
1346 from ctypes import create_string_buffer
1347 length = sc.CFStringGetLength(value) + 1
1348 buff = create_string_buffer(length)
1349 sc.CFStringGetCString(value, buff, length, 0)
1350 return buff.value
1351
1352 def _CFNumberToInt32(sc, cfnum):
1353 from ctypes import byref, c_int
1354 val = c_int()
1355 kCFNumberSInt32Type = 3
1356 sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
1357 return val.value
1358
1359
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    Two settings are consulted: ExcludeSimpleHostnames (applies when
    host contains no '.') and ExceptionsList, whose entries are either
    shell-style patterns such as '*.local' or IPv4 networks such as
    '169.254/16'.  A network entry without '/bits' is matched on as
    many octets as it spells out.  Network entries are skipped when
    host cannot be resolved or when the prefix length exceeds 32.
    """
    from ctypes import cdll
    from ctypes.util import find_library
    import re
    import socket
    from fnmatch import fnmatch

    def ip2num(ipAddr):
        # Pack a dotted address into one integer; an abbreviated address
        # such as '169.254' is padded on the right with zero octets.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return False
    _CFSetup(sc)

    proxyDict = sc.SCDynamicStoreCopyProxies(None)
    if proxyDict is None:
        return False

    hostIP = None
    kSCPropNetProxiesExceptionsList = None
    kSCPropNetProxiesExcludeSimpleHostnames = None

    try:
        kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(
                0, "ExceptionsList", 0)
        kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(
                0, "ExcludeSimpleHostnames", 0)

        # Check for simple host names:
        if '.' not in host:
            exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                    kSCPropNetProxiesExcludeSimpleHostnames)
            if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                return True

        # Check the exceptions list:
        exceptions = sc.CFDictionaryGetValue(proxyDict,
                kSCPropNetProxiesExceptionsList)
        if exceptions:
            # Items in the list are strings like these: *.local, 169.254/16
            for index in xrange(sc.CFArrayGetCount(exceptions)):
                value = sc.CFArrayGetValueAtIndex(exceptions, index)
                if not value: continue
                value = _CStringFromCFString(sc, value)

                m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
                if m is not None:
                    if hostIP is None:
                        try:
                            hostIP = ip2num(socket.gethostbyname(host))
                        except socket.error:
                            # host does not resolve, so it cannot be
                            # compared against a network entry
                            continue

                    base = ip2num(m.group(1))
                    prefix = m.group(2)
                    if prefix is None:
                        # no explicit '/bits': compare the octets given
                        prefix = 8 * (m.group(1).count('.') + 1)
                    else:
                        prefix = int(prefix[1:])
                    if prefix > 32:
                        # not a valid IPv4 prefix length
                        continue
                    shift = 32 - prefix

                    if (hostIP >> shift) == (base >> shift):
                        return True

                elif fnmatch(host, value):
                    return True

        return False

    finally:
        # The dictionary came from a Copy function and the two keys from
        # a Create function, so all three must be released here.
        for ref in (kSCPropNetProxiesExceptionsList,
                    kSCPropNetProxiesExcludeSimpleHostnames,
                    proxyDict):
            if ref:
                sc.CFRelease(ref)
1436
1437
1438
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    The HTTP, HTTPS, FTP and Gopher settings are examined.  A scheme is
    included only when its <Name>Enable flag is set and a <Name>Proxy
    host is configured; the value is always an 'http://host[:port]'
    URL.  An empty dictionary is returned when the system provides no
    proxy settings at all.
    """
    from ctypes import cdll
    from ctypes.util import find_library

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return {}
    _CFSetup(sc)

    proxies = {}
    proxyDict = sc.SCDynamicStoreCopyProxies(None)
    if not proxyDict:
        return proxies

    # Every CFString key created below is remembered here so that all of
    # them are released again, whatever happens.
    created = []

    def lookup(name):
        # Fetch the entry called name from the proxy dictionary.
        key = sc.CFStringCreateWithCString(0, name, 0)
        created.append(key)
        return sc.CFDictionaryGetValue(proxyDict, key)

    try:
        for scheme, prefix in (("http", "HTTP"), ("https", "HTTPS"),
                               ("ftp", "FTP"), ("gopher", "Gopher")):
            enabled = lookup(prefix + "Enable")
            if not (enabled and _CFNumberToInt32(sc, enabled)):
                continue
            proxy = lookup(prefix + "Proxy")
            port = lookup(prefix + "Port")
            if not proxy:
                continue
            proxy = _CStringFromCFString(sc, proxy)
            if port:
                port = _CFNumberToInt32(sc, port)
                proxies[scheme] = "http://%s:%i" % (proxy, port)
            else:
                proxies[scheme] = "http://%s" % (proxy, )
    finally:
        sc.CFRelease(proxyDict)
        for key in created:
            if key:
                sc.CFRelease(key)

    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001545
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001546
1547
def proxy_bypass(host):
    """Test whether host should be reached without a proxy (Mac OS X).

    When proxies are configured through the environment the decision is
    left to proxy_bypass_environment(); otherwise the system
    configuration is consulted.
    """
    if not getproxies_environment():
        return proxy_bypass_macosx_sysconf(host)
    return proxy_bypass_environment(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001553
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment take precedence; the Mac OS X
    system configuration is used only when the environment defines no
    proxies.
    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001556
Mark Hammond4f570b92000-07-26 07:04:38 +00001557elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    The ProxyEnable and ProxyServer values of the current user's
    Internet Settings key are read.  ProxyServer is either a single
    server used for http and ftp, or a ';'-separated list of
    protocol=address pairs.  An empty dictionary is returned when
    _winreg is unavailable or proxies are disabled; a registry error or
    a malformed value stops the scan and whatever was collected up to
    that point is returned.

    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    import re
    # Matches an address that already has a type:// prefix.
    has_scheme = re.compile('^([^/:]+)://').match
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        if not has_scheme(address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Close the key on every path, not only after a clean read.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001602
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified,
    or the registry.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001611
def proxy_bypass_registry(host):
    """Test whether host is listed in the registry's ProxyOverride value.

    Returns 1 when host (with its port removed), its IP address or its
    fully qualified name matches one of the ';'-separated ProxyOverride
    entries, and 0 otherwise - including when _winreg is unavailable,
    the registry cannot be read, or proxies are disabled.  Entries may
    use '*' and '?' wildcards; '<local>' stands for the local machine's
    names and addresses.  Empty entries are ignored.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    checks = []
    for entry in proxyOverride.split(';'):
        if entry != '<local>':
            checks.append(entry)
            continue
        checks.extend(['localhost', '127.0.0.1'])
        try:
            localName = socket.gethostname()
            checks.append(localName)
            checks.append(socket.gethostbyname(localName))
        except socket.error:
            # the local name does not resolve; keep what we have
            pass
    # now check if we match one of the registry values.
    for test in checks:
        if not test:
            # An empty entry (e.g. from a trailing ';') would become an
            # empty pattern, which matches every host.
            continue
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            if re.match(test, val, re.I):
                return 1
    return 0
1670
def proxy_bypass(host):
    """Test whether host should be reached without a proxy (Windows).

    When proxies are configured through the environment the decision is
    left to proxy_bypass_environment(); otherwise the registry settings
    are consulted.

    """
    if not getproxies_environment():
        return proxy_bypass_registry(host)
    return proxy_bypass_environment(host)
1682
Mark Hammond4f570b92000-07-26 07:04:38 +00001683else:
1684 # By default use environment variables
1685 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001686 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001687
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001688# Test and time quote() and unquote()
1689def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001690 s = ''
1691 for i in range(256): s = s + chr(i)
1692 s = s*4
1693 t0 = time.time()
1694 qs = quote(s)
1695 uqs = unquote(qs)
1696 t1 = time.time()
1697 if uqs != s:
1698 print 'Wrong!'
Walter Dörwald70a6b492004-02-12 17:35:32 +00001699 print repr(s)
1700 print repr(qs)
1701 print repr(uqs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001702 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001703
1704
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001705def reporthook(blocknum, blocksize, totalsize):
1706 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001707 print "Block number: %d, Block size: %d, Total size: %d" % (
1708 blocknum, blocksize, totalsize)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001709
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001710# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001711def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001712 if not args:
1713 args = [
1714 '/etc/passwd',
1715 'file:/etc/passwd',
1716 'file://localhost/etc/passwd',
Collin Winter071d1ae2007-03-12 01:55:54 +00001717 'ftp://ftp.gnu.org/pub/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001718 'http://www.python.org/index.html',
1719 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001720 if hasattr(URLopener, "open_https"):
1721 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001722 try:
1723 for url in args:
1724 print '-'*10, url, '-'*10
1725 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001726 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001727 if h:
1728 print '======'
1729 for k in h.keys(): print k + ':', h[k]
1730 print '======'
1731 fp = open(fn, 'rb')
1732 data = fp.read()
1733 del fp
1734 if '\r' in data:
1735 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001736 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001737 print data
1738 fn, h = None, None
1739 print '-'*40
1740 finally:
1741 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001742
Guido van Rossum23490151998-06-25 02:39:00 +00001743def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001744 import getopt, sys
1745 try:
1746 opts, args = getopt.getopt(sys.argv[1:], "th")
1747 except getopt.error, msg:
1748 print msg
1749 print "Use -h for help"
1750 return
1751 t = 0
1752 for o, a in opts:
1753 if o == '-t':
1754 t = t + 1
1755 if o == '-h':
1756 print "Usage: python urllib.py [-t] [url ...]"
1757 print "-t runs self-test;",
1758 print "otherwise, contents of urls are printed"
1759 return
1760 if t:
1761 if t > 1:
1762 test1()
1763 test(args)
1764 else:
1765 if not args:
1766 print "Use -h for help"
1767 for url in args:
1768 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001769
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001770# Run test program when run as a script
1771if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001772 main()