blob: d23d0706cc29c6032f6766ef8981f669e20b1bb4 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Brett Cannon8bb8fa52008-07-02 01:57:08 +000031import warnings
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
# Names exported by a star-import of this module.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which is sent as the User-Agent header.
__version__ = '1.17'    # XXX This version is not always updated :-(

# URLopener.open_ftp() prunes its connection cache once it holds more
# entries than this.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
# Helper for non-unix systems: pick the platform-specific implementation
# of url2pathname()/pathname2url(); every other platform falls through to
# the plain unquote()/quote() versions defined in the else branch.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use.

        On this platform the conversion is just unquote(pathname)."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use.

        On this platform the conversion is just quote(pathname)."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000062
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000063# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage
# Module-wide opener shared by urlopen(), urlretrieve() and urlcleanup();
# created lazily on first use.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request data; only passed on to the opener's
               open() when it is not None.
    proxies -- optional proxy mapping.  When given, a fresh
               FancyURLopener(proxies=proxies) is used for this call only
               and the shared module-level opener is left untouched.

    Returns whatever the opener's open() method returns.
    Emits a py3k deprecation warning on every call.
    """
    global _urlopener
    # The module-level "warnings" import is used directly; no function-local
    # import of warnpy3k is needed.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First call without explicit proxies: create and remember the
        # shared opener.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve a URL through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments are
    forwarded unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
# check for SSL
# Only a failed import means "no SSL support"; a bare except here would
# also swallow KeyboardInterrupt/SystemExit raised during the import.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
106
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying the partial result of a truncated download.

    message -- passed on to IOError.
    content -- stored unchanged on the instance as the .content attribute.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000112
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113ftpcache = {}
114class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors type 302 (relocated) and 401
120 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000121
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000122 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000123
Guido van Rossumba311382000-08-24 16:18:04 +0000124 version = "Python-urllib/%s" % __version__
125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000127 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
Georg Brandl0619a322006-07-26 07:40:17 +0000134 self.addheaders = [('User-Agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000149
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 def __del__(self):
151 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000152
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def close(self):
154 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000155
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000164 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000169
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000170 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000173 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000174
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000177 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000178 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000179 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000180 filename, headers = self.tempcache[fullurl]
181 fp = open(filename, 'rb')
182 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000183 urltype, url = splittype(fullurl)
184 if not urltype:
185 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000186 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000187 proxy = self.proxies[urltype]
188 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000189 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000190 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000191 else:
192 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000193 name = 'open_' + urltype
194 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000195 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000196 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000197 if proxy:
198 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 else:
200 return self.open_unknown(fullurl, data)
201 try:
202 if data is None:
203 return getattr(self, name)(url)
204 else:
205 return getattr(self, name)(url, data)
206 except socket.error, msg:
207 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000208
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000209 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000210 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000211 type, url = splittype(fullurl)
212 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000213
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000214 def open_unknown_proxy(self, proxy, fullurl, data=None):
215 """Overridable interface to open unknown URL type."""
216 type, url = splittype(fullurl)
217 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
218
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000219 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000220 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000221 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000222 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000223 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000224 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000225 return self.tempcache[url]
226 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000227 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000228 try:
229 fp = self.open_local_file(url1)
230 hdrs = fp.info()
231 del fp
232 return url2pathname(splithost(url1)[1]), hdrs
233 except IOError, msg:
234 pass
Fred Drake316a7932000-08-24 01:01:26 +0000235 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000236 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000237 if filename:
238 tfp = open(filename, 'wb')
239 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000240 import tempfile
241 garbage, path = splittype(url)
242 garbage, path = splithost(path or "")
243 path, garbage = splitquery(path or "")
244 path, garbage = splitattr(path or "")
245 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000246 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000247 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000248 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000249 result = filename, headers
250 if self.tempcache is not None:
251 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000252 bs = 1024*8
253 size = -1
Georg Brandlb9256022005-08-24 18:46:39 +0000254 read = 0
Georg Brandl5a650a22005-08-26 08:51:34 +0000255 blocknum = 0
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000256 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000257 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000258 size = int(headers["Content-Length"])
Georg Brandl5a650a22005-08-26 08:51:34 +0000259 reporthook(blocknum, bs, size)
260 while 1:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000261 block = fp.read(bs)
Georg Brandl5a650a22005-08-26 08:51:34 +0000262 if block == "":
263 break
Georg Brandlb9256022005-08-24 18:46:39 +0000264 read += len(block)
Georg Brandl5a650a22005-08-26 08:51:34 +0000265 tfp.write(block)
Georg Brandlb9256022005-08-24 18:46:39 +0000266 blocknum += 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000267 if reporthook:
268 reporthook(blocknum, bs, size)
269 fp.close()
270 tfp.close()
271 del fp
272 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000273
274 # raise exception if actual size does not match content-length header
275 if size >= 0 and read < size:
276 raise ContentTooShortError("retrieval incomplete: got only %i out "
277 "of %i bytes" % (read, size), result)
278
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000279 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000280
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000281 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000282
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000283 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000284 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000285 import httplib
286 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000287 proxy_passwd= None
Walter Dörwald65230a22002-06-03 15:58:32 +0000288 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000289 host, selector = splithost(url)
290 if host:
291 user_passwd, host = splituser(host)
292 host = unquote(host)
293 realhost = host
294 else:
295 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000296 # check whether the proxy contains authorization information
297 proxy_passwd, host = splituser(host)
298 # now we proceed with the url we want to obtain
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 urltype, rest = splittype(selector)
300 url = rest
301 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000302 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000303 realhost = None
304 else:
305 realhost, rest = splithost(rest)
306 if realhost:
307 user_passwd, realhost = splituser(realhost)
308 if user_passwd:
309 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000310 if proxy_bypass(realhost):
311 host = realhost
312
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 #print "proxy via http:", host, selector
314 if not host: raise IOError, ('http error', 'no host given')
Tim Peters92037a12006-01-24 22:44:08 +0000315
Martin v. Löwis3e865952006-01-24 15:51:21 +0000316 if proxy_passwd:
317 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000318 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000319 else:
320 proxy_auth = None
321
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000322 if user_passwd:
323 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000324 auth = base64.b64encode(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000325 else:
326 auth = None
327 h = httplib.HTTP(host)
328 if data is not None:
329 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000330 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
331 h.putheader('Content-Length', '%d' % len(data))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 else:
333 h.putrequest('GET', selector)
Martin v. Löwis3e865952006-01-24 15:51:21 +0000334 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000335 if auth: h.putheader('Authorization', 'Basic %s' % auth)
336 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000337 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000338 h.endheaders(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000339 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000340 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000341 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000342 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000343 # something went wrong with the HTTP status line
344 raise IOError, ('http protocol error', 0,
345 'got a bad status line', None)
Sean Reifscheidera1afbf62007-09-19 07:52:56 +0000346 # According to RFC 2616, "2xx" code indicates that the client's
347 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000348 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000349 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000350 else:
351 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000352 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000353 else:
354 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000355
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000356 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000357 """Handle http errors.
358 Derived class can override this, or provide specific handlers
359 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000360 # First check if there's a specific handler for this error
361 name = 'http_error_%d' % errcode
362 if hasattr(self, name):
363 method = getattr(self, name)
364 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000365 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000366 else:
367 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000368 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000369 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000370
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000371 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000372 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000373 void = fp.read()
374 fp.close()
375 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000376
Bill Janssen426ea0a2007-08-29 22:35:05 +0000377 if _have_ssl:
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000378 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000379 """Use HTTPS protocol."""
Bill Janssen426ea0a2007-08-29 22:35:05 +0000380
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000381 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000382 user_passwd = None
Martin v. Löwis3e865952006-01-24 15:51:21 +0000383 proxy_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000384 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000385 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000386 if host:
387 user_passwd, host = splituser(host)
388 host = unquote(host)
389 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000390 else:
391 host, selector = url
Martin v. Löwis3e865952006-01-24 15:51:21 +0000392 # here, we determine, whether the proxy contains authorization information
393 proxy_passwd, host = splituser(host)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000394 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000395 url = rest
396 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000397 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000398 realhost = None
399 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000400 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000401 if realhost:
402 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000403 if user_passwd:
404 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000405 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000406 if not host: raise IOError, ('https error', 'no host given')
Martin v. Löwis3e865952006-01-24 15:51:21 +0000407 if proxy_passwd:
408 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000409 proxy_auth = base64.b64encode(proxy_passwd).strip()
Martin v. Löwis3e865952006-01-24 15:51:21 +0000410 else:
411 proxy_auth = None
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000412 if user_passwd:
413 import base64
Andrew M. Kuchling872dba42006-10-27 17:11:23 +0000414 auth = base64.b64encode(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000415 else:
416 auth = None
417 h = httplib.HTTPS(host, 0,
418 key_file=self.key_file,
419 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000420 if data is not None:
421 h.putrequest('POST', selector)
Georg Brandl0619a322006-07-26 07:40:17 +0000422 h.putheader('Content-Type',
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000423 'application/x-www-form-urlencoded')
Georg Brandl0619a322006-07-26 07:40:17 +0000424 h.putheader('Content-Length', '%d' % len(data))
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000425 else:
426 h.putrequest('GET', selector)
Andrew M. Kuchling52278572006-12-19 15:11:41 +0000427 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
428 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000429 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000430 for args in self.addheaders: h.putheader(*args)
Kristján Valur Jónsson84040db2009-01-09 20:27:16 +0000431 h.endheaders(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000432 errcode, errmsg, headers = h.getreply()
Neal Norwitzce55e212007-03-20 08:14:57 +0000433 fp = h.getfile()
Georg Brandlf66b6032007-03-14 08:27:52 +0000434 if errcode == -1:
Neal Norwitzce55e212007-03-20 08:14:57 +0000435 if fp: fp.close()
Georg Brandlf66b6032007-03-14 08:27:52 +0000436 # something went wrong with the HTTP status line
437 raise IOError, ('http protocol error', 0,
438 'got a bad status line', None)
Georg Brandl9b915672007-09-24 18:08:24 +0000439 # According to RFC 2616, "2xx" code indicates that the client's
440 # request was successfully received, understood, and accepted.
Kurt B. Kaiser0f7c25d2008-01-02 04:11:28 +0000441 if (200 <= errcode < 300):
Georg Brandl9b0d46d2008-01-20 11:43:03 +0000442 return addinfourl(fp, headers, "https:" + url, errcode)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000443 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000444 if data is None:
445 return self.http_error(url, fp, errcode, errmsg, headers)
446 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000447 return self.http_error(url, fp, errcode, errmsg, headers,
448 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000449
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000450 def open_file(self, url):
Neal Norwitzc5d0dbd2006-04-09 04:00:49 +0000451 """Use local file or FTP depending on form of URL."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000452 if not isinstance(url, str):
453 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
Jack Jansen4ef11032002-09-12 20:14:04 +0000454 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000455 return self.open_ftp(url)
456 else:
457 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000458
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000459 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000460 """Use local file."""
Georg Brandl5a096e12007-01-22 19:40:21 +0000461 import mimetypes, mimetools, email.utils
Raymond Hettingera6172712004-12-31 19:15:26 +0000462 try:
463 from cStringIO import StringIO
464 except ImportError:
465 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000466 host, file = splithost(url)
467 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000468 try:
469 stats = os.stat(localname)
470 except OSError, e:
471 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000472 size = stats.st_size
Georg Brandl5a096e12007-01-22 19:40:21 +0000473 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000474 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000475 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000476 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
477 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000478 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000479 urlfile = file
480 if file[:1] == '/':
481 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000482 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000483 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000484 host, port = splitport(host)
485 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000486 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000487 urlfile = file
488 if file[:1] == '/':
489 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000490 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000491 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000492 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000493
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000494 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000495 """Use FTP protocol."""
Martin v. Löwis3e865952006-01-24 15:51:21 +0000496 if not isinstance(url, str):
497 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
Raymond Hettingera6172712004-12-31 19:15:26 +0000498 import mimetypes, mimetools
499 try:
500 from cStringIO import StringIO
501 except ImportError:
502 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000503 host, path = splithost(url)
504 if not host: raise IOError, ('ftp error', 'no host given')
505 host, port = splitport(host)
506 user, host = splituser(host)
507 if user: user, passwd = splitpasswd(user)
508 else: passwd = None
509 host = unquote(host)
510 user = unquote(user or '')
511 passwd = unquote(passwd or '')
512 host = socket.gethostbyname(host)
513 if not port:
514 import ftplib
515 port = ftplib.FTP_PORT
516 else:
517 port = int(port)
518 path, attrs = splitattr(path)
519 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000520 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000521 dirs, file = dirs[:-1], dirs[-1]
522 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000523 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000524 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000525 # XXX thread unsafe!
526 if len(self.ftpcache) > MAXFTPCACHE:
527 # Prune the cache, rather arbitrarily
528 for k in self.ftpcache.keys():
529 if k != key:
530 v = self.ftpcache[k]
531 del self.ftpcache[k]
532 v.close()
533 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000534 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000535 self.ftpcache[key] = \
536 ftpwrapper(user, passwd, host, port, dirs)
537 if not file: type = 'D'
538 else: type = 'I'
539 for attr in attrs:
540 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000541 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000542 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000543 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000544 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000545 mtype = mimetypes.guess_type("ftp:" + url)[0]
546 headers = ""
547 if mtype:
548 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000549 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000550 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000551 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000552 return addinfourl(fp, headers, "ftp:" + url)
553 except ftperrors(), msg:
554 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000555
def open_data(self, url, data=None):
    """Use "data" URL.

    url is the data URL with the leading "data:" already removed, i.e.
    '[mediatype][;base64],payload'.  Any POSTed data is ignored.

    Returns an addinfourl object wrapping an in-memory message made of
    synthesized Date, Content-type and Content-Length headers followed
    by the decoded payload.

    Raises IOError('data error', ...) if url is not a str or if it
    contains no ',' separating the media type from the payload.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        mediatype, data = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'
    # A trailing ';token' without '=' is the transfer encoding (e.g.
    # ';base64'); a trailing ';name=value' is an ordinary parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''
    if encoding == 'base64':
        import base64
        data = base64.decodestring(data)
    else:
        data = unquote(data)
    # '%H:%M:%S' instead of '%T': '%T' is not a C89 strftime directive
    # and is not supported on every platform.
    date = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                         time.gmtime(time.time()))
    msg = '\n'.join(['Date: %s' % date,
                     'Content-type: %s' % mediatype,
                     'Content-Length: %d' % len(data),
                     '',
                     data])
    f = StringIO(msg)
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000601
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000602
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds to URLopener:
    - redirect following for 301/302/303/307, bounded by self.maxtries;
    - HTTP Basic authentication retries for 401 (server) and 407 (proxy),
      with credentials cached per realm@host in self.auth_cache.
    """

    # Only these URL prefixes may be the target of a server-supplied
    # redirect; anything else (e.g. "file:") is refused.
    _redirect_prefixes = ('http://', 'https://', 'ftp://')

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # consecutive redirects followed so far
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many redirects in a row: report it as a 500 through
            # the most specific handler available.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the redirect named by the 'location' (or 'uri') header.

        Returns None when neither header is present; otherwise returns
        whatever self.open() returns for the new URL.  data is not
        forwarded to the new request.

        Raises IOError('redirect error', ...) when the target is not an
        http, https or ftp URL.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # The target comes from the remote server; do not let it steer
        # us to other protocols (e.g. "file:" would read local files).
        if not newurl.lower().startswith(self._redirect_prefixes):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed"
                          % newurl,
                          headers)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _basic_auth_challenge(self, header, retry_prefix, url, fp,
                              errcode, errmsg, headers, data):
        """Answer a Basic auth challenge found in the given header.

        Falls back to URLopener.http_error_default when the header is
        missing, unparseable, or names a scheme other than Basic.
        Otherwise dispatches to the method named
        retry_prefix + self.type + '_basic_auth'.
        """
        if header not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers[header]
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        retry = getattr(self, retry_prefix + self.type + '_basic_auth')
        if data is None:
            return retry(url, realm)
        else:
            return retry(url, realm, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('www-authenticate', 'retry_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('proxy-authenticate',
                                          'retry_proxy_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Re-open url after storing credentials in the scheme's proxy URL.

        Returns None if no user name or password could be obtained.
        """
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried 'user:pw@';
        # it doubles as the clear_cache flag of get_user_passwd().
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with Basic credentials for the proxy."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with Basic credentials for the proxy."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Re-open url with 'user:passwd@' embedded in the host part.

        Returns None if no user name or password could be obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried 'user:pw@'; it
        # doubles as the clear_cache flag of get_user_passwd().
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with Basic credentials for the server."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with Basic credentials for the server."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is dropped and the user is asked again.
        A new pair is cached only if at least one half is non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000787
788
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000789# Utility functions
790
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; the result is kept in _localhost."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000798
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; the result is kept in _thishost."""
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000806
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only on the first call; the value is then kept
    in _ftperrors."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000815
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and shared by all
    later calls."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000829
830
831# Utility classes
832
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000833class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000834 """Class used by open_ftp() for cache of open FTP connections."""
835
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000836 def __init__(self, user, passwd, host, port, dirs,
837 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000838 self.user = user
839 self.passwd = passwd
840 self.host = host
841 self.port = port
842 self.dirs = dirs
Facundo Batista711a54e2007-05-24 17:50:54 +0000843 self.timeout = timeout
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000844 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000845
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000846 def init(self):
847 import ftplib
848 self.busy = 0
849 self.ftp = ftplib.FTP()
Facundo Batista711a54e2007-05-24 17:50:54 +0000850 self.ftp.connect(self.host, self.port, self.timeout)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000851 self.ftp.login(self.user, self.passwd)
852 for dir in self.dirs:
853 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000854
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000855 def retrfile(self, file, type):
856 import ftplib
857 self.endtransfer()
858 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
859 else: cmd = 'TYPE ' + type; isdir = 0
860 try:
861 self.ftp.voidcmd(cmd)
862 except ftplib.all_errors:
863 self.init()
864 self.ftp.voidcmd(cmd)
865 conn = None
866 if file and not isdir:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000867 # Try to retrieve as a file
868 try:
869 cmd = 'RETR ' + file
870 conn = self.ftp.ntransfercmd(cmd)
871 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000872 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000873 raise IOError, ('ftp error', reason), sys.exc_info()[2]
874 if not conn:
875 # Set transfer mode to ASCII!
876 self.ftp.voidcmd('TYPE A')
Georg Brandld5e6cf22008-01-20 12:18:17 +0000877 # Try a directory listing. Verify that directory exists.
878 if file:
879 pwd = self.ftp.pwd()
880 try:
881 try:
882 self.ftp.cwd(file)
883 except ftplib.error_perm, reason:
884 raise IOError, ('ftp error', reason), sys.exc_info()[2]
885 finally:
886 self.ftp.cwd(pwd)
887 cmd = 'LIST ' + file
888 else:
889 cmd = 'LIST'
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000890 conn = self.ftp.ntransfercmd(cmd)
891 self.busy = 1
892 # Pass back both a suitably decorated object and a retrieval length
893 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000894 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000895 def endtransfer(self):
896 if not self.busy:
897 return
898 self.busy = 0
899 try:
900 self.ftp.voidresp()
901 except ftperrors():
902 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000903
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000904 def close(self):
905 self.endtransfer()
906 try:
907 self.ftp.close()
908 except ftperrors():
909 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000910
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its reading methods directly on self."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # fileno is always provided; without one on fp it returns None.
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        # Iteration support is forwarded only when fp offers it.
        for optional in ("__iter__", "next"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        for forwarded in ("read", "readline", "readlines", "fileno"):
            setattr(self, forwarded, None)
        underlying = self.fp
        if underlying:
            underlying.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000939
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) runs when the wrapper is closed."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the file, then call the hook (at most once)."""
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so that a second close() does not call it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000954
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000964
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap fp and remember headers, url and the optional status code."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the status code given to the constructor (or None)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000982
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000983
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') --> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') -->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') --> 'abc def'
# quote('abc def') --> 'abc%20def'
998
Walter Dörwald65230a22002-06-03 15:58:32 +0000999try:
1000 unicode
1001except NameError:
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001002 def _is_unicode(x):
1003 return 0
Walter Dörwald65230a22002-06-03 15:58:32 +00001004else:
1005 def _is_unicode(x):
1006 return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001007
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode string is returned unchanged.
    Raises UnicodeError if a unicode URL is not pure ASCII."""
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
Martin v. Löwis1d994332000-12-03 18:30:10 +00001018 return url
1019
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing '<...>' pair and a
    leading 'URL:' marker, each only if present."""
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001027
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url has no
    'type:' prefix."""
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001041
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'."""
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001053
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are unquoted and returned as a list when an '@' is
    present; otherwise the result is the tuple (None, host)."""
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +00001065
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'; passwd is None without one."""
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001077
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string; it is None (and host is returned
    whole) unless host ends in ':' followed by digits only."""
    global _portprog
    if _portprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001090
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    if not port:
        # A ':' with nothing after it is not a valid number.
        return host, None
    try:
        return host, int(port)
    except ValueError:
        return host, None
Guido van Rossum53725a21996-06-13 19:12:35 +00001112
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'; query is None without one."""
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001124
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'; tag is None without one."""
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001136
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'."""
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001142
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='; value is None without one."""
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001154
# Two-hex-digit strings (all-lower and all-upper case) -> the character
# with that code.
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by a known two-digit hex code is left
    in place."""
    chunks = s.split('%')
    # Text before the first '%' is literal; every later chunk starts
    # with what should be a two-digit hex code.
    decoded = [chunks[0]]
    for chunk in chunks[1:]:
        try:
            piece = _hextochr[chunk[:2]] + chunk[2:]
        except KeyError:
            piece = '%' + chunk
        except UnicodeDecodeError:
            piece = unichr(int(chunk[:2], 16)) + chunk[2:]
        decoded.append(piece)
    return "".join(decoded)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001170
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but '+' is first turned into a space."""
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001175
# Characters that quote() never escapes, whatever `safe` is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters listed in `safe` are left alone in addition to the
    always_safe set; everything else becomes a '%XX' escape with
    upper-case hex digits.
    """
    cachekey = (safe, always_safe)
    table = _safemaps.get(cachekey)
    if table is None:
        # First call with this `safe` string: build the table mapping
        # each of the 256 characters to itself or to its escape.
        allowed = safe + always_safe
        table = {}
        for code in range(256):
            char = chr(code)
            if char in allowed:
                table[char] = char
            else:
                table[char] = '%%%02X' % code
        _safemaps[cachekey] = table
    return ''.join([table[char] for char in s])
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001214
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let the blanks pass through quote() unescaped, then turn them
    # into '+'.  A literal '+' in s has already become '%2B' by then
    # (unless the caller listed '+' in safe).
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001221
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001222def urlencode(query,doseq=0):
1223 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001224
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001225 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001226 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001227
1228 If the query arg is a sequence of two-element tuples, the order of the
1229 parameters in the output will match the order of parameters in the
1230 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001231 """
Tim Peters658cba62001-02-09 20:06:00 +00001232
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001233 if hasattr(query,"items"):
1234 # mapping objects
1235 query = query.items()
1236 else:
1237 # it's a bother at times that strings and string-like objects are
1238 # sequences...
1239 try:
1240 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001241 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001242 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001243 raise TypeError
1244 # zero-length sequences of all types will get here and succeed,
1245 # but that's a minor nit - since the original implementation
1246 # allowed empty dicts that type of behavior probably should be
1247 # preserved for consistency
1248 except TypeError:
1249 ty,va,tb = sys.exc_info()
1250 raise TypeError, "not a valid non-string sequence or mapping object", tb
1251
Guido van Rossume7b146f2000-02-04 15:28:42 +00001252 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001253 if not doseq:
1254 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001255 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001256 k = quote_plus(str(k))
1257 v = quote_plus(str(v))
1258 l.append(k + '=' + v)
1259 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001260 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001261 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001262 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001263 v = quote_plus(v)
1264 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001265 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001266 # is there a reasonable way to convert to ASCII?
1267 # encode generates a string, but "replace" or "ignore"
1268 # lose information and "strict" can raise UnicodeError
1269 v = quote_plus(v.encode("ASCII","replace"))
1270 l.append(k + '=' + v)
1271 else:
1272 try:
1273 # is this a sufficient test for sequence-ness?
1274 x = len(v)
1275 except TypeError:
1276 # not a sequence
1277 v = quote_plus(str(v))
1278 l.append(k + '=' + v)
1279 else:
1280 # loop over the sequence
1281 for elt in v:
1282 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001283 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001284
Guido van Rossum442e7201996-03-20 15:33:11 +00001285# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively and variables with
    an empty value are ignored.

    """
    suffix = '_proxy'
    found = {}
    for key, url in os.environ.items():
        lowered = key.lower()
        if url and lowered.endswith(suffix):
            # the scheme is whatever precedes the '_proxy' suffix
            found[lowered[:-len(suffix)]] = url
    return found
1301
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.

    Returns 1 when the proxy should be bypassed and 0 otherwise.
    """
    # the lower-case variable wins; NO_PROXY is only the fallback
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, _port = splitport(host)
    # check if the host ends with any of the DNS suffixes, both with
    # and without its port
    for suffix in no_proxy.split(','):
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1320
1321
Jack Jansen11d9b062004-07-16 11:45:00 +00001322if sys.platform == 'darwin':
Ronald Oussoren099646f2008-05-18 20:09:54 +00001323
1324 def _CFSetup(sc):
1325 from ctypes import c_int32, c_void_p, c_char_p, c_int
1326 sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
1327 sc.CFStringCreateWithCString.restype = c_void_p
1328 sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
1329 sc.SCDynamicStoreCopyProxies.restype = c_void_p
1330 sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
1331 sc.CFDictionaryGetValue.restype = c_void_p
1332 sc.CFStringGetLength.argtypes = [ c_void_p ]
1333 sc.CFStringGetLength.restype = c_int32
1334 sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
1335 sc.CFStringGetCString.restype = c_int32
1336 sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
1337 sc.CFNumberGetValue.restype = c_int32
1338 sc.CFRelease.argtypes = [ c_void_p ]
1339 sc.CFRelease.restype = None
1340
def _CStringFromCFString(sc, value):
    """Return the contents of the CFString `value` as a Python string.

    sc is the ctypes library handle providing CFStringGetLength and
    CFStringGetCString.
    """
    from ctypes import create_string_buffer
    # one extra byte so that there is room for the terminating NUL
    size = sc.CFStringGetLength(value) + 1
    target = create_string_buffer(size)
    # the final argument is the encoding; 0 is kCFStringEncodingMacRoman
    sc.CFStringGetCString(value, target, size, 0)
    return target.value
1347
def _CFNumberToInt32(sc, cfnum):
    """Return the value of the CFNumber `cfnum` as a Python integer.

    sc is the ctypes library handle providing CFNumberGetValue.
    """
    from ctypes import byref, c_int
    # CFNumberType selector asking for a signed 32-bit result
    kCFNumberSInt32Type = 3
    result = c_int()
    sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(result))
    return result.value
1354
1355
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    host is bypassed when it contains no dot and the system setting
    "ExcludeSimpleHostnames" is on, or when it matches an entry of the
    system "ExceptionsList".  Entries that start with digits (such as
    169.254/16) are compared against the resolved address of host; all
    other entries (such as *.local) are used as fnmatch patterns.
    """
    from ctypes import cdll, c_void_p, c_long
    from ctypes.util import find_library
    import re
    import socket
    from fnmatch import fnmatch

    def ip2num(ipAddr):
        # Pack a dotted address into one integer; parts missing at the
        # end (as in "169.254") count as zero.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return False
    _CFSetup(sc)
    # Prototypes for the array accessors used below.  ctypes would
    # otherwise assume a C int return value and truncate the element
    # pointer on 64-bit builds.  CFIndex is a signed long.
    sc.CFArrayGetCount.argtypes = [ c_void_p ]
    sc.CFArrayGetCount.restype = c_long
    sc.CFArrayGetValueAtIndex.argtypes = [ c_void_p, c_long ]
    sc.CFArrayGetValueAtIndex.restype = c_void_p

    hostIP = None

    kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
    kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
        "ExcludeSimpleHostnames", 0)

    proxyDict = None
    try:
        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return False

        # Check for simple host names:
        if '.' not in host:
            exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                kSCPropNetProxiesExcludeSimpleHostnames)
            if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                return True

        # Check the exceptions list:
        exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
        if exceptions:
            # Items in the list are strings like these: *.local, 169.254/16
            for index in range(sc.CFArrayGetCount(exceptions)):
                value = sc.CFArrayGetValueAtIndex(exceptions, index)
                if not value: continue
                value = _CStringFromCFString(sc, value)

                m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
                if m is not None:
                    if hostIP is None:
                        try:
                            hostIP = ip2num(socket.gethostbyname(host))
                        except socket.error:
                            # A host that cannot be resolved cannot
                            # match a numeric entry; pattern entries
                            # further down the list may still match.
                            continue

                    base = ip2num(m.group(1))
                    mask = m.group(2)
                    if mask is None:
                        # No "/bits" suffix: compare exactly the
                        # address parts that were written out.
                        mask = 8 * (m.group(1).count('.') + 1)
                    else:
                        mask = int(mask[1:])
                    mask = 32 - mask

                    if (hostIP >> mask) == (base >> mask):
                        return True

                elif fnmatch(host, value):
                    return True

        return False

    finally:
        # The dictionary came from a Copy function and the keys from a
        # Create function, so all three are ours to release - on every
        # way out of the function.
        if proxyDict is not None:
            sc.CFRelease(proxyDict)
        sc.CFRelease(kSCPropNetProxiesExceptionsList)
        sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
1432
1433
1434
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    The schemes looked up are http, https, ftp and gopher; a scheme is
    only present in the result when its proxy is enabled and has a
    host name configured.  An empty dictionary is returned when the
    system proxy settings cannot be obtained.
    """
    from ctypes import cdll
    from ctypes.util import find_library

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return {}
    _CFSetup(sc)

    def lookup(settings, name):
        # Return settings[name].  The temporary CFString key is
        # released again on every path.  The value itself is not
        # released: it is obtained with a Get function and stays owned
        # by the dictionary.
        key = sc.CFStringCreateWithCString(0, name, 0)
        try:
            return sc.CFDictionaryGetValue(settings, key)
        finally:
            sc.CFRelease(key)

    proxies = {}
    proxyDict = sc.SCDynamicStoreCopyProxies(None)
    if proxyDict is None:
        return proxies

    try:
        # (URL scheme, prefix of the three configuration keys)
        for scheme, prefix in (("http", "HTTP"), ("https", "HTTPS"),
                               ("ftp", "FTP"), ("gopher", "Gopher")):
            enabled = lookup(proxyDict, prefix + "Enable")
            if not (enabled and _CFNumberToInt32(sc, enabled)):
                continue

            proxy = lookup(proxyDict, prefix + "Proxy")
            port = lookup(proxyDict, prefix + "Port")

            if proxy:
                proxy = _CStringFromCFString(sc, proxy)
                # the proxy URL uses http:// for every scheme
                if port:
                    port = _CFNumberToInt32(sc, port)
                    proxies[scheme] = "http://%s:%i" % (proxy, port)
                else:
                    proxies[scheme] = "http://%s" % (proxy, )
    finally:
        sc.CFRelease(proxyDict)

    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001541
Ronald Oussoren9dd6b1d2008-05-12 11:31:05 +00001542
1543
def proxy_bypass(host):
    """Return a true value if the proxy should be bypassed for host.

    The environment decides whenever it defines any <scheme>_proxy
    variable; otherwise the system configuration is consulted.
    """
    if not getproxies_environment():
        return proxy_bypass_macosx_sysconf(host)
    return proxy_bypass_environment(host)
Tim Peters55c12d42001-08-09 18:04:14 +00001549
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings from the environment take precedence; the system
    configuration is only consulted when the environment yields none.
    """
    proxies = getproxies_environment()
    if proxies:
        return proxies
    return getproxies_macosx_sysconf()
Tim Peters182b5ac2004-07-18 06:16:08 +00001552
Mark Hammond4f570b92000-07-26 07:04:38 +00001553elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    The "ProxyEnable" and "ProxyServer" values of the current user's
    Internet Settings key are read.  An empty dictionary is returned
    when _winreg is unavailable or the key cannot be read; when a value
    has an unexpected format, the mappings gathered before the error
    are returned.

    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    import re
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # close the key even when a value is missing or malformed
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # Whatever was gathered so far is returned.
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001598
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified,
    or the registry.

    """
    proxies = getproxies_environment()
    if proxies:
        return proxies
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001607
def proxy_bypass_registry(host):
    """Test host against the "ProxyOverride" list in the registry.

    Returns 1 when proxies are enabled and host (its name, its address
    or its fully qualified name) matches one of the ';'-separated
    override entries, 0 otherwise - including when _winreg is
    unavailable or the settings cannot be read.  In an entry '*' and
    '?' act as wildcards, and '<local>' stands for the names and
    addresses of the local machine.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        finally:
            # the key is not needed once both values are read
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    checks = []
    for entry in proxyOverride.split(';'):
        if entry != '<local>':
            checks.append(entry)
            continue
        checks.extend(['localhost', '127.0.0.1'])
        try:
            localName = socket.gethostname()
            checks.append(localName)
            checks.append(socket.gethostbyname(localName))
        except socket.error:
            # best effort, like the lookups above: a machine whose own
            # name does not resolve simply contributes fewer entries
            pass
    # now check if we match one of the registry values.
    for test in checks:
        if not test:
            # An empty entry (for instance from a trailing ';') would
            # become an empty pattern, and that matches every host.
            continue
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            if re.match(test, val, re.I):
                return 1
    return 0
1666
def proxy_bypass(host):
    """Return a true value if the proxy should be bypassed for host.

    The environment decides whenever it defines any <scheme>_proxy
    variable; otherwise the registry settings are consulted.

    """
    if not getproxies_environment():
        return proxy_bypass_registry(host)
    return proxy_bypass_environment(host)
1678
Mark Hammond4f570b92000-07-26 07:04:38 +00001679else:
1680 # By default use environment variables
1681 getproxies = getproxies_environment
Georg Brandl22350112008-01-20 12:05:43 +00001682 proxy_bypass = proxy_bypass_environment
Guido van Rossum442e7201996-03-20 15:33:11 +00001683
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001684# Test and time quote() and unquote()
1685def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001686 s = ''
1687 for i in range(256): s = s + chr(i)
1688 s = s*4
1689 t0 = time.time()
1690 qs = quote(s)
1691 uqs = unquote(qs)
1692 t1 = time.time()
1693 if uqs != s:
1694 print 'Wrong!'
Walter Dörwald70a6b492004-02-12 17:35:32 +00001695 print repr(s)
1696 print repr(qs)
1697 print repr(uqs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001698 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001699
1700
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001701def reporthook(blocknum, blocksize, totalsize):
1702 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001703 print "Block number: %d, Block size: %d, Total size: %d" % (
1704 blocknum, blocksize, totalsize)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001705
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001706# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001707def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001708 if not args:
1709 args = [
1710 '/etc/passwd',
1711 'file:/etc/passwd',
1712 'file://localhost/etc/passwd',
Collin Winter071d1ae2007-03-12 01:55:54 +00001713 'ftp://ftp.gnu.org/pub/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001714 'http://www.python.org/index.html',
1715 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001716 if hasattr(URLopener, "open_https"):
1717 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001718 try:
1719 for url in args:
1720 print '-'*10, url, '-'*10
1721 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001722 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001723 if h:
1724 print '======'
1725 for k in h.keys(): print k + ':', h[k]
1726 print '======'
1727 fp = open(fn, 'rb')
1728 data = fp.read()
1729 del fp
1730 if '\r' in data:
1731 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001732 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001733 print data
1734 fn, h = None, None
1735 print '-'*40
1736 finally:
1737 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001738
Guido van Rossum23490151998-06-25 02:39:00 +00001739def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001740 import getopt, sys
1741 try:
1742 opts, args = getopt.getopt(sys.argv[1:], "th")
1743 except getopt.error, msg:
1744 print msg
1745 print "Use -h for help"
1746 return
1747 t = 0
1748 for o, a in opts:
1749 if o == '-t':
1750 t = t + 1
1751 if o == '-h':
1752 print "Usage: python urllib.py [-t] [url ...]"
1753 print "-t runs self-test;",
1754 print "otherwise, contents of urls are printed"
1755 return
1756 if t:
1757 if t > 1:
1758 test1()
1759 test(args)
1760 else:
1761 if not args:
1762 print "Use -h for help"
1763 for url in args:
1764 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001765
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001766# Run test program when run as a script
1767if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001768 main()