blob: 4f1ebdd21ed81208ea48c25f2ec8a7b68c0196e7 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Brett Cannon69200fa2004-03-23 21:26:39 +000040__version__ = '1.16' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Helper for non-unix systems: pick the platform-specific converters
# between URL path components and local file names.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Everywhere else the two forms differ only by %-quoting.
    def url2pathname(pathname):
        """Convert a URL path component to a local path by unquoting it."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a local path to a URL path component by quoting it."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
64# Shortcut for basic usage
65_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    With an explicit proxies mapping a fresh FancyURLopener is built for
    this call only; otherwise the shared module-level opener is created
    on first use and reused afterwards.
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    # Only pass data along when the caller actually supplied some.
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Call retrieve() on the shared module-level opener.

    The opener is created on first use; the arguments are passed through
    unchanged.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if there is one."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
class ContentTooShortError(IOError):
    """Raised when the downloaded size does not match content-length.

    The extra 'content' argument is stored on the exception as
    self.content.
    """

    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000094
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000095ftpcache = {}
96class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000097 """Class to open URLs.
98 This is a class rather than just a subroutine because we may need
99 more than one set of global protocol-specific options.
100 Note -- this is a base class for those who don't want the
101 automatic handling of errors type 302 (relocated) and 401
102 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000103
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000104 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000105
Guido van Rossumba311382000-08-24 16:18:04 +0000106 version = "Python-urllib/%s" % __version__
107
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000108 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000110 if proxies is None:
111 proxies = getproxies()
112 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
113 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000114 self.key_file = x509.get('key_file')
115 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000116 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000117 self.__tempfiles = []
118 self.__unlink = os.unlink # See cleanup()
119 self.tempcache = None
120 # Undocumented feature: if you assign {} to tempcache,
121 # it is used to cache files retrieved with
122 # self.retrieve(). This is not enabled by default
123 # since it does not work for changing documents (and I
124 # haven't got the logic to check expiration headers
125 # yet).
126 self.ftpcache = ftpcache
127 # Undocumented feature: you can use a different
128 # ftp cache by assigning to the .ftpcache member;
129 # in case you want logically independent URL openers
130 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000131
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000132 def __del__(self):
133 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000134
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 def close(self):
136 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000137
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000138 def cleanup(self):
139 # This code sometimes runs when the rest of this module
140 # has already been deleted, so it can't use any globals
141 # or import anything.
142 if self.__tempfiles:
143 for file in self.__tempfiles:
144 try:
145 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000146 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 pass
148 del self.__tempfiles[:]
149 if self.tempcache:
150 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000153 """Add a header to be used by the HTTP interface only
154 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000155 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000156
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000158 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000159 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000161 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000162 filename, headers = self.tempcache[fullurl]
163 fp = open(filename, 'rb')
164 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000165 urltype, url = splittype(fullurl)
166 if not urltype:
167 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000168 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000169 proxy = self.proxies[urltype]
170 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000171 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000173 else:
174 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000175 name = 'open_' + urltype
176 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000177 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000179 if proxy:
180 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000181 else:
182 return self.open_unknown(fullurl, data)
183 try:
184 if data is None:
185 return getattr(self, name)(url)
186 else:
187 return getattr(self, name)(url, data)
188 except socket.error, msg:
189 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000190
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000191 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000192 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000193 type, url = splittype(fullurl)
194 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000195
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000196 def open_unknown_proxy(self, proxy, fullurl, data=None):
197 """Overridable interface to open unknown URL type."""
198 type, url = splittype(fullurl)
199 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
200
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000201 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000202 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000203 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000204 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000205 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000206 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000207 return self.tempcache[url]
208 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000209 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000210 try:
211 fp = self.open_local_file(url1)
212 hdrs = fp.info()
213 del fp
214 return url2pathname(splithost(url1)[1]), hdrs
215 except IOError, msg:
216 pass
Fred Drake316a7932000-08-24 01:01:26 +0000217 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000218 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000219 if filename:
220 tfp = open(filename, 'wb')
221 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000222 import tempfile
223 garbage, path = splittype(url)
224 garbage, path = splithost(path or "")
225 path, garbage = splitquery(path or "")
226 path, garbage = splitattr(path or "")
227 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000228 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000229 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000230 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 result = filename, headers
232 if self.tempcache is not None:
233 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000234 bs = 1024*8
235 size = -1
Georg Brandlb9256022005-08-24 18:46:39 +0000236 read = 0
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000237 blocknum = 1
238 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000239 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000240 size = int(headers["Content-Length"])
241 reporthook(0, bs, size)
242 block = fp.read(bs)
Georg Brandlb9256022005-08-24 18:46:39 +0000243 read += len(block)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000244 if reporthook:
245 reporthook(1, bs, size)
246 while block:
247 tfp.write(block)
248 block = fp.read(bs)
Georg Brandlb9256022005-08-24 18:46:39 +0000249 read += len(block)
250 blocknum += 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 if reporthook:
252 reporthook(blocknum, bs, size)
253 fp.close()
254 tfp.close()
255 del fp
256 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000257
258 # raise exception if actual size does not match content-length header
259 if size >= 0 and read < size:
260 raise ContentTooShortError("retrieval incomplete: got only %i out "
261 "of %i bytes" % (read, size), result)
262
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000263 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000264
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000265 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000266
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000267 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000268 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000269 import httplib
270 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000271 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000272 host, selector = splithost(url)
273 if host:
274 user_passwd, host = splituser(host)
275 host = unquote(host)
276 realhost = host
277 else:
278 host, selector = url
279 urltype, rest = splittype(selector)
280 url = rest
281 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000282 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000283 realhost = None
284 else:
285 realhost, rest = splithost(rest)
286 if realhost:
287 user_passwd, realhost = splituser(realhost)
288 if user_passwd:
289 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000290 if proxy_bypass(realhost):
291 host = realhost
292
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000293 #print "proxy via http:", host, selector
294 if not host: raise IOError, ('http error', 'no host given')
295 if user_passwd:
296 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000297 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000298 else:
299 auth = None
300 h = httplib.HTTP(host)
301 if data is not None:
302 h.putrequest('POST', selector)
303 h.putheader('Content-type', 'application/x-www-form-urlencoded')
304 h.putheader('Content-length', '%d' % len(data))
305 else:
306 h.putrequest('GET', selector)
307 if auth: h.putheader('Authorization', 'Basic %s' % auth)
308 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000309 for args in self.addheaders: h.putheader(*args)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000310 h.endheaders()
311 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000312 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 errcode, errmsg, headers = h.getreply()
314 fp = h.getfile()
315 if errcode == 200:
316 return addinfourl(fp, headers, "http:" + url)
317 else:
318 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000319 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000320 else:
321 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000322
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000323 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000324 """Handle http errors.
325 Derived class can override this, or provide specific handlers
326 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000327 # First check if there's a specific handler for this error
328 name = 'http_error_%d' % errcode
329 if hasattr(self, name):
330 method = getattr(self, name)
331 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000332 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000333 else:
334 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000335 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000336 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000337
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000338 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000339 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000340 void = fp.read()
341 fp.close()
342 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000343
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000344 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000345 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000346 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000347 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000348 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000349 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000350 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000351 if host:
352 user_passwd, host = splituser(host)
353 host = unquote(host)
354 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000355 else:
356 host, selector = url
357 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000358 url = rest
359 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000360 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000361 realhost = None
362 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000363 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000364 if realhost:
365 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000366 if user_passwd:
367 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000368 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000369 if not host: raise IOError, ('https error', 'no host given')
370 if user_passwd:
371 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000372 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000373 else:
374 auth = None
375 h = httplib.HTTPS(host, 0,
376 key_file=self.key_file,
377 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000378 if data is not None:
379 h.putrequest('POST', selector)
380 h.putheader('Content-type',
381 'application/x-www-form-urlencoded')
382 h.putheader('Content-length', '%d' % len(data))
383 else:
384 h.putrequest('GET', selector)
Andrew M. Kuchlingff638ea2003-08-29 18:12:23 +0000385 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000386 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000387 for args in self.addheaders: h.putheader(*args)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000388 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000389 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000390 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000391 errcode, errmsg, headers = h.getreply()
392 fp = h.getfile()
393 if errcode == 200:
Guido van Rossumb931bf32001-12-08 17:09:07 +0000394 return addinfourl(fp, headers, "https:" + url)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000395 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000396 if data is None:
397 return self.http_error(url, fp, errcode, errmsg, headers)
398 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000399 return self.http_error(url, fp, errcode, errmsg, headers,
400 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000401
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000402 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000403 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000404 import gopherlib
405 host, selector = splithost(url)
406 if not host: raise IOError, ('gopher error', 'no host given')
407 host = unquote(host)
408 type, selector = splitgophertype(selector)
409 selector, query = splitquery(selector)
410 selector = unquote(selector)
411 if query:
412 query = unquote(query)
413 fp = gopherlib.send_query(selector, query, host)
414 else:
415 fp = gopherlib.send_selector(selector, host)
416 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000417
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000418 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000419 """Use local file or FTP depending on form of URL."""
Jack Jansen4ef11032002-09-12 20:14:04 +0000420 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000421 return self.open_ftp(url)
422 else:
423 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000424
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000425 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000426 """Use local file."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000427 import mimetypes, mimetools, email.Utils
428 try:
429 from cStringIO import StringIO
430 except ImportError:
431 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000432 host, file = splithost(url)
433 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000434 try:
435 stats = os.stat(localname)
436 except OSError, e:
437 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000438 size = stats.st_size
Anthony Baxter3dd9e462004-10-11 13:53:08 +0000439 modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000440 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000441 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000442 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
443 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000444 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000445 urlfile = file
446 if file[:1] == '/':
447 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000448 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000449 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000450 host, port = splitport(host)
451 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000452 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000453 urlfile = file
454 if file[:1] == '/':
455 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000456 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000457 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000458 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000459
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000460 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000461 """Use FTP protocol."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000462 import mimetypes, mimetools
463 try:
464 from cStringIO import StringIO
465 except ImportError:
466 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000467 host, path = splithost(url)
468 if not host: raise IOError, ('ftp error', 'no host given')
469 host, port = splitport(host)
470 user, host = splituser(host)
471 if user: user, passwd = splitpasswd(user)
472 else: passwd = None
473 host = unquote(host)
474 user = unquote(user or '')
475 passwd = unquote(passwd or '')
476 host = socket.gethostbyname(host)
477 if not port:
478 import ftplib
479 port = ftplib.FTP_PORT
480 else:
481 port = int(port)
482 path, attrs = splitattr(path)
483 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000484 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 dirs, file = dirs[:-1], dirs[-1]
486 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000487 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000488 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000489 # XXX thread unsafe!
490 if len(self.ftpcache) > MAXFTPCACHE:
491 # Prune the cache, rather arbitrarily
492 for k in self.ftpcache.keys():
493 if k != key:
494 v = self.ftpcache[k]
495 del self.ftpcache[k]
496 v.close()
497 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000498 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000499 self.ftpcache[key] = \
500 ftpwrapper(user, passwd, host, port, dirs)
501 if not file: type = 'D'
502 else: type = 'I'
503 for attr in attrs:
504 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000505 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000506 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000507 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000508 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000509 mtype = mimetypes.guess_type("ftp:" + url)[0]
510 headers = ""
511 if mtype:
512 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000513 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000514 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000515 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000516 return addinfourl(fp, headers, "ftp:" + url)
517 except ftperrors(), msg:
518 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000519
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000520 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000521 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000522 # ignore POSTed data
523 #
524 # syntax of data URLs:
525 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
526 # mediatype := [ type "/" subtype ] *( ";" parameter )
527 # data := *urlchar
528 # parameter := attribute "=" value
Raymond Hettingera6172712004-12-31 19:15:26 +0000529 import mimetools
530 try:
531 from cStringIO import StringIO
532 except ImportError:
533 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000534 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000535 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000536 except ValueError:
537 raise IOError, ('data error', 'bad data URL')
538 if not type:
539 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000540 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000541 if semi >= 0 and '=' not in type[semi:]:
542 encoding = type[semi+1:]
543 type = type[:semi]
544 else:
545 encoding = ''
546 msg = []
547 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
548 time.gmtime(time.time())))
549 msg.append('Content-type: %s' % type)
550 if encoding == 'base64':
551 import base64
552 data = base64.decodestring(data)
553 else:
554 data = unquote(data)
555 msg.append('Content-length: %d' % len(data))
556 msg.append('')
557 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000558 msg = '\n'.join(msg)
Raymond Hettingera6172712004-12-31 19:15:26 +0000559 f = StringIO(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000560 headers = mimetools.Message(f, 0)
561 f.fileno = None # needed for addinfourl
562 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000563
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000564
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000565class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000566 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000567
def __init__(self, *args, **kwargs):
    """Initialize the base opener, then the auth cache and redirect counters.

    All arguments are passed straight through to URLopener.__init__().
    """
    URLopener.__init__(self, *args, **kwargs)
    # Redirect bookkeeping used by http_error_302().
    self.maxtries = 10
    self.tries = 0
    self.auth_cache = {}
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000573
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception.

    The error response is returned wrapped in addinfourl, like a
    successful one.
    """
    response = addinfourl(fp, headers, "http:" + url)
    return response
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000577
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    Follows the redirect through redirect_internal().  self.tries counts
    the redirects followed for the current request; once it reaches
    self.maxtries the request is reported as a 500 "Redirect Recursion"
    through http_error_500 (when defined) or http_error_default.
    """
    self.tries += 1
    if self.maxtries and self.tries >= self.maxtries:
        if hasattr(self, "http_error_500"):
            meth = self.http_error_500
        else:
            meth = self.http_error_default
        self.tries = 0
        return meth(url, fp, 500,
                    "Internal Server Error: Redirect Recursion", headers)
    try:
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
    finally:
        # Reset even when the redirect raises, so a failed request does
        # not leave a stale count behind for the next one.
        self.tries = 0
    return result
593
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Open the redirect target named by the 'location' or 'uri' header.

    Returns None when neither header is present.  The new URL is opened
    without the original data.
    """
    for field in ('location', 'uri'):
        if field in headers:
            newurl = headers[field]
            break
    else:
        return
    # Drain and close the old response before following the redirect.
    fp.read()
    fp.close()
    # In case the server sent a relative URL, join with original:
    newurl = basejoin(self.type + ":" + url, newurl)
    return self.open(newurl)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000606
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- relocated (permanently); handled exactly like a 302."""
    follow = self.http_error_302
    return follow(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000610
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- also relocated; delegated unchanged to the 302 handler."""
    follow = self.http_error_302
    return follow(url, fp, errcode, errmsg, headers, data)
614
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 307 -- relocated, but turn POST into error.

    A request made without data is redirected like a 302; one that
    carried data goes to http_error_default() instead.
    """
    if data is not None:
        return self.http_error_default(url, fp, errcode, errmsg, headers)
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
621
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 401 -- authentication required.

    Only a 'www-authenticate' challenge of the form
    '<scheme> realm="<realm>"' with scheme 'basic' (any case) is handled,
    by calling self.retry_<type>_basic_auth(url, realm[, data]).  Every
    other case is delegated to URLopener.http_error_default(), and
    whatever that handler returns or raises is propagated.

    See this URL for a description of the basic authentication scheme:
    http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
    if not 'www-authenticate' in headers:
        # Return explicitly so control can never fall through to the
        # header lookup below if the default handler returns.
        return URLopener.http_error_default(self, url, fp,
                                            errcode, errmsg, headers)
    stuff = headers['www-authenticate']
    import re
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    if not match:
        return URLopener.http_error_default(self, url, fp,
                                            errcode, errmsg, headers)
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        return URLopener.http_error_default(self, url, fp,
                                            errcode, errmsg, headers)
    # Dispatch on the URL type of the current request, e.g.
    # retry_http_basic_auth or retry_https_basic_auth.
    name = 'retry_' + self.type + '_basic_auth'
    if data is None:
        return getattr(self,name)(url, realm)
    else:
        return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000644
def retry_http_basic_auth(self, url, realm, data=None):
    """Re-open an http URL with 'user:passwd@' embedded in the host part.

    Returns None when neither a user name nor a password was obtained.
    """
    host, selector = splithost(url)
    # Index just past any existing 'user:passwd@' prefix; 0 if none.
    at = host.find('@') + 1
    host = host[at:]
    # A non-zero index is passed as clear_cache, so a cached entry for
    # this realm/host is dropped before asking again.
    user, passwd = self.get_user_passwd(host, realm, at)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = 'http://' + credentials + '@' + host + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000657
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open an https URL with 'user:passwd@' embedded in the host part.

    Returns None when neither a user name nor a password was obtained;
    otherwise the rebuilt '//user:passwd@host...' URL goes straight to
    self.open_https().
    """
    host, selector = splithost(url)
    # Index just past any existing 'user:passwd@' prefix; 0 if none.
    at = host.find('@') + 1
    host = host[at:]
    # A non-zero index is passed as clear_cache (see get_user_passwd).
    user, passwd = self.get_user_passwd(host, realm, at)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = '//' + credentials + '@' + host + selector
    return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000667
def get_user_passwd(self, host, realm, clear_cache = 0):
    """Return (user, passwd) for realm at host, prompting if necessary.

    Answers are cached under 'realm@host' (host lower-cased).  A true
    clear_cache discards the cached entry and prompts again.  Only a
    non-empty answer is stored.
    """
    key = realm + '@' + host.lower()
    cached = key in self.auth_cache
    if cached and not clear_cache:
        return self.auth_cache[key]
    if cached:
        del self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[key] = (user, passwd)
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000678
def prompt_user_passwd(self, host, realm):
    """Ask for a user name and password on the terminal.

    Returns (user, passwd), or (None, None) if interrupted with ^C.
    Override this in a GUI environment!
    """
    import getpass
    try:
        user_prompt = "Enter username for %s at %s: " % (realm, host)
        user = raw_input(user_prompt)
        passwd_prompt = ("Enter password for %s in %s at %s: " %
                         (user, realm, host))
        passwd = getpass.getpass(passwd_prompt)
        return user, passwd
    except KeyboardInterrupt:
        # Finish the interrupted prompt line before giving up.
        print
        return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000691
692
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000693# Utility functions
694
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the remembered value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000702
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the remembered value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000710
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only on the first call; its all_errors value is
    then remembered for later calls.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000719
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built on the first call and shared by all later ones.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()    # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000733
734
735# Utility classes
736
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    An instance remembers the login parameters and the directory path so
    that init() can rebuild the connection, and tracks in self.busy
    whether a transfer started by retrfile() is still outstanding.
    """

    def __init__(self, user, passwd, host, port, dirs):
        """Store the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a directory listing.

        type is the FTP transfer type letter; 'd' or 'D' requests a
        directory listing.  Returns a pair: a file-like object whose
        close() also calls endtransfer(), and the second element of the
        value returned by ntransfercmd().

        Raises IOError('ftp error', reason) for permanent FTP errors,
        except a reply starting with '550' to RETR, which falls back to a
        listing instead.
        """
        import ftplib
        # Finish any transfer still pending on this connection first.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed (presumably a stale cached connection):
            # reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                # Re-raise as IOError, keeping the original traceback.
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A '550' reply is not fatal here: conn stays None and the
                # LIST fallback below is tried instead.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Read the reply that ends a pending transfer, ignoring FTP errors.

        Does nothing unless self.busy is set.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the connection.

        Errors raised while closing are deliberately ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000809
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    attributes of the wrapper.
    """

    def __init__(self, fp):
        self.fp = fp
        # read() and readline() are required of the wrapped object...
        self.read = fp.read
        self.readline = fp.readline
        # ...the remaining methods are forwarded only when it has them.
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000835
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, right after the wrapped file
    has been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000850
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000860
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        self.headers, self.url = headers, url
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000874
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000875
Guido van Rossum7c395db1994-07-04 22:14:49 +0000876# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000877# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000878# splittype('type:opaquestring') --> 'type', 'opaquestring'
879# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000880# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
881# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000882# splitport('host:port') --> 'host', 'port'
883# splitquery('/path?query') --> '/path', 'query'
884# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000885# splitattr('/path;attr1=value1;attr2=value2;...') ->
886# '/path', ['attr1=value1', 'attr2=value2', ...]
887# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000888# splitgophertype('/Xselector') --> 'X', 'selector'
889# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
891
# Pick the _is_unicode() implementation once, at import time, depending
# on whether this interpreter has a 'unicode' type at all.
try:
    unicode
except NameError:
    def _is_unicode(x):
        """No unicode type available: nothing is a unicode string."""
        return 0
else:
    def _is_unicode(x):
        """Return true if x is a unicode string."""
        return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000900
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode string is returned unchanged.  A
    unicode string that cannot be encoded as ASCII raises UnicodeError.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
912
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000913def unwrap(url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000914 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
Guido van Rossumb2493f82000-12-15 15:01:37 +0000915 url = url.strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000916 if url[:1] == '<' and url[-1:] == '>':
Guido van Rossumb2493f82000-12-15 15:01:37 +0000917 url = url[1:-1].strip()
918 if url[:4] == 'URL:': url = url[4:].strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000919 return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000920
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased; it is None when url does not start
    with 'type:'.
    """
    global _typeprog
    if _typeprog is None:
        # Compile on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # found.end() is the position just past the ':' separator.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000934
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile on first use only.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000946
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    When an '@' is present both parts are passed through unquote() and
    returned as a two-element list; otherwise the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile on first use only.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000958
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is at the first ':'; passwd is None when there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000970
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None when host
    does not end in ':' followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compile on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000983
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    # An empty or non-numeric port yields None rather than the default.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001005
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is at the last '?'; query is None when there is none.
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use only.
        import re
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001017
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is at the last '#'; tag is None when there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compile on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001029
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    parts = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = parts.pop(0)
    return path, parts
Guido van Rossum7c395db1994-07-04 22:14:49 +00001035
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is at the first '='; value is None when there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compile on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001047
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is None unless selector is a '/' followed by at least one
    more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001053
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' (too few characters, or
    characters that are not hex digits) is passed through unchanged.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int(code, 16) on its own also
        # accepts a sign or surrounding whitespace, so '%+1' or '% 1'
        # would otherwise be decoded as if they were valid escapes.
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            try:
                res.append(chr(int(code, 16)) + item[2:])
                continue
            except ValueError:
                # Kept from the original, which left the escape alone on
                # any ValueError -- presumably this also covers a failed
                # concatenation with unicode text; verify before removing.
                pass
        res.append('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001072
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' becomes a space before the %XX escapes are decoded.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001077
# Characters that quote() never escapes.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set quote() ends up with for its default safe='/' argument.
_fast_safe_test = always_safe + '/'
# Lookup table for _fast_quote(), built on first use.
_fast_safe = None

def _fast_quote(s):
    """Quote s, leaving only the characters of _fast_safe_test unescaped."""
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = {}
        for safe_char in _fast_safe_test:
            _fast_safe[safe_char] = safe_char
    pieces = []
    for ch in s:
        if ch in _fast_safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001097
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character of s that is neither in always_safe nor in the safe
    argument is replaced by its %XX escape.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    # The default safe set has a dictionary-based fast path.
    if _fast_safe_test == safe:
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001128
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Quote each space-separated word on its own, then rejoin with '+'.
    words = [quote(word, safe) for word in s.split(' ')]
    return '+'.join(words)
Guido van Rossum0564e121996-12-13 14:47:36 +00001138
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001139def urlencode(query,doseq=0):
1140 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001141
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001142 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001143 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001144
1145 If the query arg is a sequence of two-element tuples, the order of the
1146 parameters in the output will match the order of parameters in the
1147 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001148 """
Tim Peters658cba62001-02-09 20:06:00 +00001149
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001150 if hasattr(query,"items"):
1151 # mapping objects
1152 query = query.items()
1153 else:
1154 # it's a bother at times that strings and string-like objects are
1155 # sequences...
1156 try:
1157 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001158 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001159 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001160 raise TypeError
1161 # zero-length sequences of all types will get here and succeed,
1162 # but that's a minor nit - since the original implementation
1163 # allowed empty dicts that type of behavior probably should be
1164 # preserved for consistency
1165 except TypeError:
1166 ty,va,tb = sys.exc_info()
1167 raise TypeError, "not a valid non-string sequence or mapping object", tb
1168
Guido van Rossume7b146f2000-02-04 15:28:42 +00001169 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001170 if not doseq:
1171 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001172 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001173 k = quote_plus(str(k))
1174 v = quote_plus(str(v))
1175 l.append(k + '=' + v)
1176 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001177 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001178 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001179 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001180 v = quote_plus(v)
1181 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001182 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001183 # is there a reasonable way to convert to ASCII?
1184 # encode generates a string, but "replace" or "ignore"
1185 # lose information and "strict" can raise UnicodeError
1186 v = quote_plus(v.encode("ASCII","replace"))
1187 l.append(k + '=' + v)
1188 else:
1189 try:
1190 # is this a sufficient test for sequence-ness?
1191 x = len(v)
1192 except TypeError:
1193 # not a sequence
1194 v = quote_plus(str(v))
1195 l.append(k + '=' + v)
1196 else:
1197 # loop over the sequence
1198 for elt in v:
1199 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001200 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001201
Guido van Rossum442e7201996-03-20 15:33:11 +00001202# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case) that have a non-empty value; this seems to be the
    standard convention.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            proxies[name[:-len(suffix)]] = value
    return proxies
1218
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  The HTTP proxy is read from the HTTPProxyHost key
        when the UseHTTPProxy key is present and true.

        An empty dictionary is returned when the ic module cannot be
        imported or Internet Config cannot be opened.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Return 0: no proxy bypass list is consulted on this platform."""
        return 0

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from Internet Config.

        """
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  The ProxyServer
        value is either a single "host:port" used for all protocols or
        a ';'-separated list of "protocol=address" entries.  Entries of
        such a list that contain no '=' (for instance the empty entry
        left by a trailing ';') are skipped.

        An empty or partially filled dictionary is returned when the
        registry cannot be read or holds an unexpected value.

        """
        proxies = {}
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # Inner try/finally so the key is closed on every path,
            # including when one of the lookups below raises.
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted
                    # to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            if '=' not in p:
                                continue
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        host is compared both as given and, when it resolves, by its IP
        address.  ProxyOverride is a ';'-separated list in which '*' and
        '?' act as glob wildcards and the special entry '<local>' stands
        for localhost, 127.0.0.1 and this machine's own name and
        address.  Blank entries are ignored.  0 is also returned when
        the registry cannot be read or proxying is disabled.

        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to
                # ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Same set as getproxies_registry(): missing key/value or a
            # value that cannot be converted with str().
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        candidates = [host]
        try:
            addr = socket.gethostbyname(host)
            if addr != host:
                candidates.append(addr)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        patterns = []
        for entry in proxyOverride.split(';'):
            if entry == '<local>':
                patterns.extend(['localhost', '127.0.0.1'])
                try:
                    localname = socket.gethostname()
                    patterns.append(localname)
                    patterns.append(socket.gethostbyname(localname))
                except socket.error:
                    # Own name not resolvable; keep what we have.
                    pass
            elif entry:
                # A blank entry would turn into the empty regex, which
                # matches every host.
                patterns.append(entry)
        # now check if we match one of the registry values.
        for test in patterns:
            # Escape everything, then turn the glob characters back
            # into their regex equivalents.
            test = re.escape(test)
            test = test.replace(r"\*", r".*")   # change glob sequence
            test = test.replace(r"\?", r".")    # change glob char
            for val in candidates:
                # NOTE: re.match anchors at the start only, so an entry
                # also matches any host that merely begins with it.
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: no proxy bypass list is consulted on this platform."""
        return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001369
# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 character codes through quote() and unquote().

    The sample string holds every character code four times over.
    Prints 'Wrong!' if unquoting the quoted sample does not give the
    sample back, then the repr of the sample, of its quoted form and
    of the round-tripped result, and finally the elapsed time in
    seconds.
    """
    sample = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    quoted = quote(sample)
    restored = unquote(quoted)
    finished = time.time()
    if restored != sample:
        print('Wrong!')
    for text in (sample, quoted, restored):
        print(repr(text))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001385
1386
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a transfer.

    test() in this file passes this function to urlretrieve() as its
    progress callback.  The three arguments are formatted with %d.
    """
    # Report during remote transfers
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001391
# Test program
def test(args=None):
    """Retrieve each URL in args and print what came back.

    args is a sequence of URLs; when it is empty or omitted a built-in
    list of sample URLs is used (plus an https URL when URLopener has
    an open_https method).  For every URL the local file name returned
    by urlretrieve() is printed, followed by the headers (if any) and
    the file contents with carriage returns removed.  urlcleanup() is
    always called at the end, even if a retrieval fails.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s: %s' % (k, h[k]))
                print('======')
            # Close the file explicitly instead of relying on the
            # reference count dropping to zero.
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                fp.close()
            if '\r' in data:
                data = data.replace('\r', '')
            print(data)
            # Drop the references to the file name and headers before
            # the next retrieval.
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001425
def main():
    """Command-line entry point: python urllib.py [-t] [url ...].

    -h prints a usage message and returns.  -t runs test() on the
    given URLs; given more than once it runs test1() first.  Without
    -t the contents of every URL are written to standard output
    exactly as read, and each connection is closed afterwards.  An
    unrecognised option prints the getopt error and a hint, then
    returns.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error:
        # sys.exc_info() fetches the exception without the
        # "except X, name" syntax.
        print(sys.exc_info()[1])
        print("Use -h for help")
        return
    t = 0
    for o, _ in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test; otherwise, contents of urls are printed")
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print("Use -h for help")
        for url in args:
            f = urlopen(url)
            try:
                # write() rather than "print x," so no separator
                # characters are mixed into the document contents.
                sys.stdout.write(f.read())
            finally:
                f.close()
Guido van Rossum23490151998-06-25 02:39:00 +00001452
# Invoke the command-line driver only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()