blob: f700d718f4bc9389f72bbf0f592b9d5901a6a347 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000030from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Brett Cannon69200fa2004-03-23 21:26:39 +000040__version__ = '1.16' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
Jack Jansendc3e3f61995-12-15 13:22:13 +000044# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # No platform-specific module applies: a path is just the
    # (un)quoted form of the URL path component.
    def url2pathname(pathname):
        """Return the local path for a URL path by undoing %-quoting."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Return the URL path for a local path by applying %-quoting."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
64# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener using that mapping
    is created for this call only.  Otherwise the module-level opener
    is used, being created on first use.  A data value other than None
    is passed on to the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        handler = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        handler = _urlopener
    if data is not None:
        return handler.open(url, data)
    return handler.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use; all arguments
    are handed unchanged to its retrieve() method, whose result is
    returned.
    """
    global _urlopener
    if _urlopener:
        shared = _urlopener
    else:
        shared = _urlopener = FancyURLopener()
    return shared.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if one exists."""
    if not _urlopener:
        return
    _urlopener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
Georg Brandlb9256022005-08-24 18:46:39 +000089# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError that also carries what had been retrieved so far.

    message is passed on to IOError; content is stored unchanged on
    the instance as the .content attribute.
    """

    def __init__(self, message, content):
        self.content = content
        IOError.__init__(self, message)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000094
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000095ftpcache = {}
96class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000097 """Class to open URLs.
98 This is a class rather than just a subroutine because we may need
99 more than one set of global protocol-specific options.
100 Note -- this is a base class for those who don't want the
101 automatic handling of errors type 302 (relocated) and 401
102 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000103
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000104 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000105
Guido van Rossumba311382000-08-24 16:18:04 +0000106 version = "Python-urllib/%s" % __version__
107
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000108 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000110 if proxies is None:
111 proxies = getproxies()
112 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
113 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000114 self.key_file = x509.get('key_file')
115 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000116 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000117 self.__tempfiles = []
118 self.__unlink = os.unlink # See cleanup()
119 self.tempcache = None
120 # Undocumented feature: if you assign {} to tempcache,
121 # it is used to cache files retrieved with
122 # self.retrieve(). This is not enabled by default
123 # since it does not work for changing documents (and I
124 # haven't got the logic to check expiration headers
125 # yet).
126 self.ftpcache = ftpcache
127 # Undocumented feature: you can use a different
128 # ftp cache by assigning to the .ftpcache member;
129 # in case you want logically independent URL openers
130 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000131
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000132 def __del__(self):
133 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000134
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 def close(self):
136 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000137
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000138 def cleanup(self):
139 # This code sometimes runs when the rest of this module
140 # has already been deleted, so it can't use any globals
141 # or import anything.
142 if self.__tempfiles:
143 for file in self.__tempfiles:
144 try:
145 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000146 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 pass
148 del self.__tempfiles[:]
149 if self.tempcache:
150 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000153 """Add a header to be used by the HTTP interface only
154 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000155 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000156
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000157 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000158 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000159 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 fullurl = unwrap(toBytes(fullurl))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000161 if self.tempcache and fullurl in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000162 filename, headers = self.tempcache[fullurl]
163 fp = open(filename, 'rb')
164 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000165 urltype, url = splittype(fullurl)
166 if not urltype:
167 urltype = 'file'
Raymond Hettinger54f02222002-06-01 14:18:47 +0000168 if urltype in self.proxies:
Martin v. Löwis1d994332000-12-03 18:30:10 +0000169 proxy = self.proxies[urltype]
170 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000171 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000173 else:
174 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000175 name = 'open_' + urltype
176 self.type = urltype
Brett Cannonaaeffaf2004-03-23 23:50:17 +0000177 name = name.replace('-', '_')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000179 if proxy:
180 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000181 else:
182 return self.open_unknown(fullurl, data)
183 try:
184 if data is None:
185 return getattr(self, name)(url)
186 else:
187 return getattr(self, name)(url, data)
188 except socket.error, msg:
189 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000190
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000191 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000192 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000193 type, url = splittype(fullurl)
194 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000195
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000196 def open_unknown_proxy(self, proxy, fullurl, data=None):
197 """Overridable interface to open unknown URL type."""
198 type, url = splittype(fullurl)
199 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
200
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000201 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000202 def retrieve(self, url, filename=None, reporthook=None, data=None):
Brett Cannon7d618c72003-04-24 02:43:20 +0000203 """retrieve(url) returns (filename, headers) for a local object
Guido van Rossume7b146f2000-02-04 15:28:42 +0000204 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000205 url = unwrap(toBytes(url))
Raymond Hettinger54f02222002-06-01 14:18:47 +0000206 if self.tempcache and url in self.tempcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000207 return self.tempcache[url]
208 type, url1 = splittype(url)
Raymond Hettinger10ff7062002-06-02 03:04:52 +0000209 if filename is None and (not type or type == 'file'):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000210 try:
211 fp = self.open_local_file(url1)
212 hdrs = fp.info()
213 del fp
214 return url2pathname(splithost(url1)[1]), hdrs
215 except IOError, msg:
216 pass
Fred Drake316a7932000-08-24 01:01:26 +0000217 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000218 headers = fp.info()
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000219 if filename:
220 tfp = open(filename, 'wb')
221 else:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000222 import tempfile
223 garbage, path = splittype(url)
224 garbage, path = splithost(path or "")
225 path, garbage = splitquery(path or "")
226 path, garbage = splitattr(path or "")
227 suffix = os.path.splitext(path)[1]
Guido van Rossum3b0a3292002-08-09 16:38:32 +0000228 (fd, filename) = tempfile.mkstemp(suffix)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000229 self.__tempfiles.append(filename)
Jeremy Hylton3bd6fde2002-10-11 14:36:24 +0000230 tfp = os.fdopen(fd, 'wb')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000231 result = filename, headers
232 if self.tempcache is not None:
233 self.tempcache[url] = result
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000234 bs = 1024*8
235 size = -1
Georg Brandlb9256022005-08-24 18:46:39 +0000236 read = 0
Georg Brandl5a650a22005-08-26 08:51:34 +0000237 blocknum = 0
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000238 if reporthook:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000239 if "content-length" in headers:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000240 size = int(headers["Content-Length"])
Georg Brandl5a650a22005-08-26 08:51:34 +0000241 reporthook(blocknum, bs, size)
242 while 1:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000243 block = fp.read(bs)
Georg Brandl5a650a22005-08-26 08:51:34 +0000244 if block == "":
245 break
Georg Brandlb9256022005-08-24 18:46:39 +0000246 read += len(block)
Georg Brandl5a650a22005-08-26 08:51:34 +0000247 tfp.write(block)
Georg Brandlb9256022005-08-24 18:46:39 +0000248 blocknum += 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000249 if reporthook:
250 reporthook(blocknum, bs, size)
251 fp.close()
252 tfp.close()
253 del fp
254 del tfp
Georg Brandlb9256022005-08-24 18:46:39 +0000255
256 # raise exception if actual size does not match content-length header
257 if size >= 0 and read < size:
258 raise ContentTooShortError("retrieval incomplete: got only %i out "
259 "of %i bytes" % (read, size), result)
260
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000261 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000262
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000263 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000264
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000265 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000266 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000267 import httplib
268 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000269 if isinstance(url, str):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000270 host, selector = splithost(url)
271 if host:
272 user_passwd, host = splituser(host)
273 host = unquote(host)
274 realhost = host
275 else:
276 host, selector = url
277 urltype, rest = splittype(selector)
278 url = rest
279 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000280 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000281 realhost = None
282 else:
283 realhost, rest = splithost(rest)
284 if realhost:
285 user_passwd, realhost = splituser(realhost)
286 if user_passwd:
287 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000288 if proxy_bypass(realhost):
289 host = realhost
290
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000291 #print "proxy via http:", host, selector
292 if not host: raise IOError, ('http error', 'no host given')
293 if user_passwd:
294 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000295 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000296 else:
297 auth = None
298 h = httplib.HTTP(host)
299 if data is not None:
300 h.putrequest('POST', selector)
301 h.putheader('Content-type', 'application/x-www-form-urlencoded')
302 h.putheader('Content-length', '%d' % len(data))
303 else:
304 h.putrequest('GET', selector)
305 if auth: h.putheader('Authorization', 'Basic %s' % auth)
306 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000307 for args in self.addheaders: h.putheader(*args)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000308 h.endheaders()
309 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000310 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000311 errcode, errmsg, headers = h.getreply()
312 fp = h.getfile()
313 if errcode == 200:
314 return addinfourl(fp, headers, "http:" + url)
315 else:
316 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000317 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000318 else:
319 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000320
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000321 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000322 """Handle http errors.
323 Derived class can override this, or provide specific handlers
324 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000325 # First check if there's a specific handler for this error
326 name = 'http_error_%d' % errcode
327 if hasattr(self, name):
328 method = getattr(self, name)
329 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000330 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000331 else:
332 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000333 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000334 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000335
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000336 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000337 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000338 void = fp.read()
339 fp.close()
340 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000341
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000342 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000343 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000344 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000345 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000346 user_passwd = None
Walter Dörwald65230a22002-06-03 15:58:32 +0000347 if isinstance(url, str):
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000348 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000349 if host:
350 user_passwd, host = splituser(host)
351 host = unquote(host)
352 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000353 else:
354 host, selector = url
355 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000356 url = rest
357 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000358 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000359 realhost = None
360 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000361 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000362 if realhost:
363 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000364 if user_passwd:
365 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000366 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000367 if not host: raise IOError, ('https error', 'no host given')
368 if user_passwd:
369 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000370 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000371 else:
372 auth = None
373 h = httplib.HTTPS(host, 0,
374 key_file=self.key_file,
375 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000376 if data is not None:
377 h.putrequest('POST', selector)
378 h.putheader('Content-type',
379 'application/x-www-form-urlencoded')
380 h.putheader('Content-length', '%d' % len(data))
381 else:
382 h.putrequest('GET', selector)
Andrew M. Kuchlingff638ea2003-08-29 18:12:23 +0000383 if auth: h.putheader('Authorization', 'Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000384 if realhost: h.putheader('Host', realhost)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000385 for args in self.addheaders: h.putheader(*args)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000386 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000387 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000388 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000389 errcode, errmsg, headers = h.getreply()
390 fp = h.getfile()
391 if errcode == 200:
Guido van Rossumb931bf32001-12-08 17:09:07 +0000392 return addinfourl(fp, headers, "https:" + url)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000393 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000394 if data is None:
395 return self.http_error(url, fp, errcode, errmsg, headers)
396 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000397 return self.http_error(url, fp, errcode, errmsg, headers,
398 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000399
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000400 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000401 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000402 import gopherlib
403 host, selector = splithost(url)
404 if not host: raise IOError, ('gopher error', 'no host given')
405 host = unquote(host)
406 type, selector = splitgophertype(selector)
407 selector, query = splitquery(selector)
408 selector = unquote(selector)
409 if query:
410 query = unquote(query)
411 fp = gopherlib.send_query(selector, query, host)
412 else:
413 fp = gopherlib.send_selector(selector, host)
414 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000415
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000416 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000417 """Use local file or FTP depending on form of URL."""
Jack Jansen4ef11032002-09-12 20:14:04 +0000418 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000419 return self.open_ftp(url)
420 else:
421 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000422
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000423 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000424 """Use local file."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000425 import mimetypes, mimetools, email.Utils
426 try:
427 from cStringIO import StringIO
428 except ImportError:
429 from StringIO import StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000430 host, file = splithost(url)
431 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000432 try:
433 stats = os.stat(localname)
434 except OSError, e:
435 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000436 size = stats.st_size
Anthony Baxter3dd9e462004-10-11 13:53:08 +0000437 modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000438 mtype = mimetypes.guess_type(url)[0]
Raymond Hettingera6172712004-12-31 19:15:26 +0000439 headers = mimetools.Message(StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000440 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
441 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000442 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000443 urlfile = file
444 if file[:1] == '/':
445 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000446 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000447 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000448 host, port = splitport(host)
449 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000450 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000451 urlfile = file
452 if file[:1] == '/':
453 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000454 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000455 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000456 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000457
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000458 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000459 """Use FTP protocol."""
Raymond Hettingera6172712004-12-31 19:15:26 +0000460 import mimetypes, mimetools
461 try:
462 from cStringIO import StringIO
463 except ImportError:
464 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000465 host, path = splithost(url)
466 if not host: raise IOError, ('ftp error', 'no host given')
467 host, port = splitport(host)
468 user, host = splituser(host)
469 if user: user, passwd = splitpasswd(user)
470 else: passwd = None
471 host = unquote(host)
472 user = unquote(user or '')
473 passwd = unquote(passwd or '')
474 host = socket.gethostbyname(host)
475 if not port:
476 import ftplib
477 port = ftplib.FTP_PORT
478 else:
479 port = int(port)
480 path, attrs = splitattr(path)
481 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000482 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000483 dirs, file = dirs[:-1], dirs[-1]
484 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000485 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000486 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000487 # XXX thread unsafe!
488 if len(self.ftpcache) > MAXFTPCACHE:
489 # Prune the cache, rather arbitrarily
490 for k in self.ftpcache.keys():
491 if k != key:
492 v = self.ftpcache[k]
493 del self.ftpcache[k]
494 v.close()
495 try:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000496 if not key in self.ftpcache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000497 self.ftpcache[key] = \
498 ftpwrapper(user, passwd, host, port, dirs)
499 if not file: type = 'D'
500 else: type = 'I'
501 for attr in attrs:
502 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000503 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000504 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000505 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000506 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000507 mtype = mimetypes.guess_type("ftp:" + url)[0]
508 headers = ""
509 if mtype:
510 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000511 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000512 headers += "Content-Length: %d\n" % retrlen
Raymond Hettingera6172712004-12-31 19:15:26 +0000513 headers = mimetools.Message(StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000514 return addinfourl(fp, headers, "ftp:" + url)
515 except ftperrors(), msg:
516 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000517
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000518 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000519 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000520 # ignore POSTed data
521 #
522 # syntax of data URLs:
523 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
524 # mediatype := [ type "/" subtype ] *( ";" parameter )
525 # data := *urlchar
526 # parameter := attribute "=" value
Raymond Hettingera6172712004-12-31 19:15:26 +0000527 import mimetools
528 try:
529 from cStringIO import StringIO
530 except ImportError:
531 from StringIO import StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000532 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000533 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000534 except ValueError:
535 raise IOError, ('data error', 'bad data URL')
536 if not type:
537 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000538 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000539 if semi >= 0 and '=' not in type[semi:]:
540 encoding = type[semi+1:]
541 type = type[:semi]
542 else:
543 encoding = ''
544 msg = []
545 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
546 time.gmtime(time.time())))
547 msg.append('Content-type: %s' % type)
548 if encoding == 'base64':
549 import base64
550 data = base64.decodestring(data)
551 else:
552 data = unquote(data)
553 msg.append('Content-length: %d' % len(data))
554 msg.append('')
555 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000556 msg = '\n'.join(msg)
Raymond Hettingera6172712004-12-31 19:15:26 +0000557 f = StringIO(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000558 headers = mimetools.Message(f, 0)
Georg Brandl1f663572005-11-26 16:50:44 +0000559 #f.fileno = None # needed for addinfourl
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000560 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000561
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000562
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000563class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000564 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000565
def __init__(self, *args, **kwargs):
    """Initialise the base opener, then the redirect/auth bookkeeping.

    All arguments are forwarded unchanged to URLopener.__init__.
    """
    URLopener.__init__(self, *args, **kwargs)
    self.auth_cache = {}
    # tries is incremented by http_error_302 and compared to maxtries.
    self.tries, self.maxtries = 0, 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000571
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception.

    The reply is returned wrapped in addinfourl; errcode and errmsg
    are not used.
    """
    full_url = "http:" + url
    return addinfourl(fp, headers, full_url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000575
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    Each call increments self.tries.  Once the count reaches
    self.maxtries (when that is non-zero) the redirect is not
    followed; http_error_500 if defined, else http_error_default, is
    called with code 500 and its result returned.  Otherwise the
    result of redirect_internal() is returned.

    self.tries is reset to 0 on every way out, including when the
    redirect target raises, so one failed redirect cannot count
    against later, unrelated requests.
    """
    self.tries += 1
    try:
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                      data)
    finally:
        self.tries = 0
591
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Follow the redirect named by the 'location' or 'uri' header.

    Returns None when neither header is present.  Otherwise the old
    reply is read and closed, and the target is opened with
    self.open(); data is not passed on to that call.
    """
    for field in ('location', 'uri'):
        if field in headers:
            target = headers[field]
            break
    else:
        return
    # Read and close the old reply before opening the new location.
    fp.read()
    fp.close()
    # In case the server sent a relative URL, join with original:
    target = basejoin(self.type + ":" + url, target)
    return self.open(target)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000604
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000605 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000606 """Error 301 -- also relocated (permanently)."""
607 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000608
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000609 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
610 """Error 303 -- also relocated (essentially identical to 302)."""
611 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
612
Guido van Rossumfa19f7c2003-05-16 01:46:51 +0000613 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
614 """Error 307 -- relocated, but turn POST into error."""
615 if data is None:
616 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
617 else:
618 return self.http_error_default(url, fp, errcode, errmsg, headers)
619
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000620 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000621 """Error 401 -- authentication required.
622 See this URL for a description of the basic authentication scheme:
623 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000624 if not 'www-authenticate' in headers:
Tim Peters85ba6732001-02-28 08:26:44 +0000625 URLopener.http_error_default(self, url, fp,
Fred Drakec680ae82001-10-13 18:37:07 +0000626 errcode, errmsg, headers)
Moshe Zadkae99bd172001-02-27 06:27:04 +0000627 stuff = headers['www-authenticate']
628 import re
629 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
630 if not match:
Tim Peters85ba6732001-02-28 08:26:44 +0000631 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000632 errcode, errmsg, headers)
633 scheme, realm = match.groups()
634 if scheme.lower() != 'basic':
Tim Peters85ba6732001-02-28 08:26:44 +0000635 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000636 errcode, errmsg, headers)
637 name = 'retry_' + self.type + '_basic_auth'
638 if data is None:
639 return getattr(self,name)(url, realm)
640 else:
641 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000642
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000643 def retry_http_basic_auth(self, url, realm, data=None):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000644 host, selector = splithost(url)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000645 i = host.find('@') + 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000646 host = host[i:]
647 user, passwd = self.get_user_passwd(host, realm, i)
648 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000649 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000650 newurl = 'http://' + host + selector
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000651 if data is None:
652 return self.open(newurl)
653 else:
654 return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000655
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000656 def retry_https_basic_auth(self, url, realm, data=None):
Tim Peterse1190062001-01-15 03:34:38 +0000657 host, selector = splithost(url)
658 i = host.find('@') + 1
659 host = host[i:]
660 user, passwd = self.get_user_passwd(host, realm, i)
661 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000662 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Tim Peterse1190062001-01-15 03:34:38 +0000663 newurl = '//' + host + selector
Guido van Rossumafc4f042001-01-15 18:31:13 +0000664 return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000665
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000666 def get_user_passwd(self, host, realm, clear_cache = 0):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000667 key = realm + '@' + host.lower()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000668 if key in self.auth_cache:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000669 if clear_cache:
670 del self.auth_cache[key]
671 else:
672 return self.auth_cache[key]
673 user, passwd = self.prompt_user_passwd(host, realm)
674 if user or passwd: self.auth_cache[key] = (user, passwd)
675 return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000676
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000677 def prompt_user_passwd(self, host, realm):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000678 """Override this in a GUI environment!"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000679 import getpass
680 try:
681 user = raw_input("Enter username for %s at %s: " % (realm,
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000682 host))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000683 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
684 (user, realm, host))
685 return user, passwd
686 except KeyboardInterrupt:
687 print
688 return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000689
690
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000691# Utility functions
692
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done on the first call only; the answer is kept in
    the module-level _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000700
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done on the first call only; the answer is kept in
    the module-level _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000708
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and ftplib.all_errors is kept in
    the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000717
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built on the first call and reused afterwards.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    # Recycle file descriptor
    _noheaders.fp.close()
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000731
732
733# Utility classes
734
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Holds the login parameters together with a connected ftplib.FTP
    object, so the connection can be re-established by init().
    """

    def __init__(self, user, passwd, host, port, dirs):
        """Remember the connection parameters and connect at once."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect: log in and change into each directory of self.dirs."""
        import ftplib
        # No transfer is pending on a fresh connection.
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a directory listing.

        type is appended to the FTP 'TYPE' command; 'd' or 'D' asks for
        a listing instead (sent in ASCII mode).  Returns a pair of a
        file-like object, whose close() ends the transfer, and the
        second item of ftplib's ntransfercmd() result.  Permission
        errors are re-raised as IOError('ftp error', reason), except
        that a '550' reply to RETR falls back to a LIST.
        """
        import ftplib
        # Finish any transfer still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: reconnect and try once more.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with 550 leaves conn as None, so the
                # listing branch below runs; anything else is an error.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                            self.endtransfer), conn[1])
    def endtransfer(self):
        """Read the reply that ends a pending transfer, if there is one."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            # Best effort: FTP errors at this point are ignored.
            pass

    def close(self):
        """End any pending transfer and close the FTP connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: FTP errors while closing are ignored.
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000807
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # fileno is always present; without one on fp it returns None.
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        # Iteration support is forwarded only when fp offers it.
        for optional in ("__iter__", "next"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000836
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the file has been
    closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so that a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000851
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000861
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000875
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000876
Guido van Rossum7c395db1994-07-04 22:14:49 +0000877# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000878# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000879# splittype('type:opaquestring') --> 'type', 'opaquestring'
880# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000881# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
882# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000883# splitport('host:port') --> 'host', 'port'
884# splitquery('/path?query') --> '/path', 'query'
885# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000886# splitattr('/path;attr1=value1;attr2=value2;...') ->
887# '/path', ['attr1=value1', 'attr2=value2', ...]
888# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000889# splitgophertype('/Xselector') --> 'X', 'selector'
890# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
892
Walter Dörwald65230a22002-06-03 15:58:32 +0000893try:
894 unicode
895except NameError:
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000896 def _is_unicode(x):
897 return 0
Walter Dörwald65230a22002-06-03 15:58:32 +0000898else:
899 def _is_unicode(x):
900 return isinstance(x, unicode)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000901
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode string is returned unchanged.
    Raises UnicodeError when the URL contains non-ASCII characters.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
913
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of angle brackets and a leading
    'URL:' are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000921
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url does not
    start with a scheme.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match covers the scheme plus the colon that follows it.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000935
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use only.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000947
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@' and both parts are passed
    through unquote().  Without an '@' the user part is None.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use only.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000959
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'; without one the password is
    None.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +0000971
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None when host
    does not end in ':' followed by digits.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000984
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.groups()
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001006
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'; without one the query is None.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use only.
        import re
        # Raw string: '\?' is not a valid escape in an ordinary literal.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001018
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'; without one the tag is None.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001030
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    attrs = url.split(';')
    # The first piece is the path; what remains are the attributes.
    path = attrs.pop(0)
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001036
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='; without one the value is None.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001048
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is None unless selector starts with '/' followed by at
    least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001054
Raymond Hettinger803ce802005-09-10 06:49:04 +00001055_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1056_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1057
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001058def unquote(s):
Guido van Rossume7b146f2000-02-04 15:28:42 +00001059 """unquote('abc%20def') -> 'abc def'."""
Raymond Hettinger803ce802005-09-10 06:49:04 +00001060 res = s.split('%')
1061 for i in xrange(1, len(res)):
1062 item = res[i]
1063 try:
1064 res[i] = _hextochr[item[:2]] + item[2:]
1065 except KeyError:
1066 res[i] = '%' + item
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001067 except UnicodeDecodeError:
1068 res[i] = unichr(int(item[:2], 16)) + item[2:]
Guido van Rossumb2493f82000-12-15 15:01:37 +00001069 return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001070
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Each '+' is turned into a space before the string is unquoted.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001075
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001076always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001077 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001078 '0123456789' '_.-')
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001079_safemaps = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001080
Guido van Rossum7c395db1994-07-04 22:14:49 +00001081def quote(s, safe = '/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001082 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001083
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001084 Each part of a URL, e.g. the path info, the query, etc., has a
1085 different set of reserved characters that must be quoted.
1086
1087 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1088 the following reserved characters.
1089
1090 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1091 "$" | ","
1092
1093 Each of these characters is reserved in some component of a URL,
1094 but not necessarily in all of them.
1095
1096 By default, the quote function is intended for quoting the path
1097 section of a URL. Thus, it will not encode '/'. This character
1098 is reserved, but in typical usage the quote function is being
1099 called on a path where the existing slash characters are used as
1100 reserved characters.
1101 """
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001102 cachekey = (safe, always_safe)
1103 try:
1104 safe_map = _safemaps[cachekey]
1105 except KeyError:
1106 safe += always_safe
1107 safe_map = {}
1108 for i in range(256):
1109 c = chr(i)
1110 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1111 _safemaps[cachekey] = safe_map
1112 res = map(safe_map.__getitem__, s)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001113 return ''.join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001114
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep the spaces through quote(), then turn them into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001121
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001122def urlencode(query,doseq=0):
1123 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001124
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001125 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001126 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001127
1128 If the query arg is a sequence of two-element tuples, the order of the
1129 parameters in the output will match the order of parameters in the
1130 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001131 """
Tim Peters658cba62001-02-09 20:06:00 +00001132
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001133 if hasattr(query,"items"):
1134 # mapping objects
1135 query = query.items()
1136 else:
1137 # it's a bother at times that strings and string-like objects are
1138 # sequences...
1139 try:
1140 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001141 # non-empty strings will fail this
Walter Dörwald65230a22002-06-03 15:58:32 +00001142 if len(query) and not isinstance(query[0], tuple):
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001143 raise TypeError
1144 # zero-length sequences of all types will get here and succeed,
1145 # but that's a minor nit - since the original implementation
1146 # allowed empty dicts that type of behavior probably should be
1147 # preserved for consistency
1148 except TypeError:
1149 ty,va,tb = sys.exc_info()
1150 raise TypeError, "not a valid non-string sequence or mapping object", tb
1151
Guido van Rossume7b146f2000-02-04 15:28:42 +00001152 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001153 if not doseq:
1154 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001155 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001156 k = quote_plus(str(k))
1157 v = quote_plus(str(v))
1158 l.append(k + '=' + v)
1159 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001160 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001161 k = quote_plus(str(k))
Walter Dörwald65230a22002-06-03 15:58:32 +00001162 if isinstance(v, str):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001163 v = quote_plus(v)
1164 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001165 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001166 # is there a reasonable way to convert to ASCII?
1167 # encode generates a string, but "replace" or "ignore"
1168 # lose information and "strict" can raise UnicodeError
1169 v = quote_plus(v.encode("ASCII","replace"))
1170 l.append(k + '=' + v)
1171 else:
1172 try:
1173 # is this a sufficient test for sequence-ness?
1174 x = len(v)
1175 except TypeError:
1176 # not a sequence
1177 v = quote_plus(str(v))
1178 l.append(k + '=' + v)
1179 else:
1180 # loop over the sequence
1181 for elt in v:
1182 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001183 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001184
Guido van Rossum442e7201996-03-20 15:33:11 +00001185# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are skipped.
    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = value
    return proxies
1201
Jack Jansen11d9b062004-07-16 11:45:00 +00001202if sys.platform == 'darwin':
1203 def getproxies_internetconfig():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001204 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001205
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001206 By convention the mac uses Internet Config to store
1207 proxies. An HTTP proxy, for instance, is stored under
1208 the HttpProxy key.
Guido van Rossum442e7201996-03-20 15:33:11 +00001209
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001210 """
1211 try:
1212 import ic
1213 except ImportError:
1214 return {}
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001215
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001216 try:
1217 config = ic.IC()
1218 except ic.error:
1219 return {}
1220 proxies = {}
1221 # HTTP:
Raymond Hettinger54f02222002-06-01 14:18:47 +00001222 if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001223 try:
1224 value = config['HTTPProxyHost']
1225 except ic.error:
1226 pass
1227 else:
1228 proxies['http'] = 'http://%s' % value
1229 # FTP: XXXX To be done.
1230 # Gopher: XXXX To be done.
1231 return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001232
Tim Peters55c12d42001-08-09 18:04:14 +00001233 def proxy_bypass(x):
1234 return 0
1235
Jack Jansen11d9b062004-07-16 11:45:00 +00001236 def getproxies():
1237 return getproxies_environment() or getproxies_internetconfig()
Tim Peters182b5ac2004-07-18 06:16:08 +00001238
Mark Hammond4f570b92000-07-26 07:04:38 +00001239elif os.name == 'nt':
1240 def getproxies_registry():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001241 """Return a dictionary of scheme -> proxy server URL mappings.
Mark Hammond4f570b92000-07-26 07:04:38 +00001242
1243 Win32 uses the registry to store proxies.
1244
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001245 """
1246 proxies = {}
Mark Hammond4f570b92000-07-26 07:04:38 +00001247 try:
1248 import _winreg
1249 except ImportError:
1250 # Std module, so should be around - but you never know!
1251 return proxies
1252 try:
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001253 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1254 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Mark Hammond4f570b92000-07-26 07:04:38 +00001255 proxyEnable = _winreg.QueryValueEx(internetSettings,
1256 'ProxyEnable')[0]
1257 if proxyEnable:
1258 # Returned as Unicode but problems if not converted to ASCII
1259 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1260 'ProxyServer')[0])
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001261 if '=' in proxyServer:
1262 # Per-protocol settings
Mark Hammond4f570b92000-07-26 07:04:38 +00001263 for p in proxyServer.split(';'):
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001264 protocol, address = p.split('=', 1)
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001265 # See if address has a type:// prefix
Guido van Rossum64e5aa92002-04-02 14:38:16 +00001266 import re
1267 if not re.match('^([^/:]+)://', address):
Guido van Rossumb955d6c2002-03-31 23:38:48 +00001268 address = '%s://%s' % (protocol, address)
1269 proxies[protocol] = address
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001270 else:
1271 # Use one setting for all protocols
1272 if proxyServer[:5] == 'http:':
1273 proxies['http'] = proxyServer
1274 else:
1275 proxies['http'] = 'http://%s' % proxyServer
1276 proxies['ftp'] = 'ftp://%s' % proxyServer
Mark Hammond4f570b92000-07-26 07:04:38 +00001277 internetSettings.Close()
1278 except (WindowsError, ValueError, TypeError):
1279 # Either registry key not found etc, or the value in an
1280 # unexpected format.
1281 # proxies already set up to be empty so nothing to do
1282 pass
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001283 return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001284
Mark Hammond4f570b92000-07-26 07:04:38 +00001285 def getproxies():
1286 """Return a dictionary of scheme -> proxy server URL mappings.
1287
1288 Returns settings gathered from the environment, if specified,
1289 or the registry.
1290
1291 """
1292 return getproxies_environment() or getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001293
1294 def proxy_bypass(host):
1295 try:
1296 import _winreg
1297 import re
Tim Peters55c12d42001-08-09 18:04:14 +00001298 except ImportError:
1299 # Std modules, so should be around - but you never know!
1300 return 0
1301 try:
1302 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1303 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1304 proxyEnable = _winreg.QueryValueEx(internetSettings,
1305 'ProxyEnable')[0]
1306 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1307 'ProxyOverride')[0])
1308 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1309 except WindowsError:
1310 return 0
1311 if not proxyEnable or not proxyOverride:
1312 return 0
1313 # try to make a host list from name and IP address.
1314 host = [host]
1315 try:
1316 addr = socket.gethostbyname(host[0])
1317 if addr != host:
1318 host.append(addr)
1319 except socket.error:
1320 pass
1321 # make a check value list from the registry entry: replace the
1322 # '<local>' string by the localhost entry and the corresponding
1323 # canonical entry.
1324 proxyOverride = proxyOverride.split(';')
1325 i = 0
1326 while i < len(proxyOverride):
1327 if proxyOverride[i] == '<local>':
1328 proxyOverride[i:i+1] = ['localhost',
1329 '127.0.0.1',
1330 socket.gethostname(),
1331 socket.gethostbyname(
1332 socket.gethostname())]
1333 i += 1
1334 # print proxyOverride
1335 # now check if we match one of the registry values.
1336 for test in proxyOverride:
Tim Petersab9ba272001-08-09 21:40:30 +00001337 test = test.replace(".", r"\.") # mask dots
1338 test = test.replace("*", r".*") # change glob sequence
1339 test = test.replace("?", r".") # change glob char
Tim Peters55c12d42001-08-09 18:04:14 +00001340 for val in host:
1341 # print "%s <--> %s" %( test, val )
1342 if re.match(test, val, re.I):
1343 return 1
1344 return 0
1345
Mark Hammond4f570b92000-07-26 07:04:38 +00001346else:
1347 # By default use environment variables
1348 getproxies = getproxies_environment
1349
Tim Peters55c12d42001-08-09 18:04:14 +00001350 def proxy_bypass(host):
1351 return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001352
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001353# Test and time quote() and unquote()
def test1():
    """Round-trip every character code through quote() and unquote().

    The sample string holds the characters 0..255, repeated four
    times.  'Wrong!' is printed when unquoting the quoted sample does
    not give the sample back; the sample, its quoted form, the
    round-tripped result and the elapsed time in seconds are printed
    in any case.
    """
    sample = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    quoted = quote(sample)
    restored = unquote(quoted)
    finished = time.time()
    if restored != sample:
        print('Wrong!')
    for text in (sample, quoted, restored):
        print(repr(text))
    print(' '.join([str(round(finished - started, 3)), 'sec']))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001368
1369
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a transfer.

    The three arguments are formatted as integers into a single line
    naming the block number, the block size and the total size.
    """
    # Report during remote transfers
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001374
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001375# Test program
def test(args=[]):
    """Retrieve each URL in args and print what came back.

    For every URL a banner line, the local file name returned by
    urlretrieve(), the headers (when there are any) and the file
    contents with carriage returns removed are printed.  When args is
    empty a built-in list of sample URLs is used instead; an https URL
    is added to that list if URLopener has an open_https method.

    urlcleanup() is called at the end, even if a retrieval fails.
    """
    if not args:
        # args is rebound here, never modified in place, so the shared
        # default list stays empty.
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s %s' % (k + ':', h[k]))
                print('======')
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                # Close explicitly instead of relying on the reference
                # count dropping to zero.
                fp.close()
            data = data.replace('\r', '')
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001408
Guido van Rossum23490151998-06-25 02:39:00 +00001409def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001410 import getopt, sys
1411 try:
1412 opts, args = getopt.getopt(sys.argv[1:], "th")
1413 except getopt.error, msg:
1414 print msg
1415 print "Use -h for help"
1416 return
1417 t = 0
1418 for o, a in opts:
1419 if o == '-t':
1420 t = t + 1
1421 if o == '-h':
1422 print "Usage: python urllib.py [-t] [url ...]"
1423 print "-t runs self-test;",
1424 print "otherwise, contents of urls are printed"
1425 return
1426 if t:
1427 if t > 1:
1428 test1()
1429 test(args)
1430 else:
1431 if not args:
1432 print "Use -h for help"
1433 for url in args:
1434 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001435
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001436# Run test program when run as a script
1437if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001438 main()