blob: eb7e496c822a60761aa3074c32ce39d2b722ceb8 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000030import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Guido van Rossumb2493f82000-12-15 15:01:37 +000040__version__ = '1.15' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Pick the URL <-> local path converters for the current platform.
# Mac, Windows and RISC OS ship their own conversion modules; everywhere
# else the conversion is delegated to unquote() / quote().
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Return the local path for a URL path (the result of unquote())."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Return the URL path for a local path (the result of quote())."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    Open url with a FancyURLopener and return what its open() method
    returns.  When data is not None it is passed on to open() as well.

    If proxies is given, a fresh FancyURLopener using that mapping is
    created for this call only.  Otherwise a single module-level opener
    is created on first use and shared by later calls.
    """
    global _urlopener
    if proxies is not None:
        # Hand the mapping over positionally: this works whether or not
        # FancyURLopener.__init__ accepts keyword arguments.
        opener = FancyURLopener(proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments are
    handed to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if there is one."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
89
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000090ftpcache = {}
91class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000092 """Class to open URLs.
93 This is a class rather than just a subroutine because we may need
94 more than one set of global protocol-specific options.
95 Note -- this is a base class for those who don't want the
96 automatic handling of errors type 302 (relocated) and 401
97 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000100
Guido van Rossumba311382000-08-24 16:18:04 +0000101 version = "Python-urllib/%s" % __version__
102
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000103 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000104 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000105 if proxies is None:
106 proxies = getproxies()
107 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
108 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 self.key_file = x509.get('key_file')
110 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000111 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000112 self.__tempfiles = []
113 self.__unlink = os.unlink # See cleanup()
114 self.tempcache = None
115 # Undocumented feature: if you assign {} to tempcache,
116 # it is used to cache files retrieved with
117 # self.retrieve(). This is not enabled by default
118 # since it does not work for changing documents (and I
119 # haven't got the logic to check expiration headers
120 # yet).
121 self.ftpcache = ftpcache
122 # Undocumented feature: you can use a different
123 # ftp cache by assigning to the .ftpcache member;
124 # in case you want logically independent URL openers
125 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000126
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000127 def __del__(self):
128 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000129
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000130 def close(self):
131 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000132
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000133 def cleanup(self):
134 # This code sometimes runs when the rest of this module
135 # has already been deleted, so it can't use any globals
136 # or import anything.
137 if self.__tempfiles:
138 for file in self.__tempfiles:
139 try:
140 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000141 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000142 pass
143 del self.__tempfiles[:]
144 if self.tempcache:
145 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000146
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000148 """Add a header to be used by the HTTP interface only
149 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000154 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000155 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 if self.tempcache and self.tempcache.has_key(fullurl):
157 filename, headers = self.tempcache[fullurl]
158 fp = open(filename, 'rb')
159 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 urltype, url = splittype(fullurl)
161 if not urltype:
162 urltype = 'file'
163 if self.proxies.has_key(urltype):
164 proxy = self.proxies[urltype]
165 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000166 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000168 else:
169 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000170 name = 'open_' + urltype
171 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 if '-' in name:
173 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000174 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000176 if proxy:
177 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 else:
179 return self.open_unknown(fullurl, data)
180 try:
181 if data is None:
182 return getattr(self, name)(url)
183 else:
184 return getattr(self, name)(url, data)
185 except socket.error, msg:
186 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000187
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000188 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000189 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000190 type, url = splittype(fullurl)
191 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000192
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 def open_unknown_proxy(self, proxy, fullurl, data=None):
194 """Overridable interface to open unknown URL type."""
195 type, url = splittype(fullurl)
196 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
197
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000198 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000199 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000200 """retrieve(url) returns (filename, None) for a local object
201 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000202 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000203 if self.tempcache and self.tempcache.has_key(url):
204 return self.tempcache[url]
205 type, url1 = splittype(url)
206 if not filename and (not type or type == 'file'):
207 try:
208 fp = self.open_local_file(url1)
209 hdrs = fp.info()
210 del fp
211 return url2pathname(splithost(url1)[1]), hdrs
212 except IOError, msg:
213 pass
Fred Drake316a7932000-08-24 01:01:26 +0000214 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000215 headers = fp.info()
216 if not filename:
217 import tempfile
218 garbage, path = splittype(url)
219 garbage, path = splithost(path or "")
220 path, garbage = splitquery(path or "")
221 path, garbage = splitattr(path or "")
222 suffix = os.path.splitext(path)[1]
223 filename = tempfile.mktemp(suffix)
224 self.__tempfiles.append(filename)
225 result = filename, headers
226 if self.tempcache is not None:
227 self.tempcache[url] = result
228 tfp = open(filename, 'wb')
229 bs = 1024*8
230 size = -1
231 blocknum = 1
232 if reporthook:
233 if headers.has_key("content-length"):
234 size = int(headers["Content-Length"])
235 reporthook(0, bs, size)
236 block = fp.read(bs)
237 if reporthook:
238 reporthook(1, bs, size)
239 while block:
240 tfp.write(block)
241 block = fp.read(bs)
242 blocknum = blocknum + 1
243 if reporthook:
244 reporthook(blocknum, bs, size)
245 fp.close()
246 tfp.close()
247 del fp
248 del tfp
249 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000250
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000252
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000253 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000254 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000255 import httplib
256 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000257 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000258 host, selector = splithost(url)
259 if host:
260 user_passwd, host = splituser(host)
261 host = unquote(host)
262 realhost = host
263 else:
264 host, selector = url
265 urltype, rest = splittype(selector)
266 url = rest
267 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000268 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000269 realhost = None
270 else:
271 realhost, rest = splithost(rest)
272 if realhost:
273 user_passwd, realhost = splituser(realhost)
274 if user_passwd:
275 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000276 if proxy_bypass(realhost):
277 host = realhost
278
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000279 #print "proxy via http:", host, selector
280 if not host: raise IOError, ('http error', 'no host given')
281 if user_passwd:
282 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000283 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000284 else:
285 auth = None
286 h = httplib.HTTP(host)
287 if data is not None:
288 h.putrequest('POST', selector)
289 h.putheader('Content-type', 'application/x-www-form-urlencoded')
290 h.putheader('Content-length', '%d' % len(data))
291 else:
292 h.putrequest('GET', selector)
293 if auth: h.putheader('Authorization', 'Basic %s' % auth)
294 if realhost: h.putheader('Host', realhost)
295 for args in self.addheaders: apply(h.putheader, args)
296 h.endheaders()
297 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000298 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 errcode, errmsg, headers = h.getreply()
300 fp = h.getfile()
301 if errcode == 200:
302 return addinfourl(fp, headers, "http:" + url)
303 else:
304 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000306 else:
307 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000308
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000310 """Handle http errors.
311 Derived class can override this, or provide specific handlers
312 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 # First check if there's a specific handler for this error
314 name = 'http_error_%d' % errcode
315 if hasattr(self, name):
316 method = getattr(self, name)
317 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000318 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000319 else:
320 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000321 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000322 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000323
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000324 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000325 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000326 void = fp.read()
327 fp.close()
328 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000329
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000330 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000331 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000332 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000333 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000334 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000335 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000336 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000337 if host:
338 user_passwd, host = splituser(host)
339 host = unquote(host)
340 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000341 else:
342 host, selector = url
343 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000344 url = rest
345 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000346 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000347 realhost = None
348 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000349 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000350 if realhost:
351 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000352 if user_passwd:
353 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000354 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000355 if not host: raise IOError, ('https error', 'no host given')
356 if user_passwd:
357 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000358 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000359 else:
360 auth = None
361 h = httplib.HTTPS(host, 0,
362 key_file=self.key_file,
363 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000364 if data is not None:
365 h.putrequest('POST', selector)
366 h.putheader('Content-type',
367 'application/x-www-form-urlencoded')
368 h.putheader('Content-length', '%d' % len(data))
369 else:
370 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000371 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000372 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000373 for args in self.addheaders: apply(h.putheader, args)
374 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000375 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000376 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000377 errcode, errmsg, headers = h.getreply()
378 fp = h.getfile()
379 if errcode == 200:
Guido van Rossumb931bf32001-12-08 17:09:07 +0000380 return addinfourl(fp, headers, "https:" + url)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000381 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000382 if data is None:
383 return self.http_error(url, fp, errcode, errmsg, headers)
384 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000385 return self.http_error(url, fp, errcode, errmsg, headers,
386 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000387
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000388 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000389 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000390 import gopherlib
391 host, selector = splithost(url)
392 if not host: raise IOError, ('gopher error', 'no host given')
393 host = unquote(host)
394 type, selector = splitgophertype(selector)
395 selector, query = splitquery(selector)
396 selector = unquote(selector)
397 if query:
398 query = unquote(query)
399 fp = gopherlib.send_query(selector, query, host)
400 else:
401 fp = gopherlib.send_selector(selector, host)
402 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000403
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000404 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000405 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000406 if url[:2] == '//' and url[2:3] != '/':
407 return self.open_ftp(url)
408 else:
409 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000410
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000412 """Use local file."""
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000413 import mimetypes, mimetools, rfc822, StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000414 host, file = splithost(url)
415 localname = url2pathname(file)
Guido van Rossuma2da3052002-04-15 00:25:01 +0000416 try:
417 stats = os.stat(localname)
418 except OSError, e:
419 raise IOError(e.errno, e.strerror, e.filename)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000420 size = stats.st_size
421 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000422 mtype = mimetypes.guess_type(url)[0]
423 headers = mimetools.Message(StringIO.StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000424 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
425 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000426 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000427 urlfile = file
428 if file[:1] == '/':
429 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000430 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000431 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000432 host, port = splitport(host)
433 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000434 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000435 urlfile = file
436 if file[:1] == '/':
437 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000438 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000439 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000440 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000441
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000442 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000443 """Use FTP protocol."""
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000444 import mimetypes, mimetools, StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000445 host, path = splithost(url)
446 if not host: raise IOError, ('ftp error', 'no host given')
447 host, port = splitport(host)
448 user, host = splituser(host)
449 if user: user, passwd = splitpasswd(user)
450 else: passwd = None
451 host = unquote(host)
452 user = unquote(user or '')
453 passwd = unquote(passwd or '')
454 host = socket.gethostbyname(host)
455 if not port:
456 import ftplib
457 port = ftplib.FTP_PORT
458 else:
459 port = int(port)
460 path, attrs = splitattr(path)
461 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000462 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000463 dirs, file = dirs[:-1], dirs[-1]
464 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000465 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000466 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000467 # XXX thread unsafe!
468 if len(self.ftpcache) > MAXFTPCACHE:
469 # Prune the cache, rather arbitrarily
470 for k in self.ftpcache.keys():
471 if k != key:
472 v = self.ftpcache[k]
473 del self.ftpcache[k]
474 v.close()
475 try:
476 if not self.ftpcache.has_key(key):
477 self.ftpcache[key] = \
478 ftpwrapper(user, passwd, host, port, dirs)
479 if not file: type = 'D'
480 else: type = 'I'
481 for attr in attrs:
482 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000483 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000484 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000485 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000486 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000487 mtype = mimetypes.guess_type("ftp:" + url)[0]
488 headers = ""
489 if mtype:
490 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000491 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000492 headers += "Content-Length: %d\n" % retrlen
493 headers = mimetools.Message(StringIO.StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000494 return addinfourl(fp, headers, "ftp:" + url)
495 except ftperrors(), msg:
496 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000497
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000498 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000499 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000500 # ignore POSTed data
501 #
502 # syntax of data URLs:
503 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
504 # mediatype := [ type "/" subtype ] *( ";" parameter )
505 # data := *urlchar
506 # parameter := attribute "=" value
Neal Norwitzaad18492002-03-26 16:25:01 +0000507 import StringIO, mimetools
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000508 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000509 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000510 except ValueError:
511 raise IOError, ('data error', 'bad data URL')
512 if not type:
513 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000514 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000515 if semi >= 0 and '=' not in type[semi:]:
516 encoding = type[semi+1:]
517 type = type[:semi]
518 else:
519 encoding = ''
520 msg = []
521 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
522 time.gmtime(time.time())))
523 msg.append('Content-type: %s' % type)
524 if encoding == 'base64':
525 import base64
526 data = base64.decodestring(data)
527 else:
528 data = unquote(data)
529 msg.append('Content-length: %d' % len(data))
530 msg.append('')
531 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000532 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000533 f = StringIO.StringIO(msg)
534 headers = mimetools.Message(f, 0)
535 f.fileno = None # needed for addinfourl
536 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000537
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000538
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000539class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000540 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000541
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000542 def __init__(self, *args):
543 apply(URLopener.__init__, (self,) + args)
544 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000545 self.tries = 0
546 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000547
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000548 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000549 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000550 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000551
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000552 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000553 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000554 self.tries += 1
555 if self.maxtries and self.tries >= self.maxtries:
556 if hasattr(self, "http_error_500"):
557 meth = self.http_error_500
558 else:
559 meth = self.http_error_default
560 self.tries = 0
561 return meth(url, fp, 500,
562 "Internal Server Error: Redirect Recursion", headers)
563 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
564 data)
565 self.tries = 0
566 return result
567
568 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000569 if headers.has_key('location'):
570 newurl = headers['location']
571 elif headers.has_key('uri'):
572 newurl = headers['uri']
573 else:
574 return
575 void = fp.read()
576 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000577 # In case the server sent a relative URL, join with original:
Moshe Zadka5d87d472001-04-09 14:54:21 +0000578 newurl = basejoin(self.type + ":" + url, newurl)
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000579 if data is None:
580 return self.open(newurl)
581 else:
582 return self.open(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000583
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000584 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000585 """Error 301 -- also relocated (permanently)."""
586 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000587
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000588 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000589 """Error 401 -- authentication required.
590 See this URL for a description of the basic authentication scheme:
591 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Moshe Zadkae99bd172001-02-27 06:27:04 +0000592 if not headers.has_key('www-authenticate'):
Tim Peters85ba6732001-02-28 08:26:44 +0000593 URLopener.http_error_default(self, url, fp,
Fred Drakec680ae82001-10-13 18:37:07 +0000594 errcode, errmsg, headers)
Moshe Zadkae99bd172001-02-27 06:27:04 +0000595 stuff = headers['www-authenticate']
596 import re
597 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
598 if not match:
Tim Peters85ba6732001-02-28 08:26:44 +0000599 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000600 errcode, errmsg, headers)
601 scheme, realm = match.groups()
602 if scheme.lower() != 'basic':
Tim Peters85ba6732001-02-28 08:26:44 +0000603 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000604 errcode, errmsg, headers)
605 name = 'retry_' + self.type + '_basic_auth'
606 if data is None:
607 return getattr(self,name)(url, realm)
608 else:
609 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000610
def retry_http_basic_auth(self, url, realm, data=None):
    """Re-open an http URL with user:passwd@ credentials put in the host part.

    Returns None when get_user_passwd() yields neither a user name nor a
    password; otherwise returns the result of self.open() on the new URL.
    """
    host, selector = splithost(url)
    # Offset just past any '@' already in the host; 0 when there is none.
    # It is also passed on as get_user_passwd's clear_cache argument.
    offset = host.find('@') + 1
    host = host[offset:]
    user, passwd = self.get_user_passwd(host, realm, offset)
    if not user and not passwd:
        return None
    host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    newurl = 'http://' + host + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000623
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open an https URL with user:passwd@ credentials put in the host part.

    Returns None when get_user_passwd() yields neither a user name nor a
    password; otherwise returns the result of self.open_https().
    """
    host, selector = splithost(url)
    # Offset just past any '@' already in the host; 0 when there is none.
    # It is also passed on as get_user_passwd's clear_cache argument.
    offset = host.find('@') + 1
    host = host[offset:]
    user, passwd = self.get_user_passwd(host, realm, offset)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = '//' + credentials + '@' + host + selector
    return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000633
def get_user_passwd(self, host, realm, clear_cache = 0):
    """Return (user, passwd) for realm at host, consulting self.auth_cache.

    The cache key is realm + '@' + lower-cased host.  With a true
    clear_cache any cached entry is dropped and prompt_user_passwd() is
    asked again.  Only answers with a user or a password are cached.
    """
    key = realm + '@' + host.lower()
    cache = self.auth_cache
    if cache.has_key(key):
        if not clear_cache:
            return cache[key]
        del cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        cache[key] = (user, passwd)
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000644
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!

    Asks for a user name with raw_input() and a password with
    getpass.getpass().  Returns (user, passwd), or (None, None) if the
    prompt is interrupted with KeyboardInterrupt."""
    import getpass
    try:
        user = raw_input("Enter username for %s at %s: " % (realm, host))
        prompt = "Enter password for %s in %s at %s: " % (user, realm, host)
        passwd = getpass.getpass(prompt)
    except KeyboardInterrupt:
        # Emit a newline so later output starts on a fresh line.
        print
        return None, None
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000657
658
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000659# Utility functions
660
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup result is kept in the module-level _localhost and reused."""
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000668
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup result is kept in the module-level _thishost and reused."""
    global _thishost
    if _thishost:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000676
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use; its all_errors value is kept in the
    module-level _ftperrors and reused."""
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000685
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000686_noheaders = None
687def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000688 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000689 global _noheaders
690 if not _noheaders:
691 import mimetools
692 import StringIO
693 _noheaders = mimetools.Message(StringIO.StringIO(), 0)
694 _noheaders.fp.close() # Recycle file descriptor
695 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000696
697
698# Utility classes
699
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000700class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000701 """Class used by open_ftp() for cache of open FTP connections."""
702
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000703 def __init__(self, user, passwd, host, port, dirs):
704 self.user = user
705 self.passwd = passwd
706 self.host = host
707 self.port = port
708 self.dirs = dirs
709 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000710
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000711 def init(self):
712 import ftplib
713 self.busy = 0
714 self.ftp = ftplib.FTP()
715 self.ftp.connect(self.host, self.port)
716 self.ftp.login(self.user, self.passwd)
717 for dir in self.dirs:
718 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000719
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000720 def retrfile(self, file, type):
721 import ftplib
722 self.endtransfer()
723 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
724 else: cmd = 'TYPE ' + type; isdir = 0
725 try:
726 self.ftp.voidcmd(cmd)
727 except ftplib.all_errors:
728 self.init()
729 self.ftp.voidcmd(cmd)
730 conn = None
731 if file and not isdir:
732 # Use nlst to see if the file exists at all
733 try:
734 self.ftp.nlst(file)
735 except ftplib.error_perm, reason:
736 raise IOError, ('ftp error', reason), sys.exc_info()[2]
737 # Restore the transfer mode!
738 self.ftp.voidcmd(cmd)
739 # Try to retrieve as a file
740 try:
741 cmd = 'RETR ' + file
742 conn = self.ftp.ntransfercmd(cmd)
743 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000744 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000745 raise IOError, ('ftp error', reason), sys.exc_info()[2]
746 if not conn:
747 # Set transfer mode to ASCII!
748 self.ftp.voidcmd('TYPE A')
749 # Try a directory listing
750 if file: cmd = 'LIST ' + file
751 else: cmd = 'LIST'
752 conn = self.ftp.ntransfercmd(cmd)
753 self.busy = 1
754 # Pass back both a suitably decorated object and a retrieval length
755 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000756 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000757 def endtransfer(self):
758 if not self.busy:
759 return
760 self.busy = 0
761 try:
762 self.ftp.voidresp()
763 except ftperrors():
764 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000765
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000766 def close(self):
767 self.endtransfer()
768 try:
769 self.ftp.close()
770 except ftperrors():
771 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000772
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its read methods directly on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # readlines() and fileno() are copied only when fp provides them.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
                                             repr(id(self)), repr(self.fp))

    def close(self):
        """Drop the method references and close the wrapped file, if any."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000794
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called when close() is called."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the file, then call the hook; the hook is cleared after use."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000809
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000819
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember headers and url."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000833
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000834
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    A url that already has a type is returned unchanged.  Otherwise the
    type, and if necessary the host and leading path, are taken from base.
    """
    scheme, path = splittype(url)
    if scheme:
        # url is complete (it contains a type): nothing to inherit
        return url
    host, path = splithost(path)
    scheme, basepath = splittype(base)      # inherit type from base
    if host:
        # url names its own host, so only the type is inherited
        if not scheme:
            # no type inherited, so url must have started with //
            return url
        return scheme + '://' + host + path
    host, basepath = splithost(basepath)    # inherit host
    basepath = splittag(basepath)[0]        # drop the base's #tag
    basepath = splitquery(basepath)[0]      # and its ?query
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query: keep the whole basepath
            cut = len(basepath)
        else:
            # otherwise the last component of basepath is replaced
            cut = basepath.rfind('/')
        if cut >= 0:
            # remove last file component
            basepath = basepath[:cut+1]
        elif host:
            # basepath not absolute but a host is present: make absolute
            basepath = '/'
        else:
            # keep non-absolute
            basepath = ''
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            cut = basepath[:-1].rfind('/')
            if cut > 0:
                basepath = basepath[:cut+1]
            elif cut == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if host and path and path[0] != '/':
        path = '/' + path
    if scheme and host:
        return scheme + '://' + host + path
    if scheme:
        return scheme + ':' + path
    if host:
        return '//' + host + path   # don't know what this means
    return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000891
892
Guido van Rossum7c395db1994-07-04 22:14:49 +0000893# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000894# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000895# splittype('type:opaquestring') --> 'type', 'opaquestring'
896# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000897# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
898# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000899# splitport('host:port') --> 'host', 'port'
900# splitquery('/path?query') --> '/path', 'query'
901# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000902# splitattr('/path;attr1=value1;attr2=value2;...') ->
903# '/path', ['attr1=value1', 'attr2=value2', ...]
904# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000905# splitgophertype('/Xselector') --> 'X', 'selector'
906# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
908
Guido van Rossum4b46c0a2002-05-24 17:58:05 +0000909if hasattr(types, "UnicodeType"):
910 def _is_unicode(x):
911 return isinstance(x, unicode)
912else:
913 def _is_unicode(x):
914 return 0
915
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode arguments are returned unchanged.  A unicode URL that
    cannot be encoded as ASCII raises UnicodeError."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
927
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing <...> pair and a leading
    'URL:' prefix, each only when present."""
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000935
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased; (None, url) when url has no type."""
    global _typeprog
    if _typeprog is None:
        # Compile the pattern on first use only.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    return scheme.lower(), url[len(scheme) + 1:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000949
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'."""
    global _hostprog
    if _hostprog is None:
        # Compile the pattern on first use only.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000961
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and returned as a list;
    without an '@' the result is the tuple (None, host)."""
    global _userprog
    if _userprog is None:
        # Compile the pattern on first use only.
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000973
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is at the first ':'; (user, None) when there is none."""
    global _passwdprog
    if _passwdprog is None:
        # Compile the pattern on first use only.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000985
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; (host, None) when host
    does not end in ':' followed by digits."""
    global _portprog
    if _portprog is None:
        # Compile the pattern on first use only.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000998
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile the pattern on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        # An empty port is treated like a non-numeric one.
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001020
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is at the last '?'; (url, None) when there is none."""
    global _queryprog
    if _queryprog is None:
        # Compile the pattern on first use only.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001032
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is at the last '#'; (url, None) when there is none."""
    global _tagprog
    if _tagprog is None:
        # Compile the pattern on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001044
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001050
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is at the first '='; (attr, None) when there is none."""
    global _valueprog
    if _valueprog is None:
        # Compile the pattern on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001062
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector is a '/' followed by at
    least one more character."""
    if len(selector) < 2 or selector[0] != '/':
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001068
_hexdigits = '0123456789ABCDEFabcdef'

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' sequence (too short, or
    not two hex digits, e.g. '%+f' or '% a') is left unchanged.
    """
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int(code, 16) alone also accepts a
        # sign or leading whitespace, which are not valid in an escape.
        if (len(code) == 2 and code[0] in _hexdigits
                and code[1] in _hexdigits):
            try:
                res.append(chr(int(code, 16)) + item[2:])
            except ValueError:
                # Kept from the original: if joining the decoded
                # character to the rest fails, keep the escape as-is.
                res.append('%' + item)
        else:
            res.append('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001087
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # Every '+' stands for a space; translate those before unquoting.
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001094
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001095always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001096 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001097 '0123456789' '_.-')
1098
1099_fast_safe_test = always_safe + '/'
1100_fast_safe = None
1101
1102def _fast_quote(s):
1103 global _fast_safe
1104 if _fast_safe is None:
1105 _fast_safe = {}
1106 for c in _fast_safe_test:
1107 _fast_safe[c] = c
1108 res = list(s)
1109 for i in range(len(res)):
1110 c = res[i]
1111 if not _fast_safe.has_key(c):
Guido van Rossume27a7b82001-01-19 03:28:15 +00001112 res[i] = '%%%02X' % ord(c)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001113 return ''.join(res)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001114
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character of s that is neither in always_safe nor in safe is
    replaced by a '%XX' escape.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        # Default safe set: use the dictionary-based fast path.
        return _fast_quote(s)
    quoted = []
    for ch in s:
        if ch in safe:
            quoted.append(ch)
        else:
            quoted.append('%%%02X' % ord(ch))
    return ''.join(quoted)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001145
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Quote each space-separated word on its own, then join with '+'.
    words = [quote(word, safe) for word in s.split(' ')]
    return '+'.join(words)
Guido van Rossum0564e121996-12-13 14:47:36 +00001155
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001156def urlencode(query,doseq=0):
1157 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001158
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001159 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001160 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001161
1162 If the query arg is a sequence of two-element tuples, the order of the
1163 parameters in the output will match the order of parameters in the
1164 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001165 """
Tim Peters658cba62001-02-09 20:06:00 +00001166
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001167 if hasattr(query,"items"):
1168 # mapping objects
1169 query = query.items()
1170 else:
1171 # it's a bother at times that strings and string-like objects are
1172 # sequences...
1173 try:
1174 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001175 # non-empty strings will fail this
1176 if len(query) and type(query[0]) != types.TupleType:
1177 raise TypeError
1178 # zero-length sequences of all types will get here and succeed,
1179 # but that's a minor nit - since the original implementation
1180 # allowed empty dicts that type of behavior probably should be
1181 # preserved for consistency
1182 except TypeError:
1183 ty,va,tb = sys.exc_info()
1184 raise TypeError, "not a valid non-string sequence or mapping object", tb
1185
Guido van Rossume7b146f2000-02-04 15:28:42 +00001186 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001187 if not doseq:
1188 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001189 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001190 k = quote_plus(str(k))
1191 v = quote_plus(str(v))
1192 l.append(k + '=' + v)
1193 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001194 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001195 k = quote_plus(str(k))
1196 if type(v) == types.StringType:
1197 v = quote_plus(v)
1198 l.append(k + '=' + v)
Guido van Rossum4b46c0a2002-05-24 17:58:05 +00001199 elif _is_unicode(v):
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001200 # is there a reasonable way to convert to ASCII?
1201 # encode generates a string, but "replace" or "ignore"
1202 # lose information and "strict" can raise UnicodeError
1203 v = quote_plus(v.encode("ASCII","replace"))
1204 l.append(k + '=' + v)
1205 else:
1206 try:
1207 # is this a sufficient test for sequence-ness?
1208 x = len(v)
1209 except TypeError:
1210 # not a sequence
1211 v = quote_plus(str(v))
1212 l.append(k + '=' + v)
1213 else:
1214 # loop over the sequence
1215 for elt in v:
1216 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001217 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001218
Guido van Rossum442e7201996-03-20 15:33:11 +00001219# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively and variables with
    an empty value are ignored.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            proxies[name[:-len(suffix)]] = value
    return proxies
1235
Guido van Rossum4163e701998-08-06 13:39:09 +00001236if os.name == 'mac':
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    By convention the mac uses Internet Config to store
    proxies.  An HTTP proxy, for instance, is stored under
    the HttpProxy key.

    An empty dictionary is returned when the ic module cannot be
    imported or Internet Config cannot be opened.

    """
    try:
        import ic
    except ImportError:
        return {}

    try:
        config = ic.IC()
    except ic.error:
        return {}
    proxies = {}
    # HTTP:
    use_http = config.has_key('UseHTTPProxy') and config['UseHTTPProxy']
    if use_http:
        try:
            http_host = config['HTTPProxyHost']
        except ic.error:
            pass
        else:
            proxies['http'] = 'http://%s' % http_host
    # FTP: XXXX To be done.
    # Gopher: XXXX To be done.
    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001266
def proxy_bypass(x):
    """Always return 0; the argument is ignored."""
    return 0
1269
Mark Hammond4f570b92000-07-26 07:04:38 +00001270elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    The ProxyEnable and ProxyServer values under the current user's
    'Internet Settings' key are read.  ProxyServer is either a single
    server used for http and ftp, or a ';'-separated list of
    protocol=address entries.  Registry errors and malformed values are
    ignored: whatever was collected up to that point is returned.

    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    import re
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        # Nested try/finally so the key is closed even when a query or
        # the parsing below raises.
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies keeps whatever was collected so far (possibly nothing)
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001315
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Proxies found by getproxies_environment() take precedence; the
    registry is consulted through getproxies_registry() only when the
    environment yields an empty result.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001324
def proxy_bypass(host):
    """Return 1 if host matches the registry's proxy override list, else 0.

    The 'ProxyEnable' and 'ProxyOverride' values are read from the
    current user's Internet Settings registry key.  'ProxyOverride' is
    a ';'-separated list of glob patterns ('*' and '?' wildcards); the
    special entry '<local>' stands for localhost, 127.0.0.1 and this
    machine's own name and address.  Both host and, when it resolves,
    its IP address are tested against every pattern,
    case-insensitively.  As before, a pattern only has to match a
    prefix of the name.

    0 is returned when _winreg or re cannot be imported, when the
    registry cannot be read, or when proxying is disabled or no
    override list is set.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        # Release the key handle whether or not the queries succeed.
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            internetSettings.Close()
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0

    # Names to test: the host as given plus its IP address.
    candidates = [host]
    try:
        addr = socket.gethostbyname(host)
    except socket.error:
        pass
    else:
        # Compare with the name itself (the old code compared with the
        # list, which is never equal, so duplicates were appended).
        if addr != host:
            candidates.append(addr)

    # Build the pattern list from the registry entry, expanding
    # '<local>' and dropping empty entries: an empty pattern would
    # match every host.
    patterns = []
    for entry in proxyOverride.split(';'):
        if not entry:
            continue
        if entry == '<local>':
            patterns.extend(['localhost', '127.0.0.1'])
            try:
                localname = socket.gethostname()
                patterns.append(localname)
                patterns.append(socket.gethostbyname(localname))
            except socket.error:
                # Own name not resolvable; keep what we have.
                pass
        else:
            patterns.append(entry)

    # Now check if we match one of the registry values.
    for pattern in patterns:
        # Escape everything, then turn the two glob wildcards back
        # into their regular expression equivalents.
        regex = re.escape(pattern)
        regex = regex.replace(r'\*', '.*')
        regex = regex.replace(r'\?', '.')
        for val in candidates:
            if re.match(regex, val, re.I):
                return 1
    return 0
1376
Mark Hammond4f570b92000-07-26 07:04:38 +00001377else:
1378 # By default use environment variables
1379 getproxies = getproxies_environment
1380
def proxy_bypass(host):
    """Return 0: the default platform keeps no proxy exception list.

    host is accepted for signature compatibility with the other
    platform variants and is not examined.
    """
    return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001383
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001384# Test and time quote() and unquote()
def test1():
    """Round-trip a test string through quote() and unquote() and time it.

    The test string holds every character code 0..255, repeated four
    times.  'Wrong!' is printed when the round trip does not give the
    string back; the reprs of the input, the quoted form and the
    unquoted form are always printed, followed by the elapsed time in
    seconds rounded to three places.
    """
    s = ''.join(map(chr, range(256))) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print('%s sec' % round(t1 - t0, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001399
1400
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line during a remote transfer.

    The three arguments are formatted as integers and printed on a
    single line; nothing is returned.
    """
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001405
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001406# Test program
def test(args=[]):
    """Retrieve each URL in args and print what came back.

    When args is empty a built-in list of sample URLs is used (plus
    an https URL when URLopener has an open_https method).  For every
    URL the local file name, the headers (if any) and the retrieved
    data with carriage returns removed are printed.  urlcleanup() is
    always called at the end, even when a retrieval fails.

    The mutable default is safe here: args is rebound to a fresh list
    before anything is appended to it.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s: %s' % (k, h[k]))
                print('======')
            # Close the file explicitly instead of relying on the
            # reference count dropping to zero.
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                fp.close()
            # Strip carriage returns so the data prints cleanly.
            data = data.replace('\r', '')
            print(data)
            # Drop the references to the file name and headers.
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001439
Guido van Rossum23490151998-06-25 02:39:00 +00001440def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001441 import getopt, sys
1442 try:
1443 opts, args = getopt.getopt(sys.argv[1:], "th")
1444 except getopt.error, msg:
1445 print msg
1446 print "Use -h for help"
1447 return
1448 t = 0
1449 for o, a in opts:
1450 if o == '-t':
1451 t = t + 1
1452 if o == '-h':
1453 print "Usage: python urllib.py [-t] [url ...]"
1454 print "-t runs self-test;",
1455 print "otherwise, contents of urls are printed"
1456 return
1457 if t:
1458 if t > 1:
1459 test1()
1460 test(args)
1461 else:
1462 if not args:
1463 print "Use -h for help"
1464 for url in args:
1465 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001466
# Run the command-line driver when this file is executed as a script
if __name__ == '__main__':
    main()