blob: 2d53c125885205b93e8dbe693a9b1d685a27d4b2 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000029import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000030import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000031
Skip Montanaro40fc1602001-03-01 04:27:19 +000032__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000034 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
Guido van Rossumb2493f82000-12-15 15:01:37 +000040__version__ = '1.15' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000041
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000042MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000043
# Choose the URL <-> local pathname converters for this platform.
# Mac, Windows and RISC OS each ship a dedicated helper module; every
# other platform falls back to plain (un)quoting of the path.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert the path part of a URL to a local pathname (unquote)."""
        local_path = unquote(pathname)
        return local_path

    def pathname2url(pathname):
        """Convert a local pathname to the path part of a URL (quote)."""
        url_path = quote(pathname)
        return url_path
Guido van Rossum33add0a1998-12-18 15:25:22 +000056
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000057# This really consists of two pieces:
58# (1) a class which handles opening of all sorts of URLs
59# (plus assorted utilities etc.)
60# (2) a set of functions for parsing URLs
61# XXX Should these be separated out into different modules?
62
63
64# Shortcut for basic usage
65_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener using that mapping
    serves this one call and is not kept.  Otherwise the module-wide
    opener is used (and created on first use).  data, when not None,
    is passed on to the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        # Caller-specific proxies: never touch the shared opener.
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the module-wide opener's retrieve() method.

    The shared FancyURLopener is created on first use; all arguments
    are handed to its retrieve() unchanged and its result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = FancyURLopener()
        _urlopener = opener
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the module-wide opener, if one was ever created."""
    opener = _urlopener
    if not opener:
        # No shared opener yet, so there is nothing to clean up.
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000088
89
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000090ftpcache = {}
91class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000092 """Class to open URLs.
93 This is a class rather than just a subroutine because we may need
94 more than one set of global protocol-specific options.
95 Note -- this is a base class for those who don't want the
96 automatic handling of errors type 302 (relocated) and 401
97 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +0000100
Guido van Rossumba311382000-08-24 16:18:04 +0000101 version = "Python-urllib/%s" % __version__
102
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000103 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000104 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000105 if proxies is None:
106 proxies = getproxies()
107 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
108 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000109 self.key_file = x509.get('key_file')
110 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000111 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000112 self.__tempfiles = []
113 self.__unlink = os.unlink # See cleanup()
114 self.tempcache = None
115 # Undocumented feature: if you assign {} to tempcache,
116 # it is used to cache files retrieved with
117 # self.retrieve(). This is not enabled by default
118 # since it does not work for changing documents (and I
119 # haven't got the logic to check expiration headers
120 # yet).
121 self.ftpcache = ftpcache
122 # Undocumented feature: you can use a different
123 # ftp cache by assigning to the .ftpcache member;
124 # in case you want logically independent URL openers
125 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000126
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000127 def __del__(self):
128 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000129
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000130 def close(self):
131 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000132
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000133 def cleanup(self):
134 # This code sometimes runs when the rest of this module
135 # has already been deleted, so it can't use any globals
136 # or import anything.
137 if self.__tempfiles:
138 for file in self.__tempfiles:
139 try:
140 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000141 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000142 pass
143 del self.__tempfiles[:]
144 if self.tempcache:
145 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000146
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000147 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000148 """Add a header to be used by the HTTP interface only
149 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000150 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000151
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000153 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000154 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000155 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 if self.tempcache and self.tempcache.has_key(fullurl):
157 filename, headers = self.tempcache[fullurl]
158 fp = open(filename, 'rb')
159 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000160 urltype, url = splittype(fullurl)
161 if not urltype:
162 urltype = 'file'
163 if self.proxies.has_key(urltype):
164 proxy = self.proxies[urltype]
165 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000166 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000167 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000168 else:
169 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000170 name = 'open_' + urltype
171 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 if '-' in name:
173 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000174 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000175 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000176 if proxy:
177 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 else:
179 return self.open_unknown(fullurl, data)
180 try:
181 if data is None:
182 return getattr(self, name)(url)
183 else:
184 return getattr(self, name)(url, data)
185 except socket.error, msg:
186 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000187
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000188 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000189 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000190 type, url = splittype(fullurl)
191 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000192
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000193 def open_unknown_proxy(self, proxy, fullurl, data=None):
194 """Overridable interface to open unknown URL type."""
195 type, url = splittype(fullurl)
196 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
197
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000198 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000199 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000200 """retrieve(url) returns (filename, None) for a local object
201 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000202 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000203 if self.tempcache and self.tempcache.has_key(url):
204 return self.tempcache[url]
205 type, url1 = splittype(url)
206 if not filename and (not type or type == 'file'):
207 try:
208 fp = self.open_local_file(url1)
209 hdrs = fp.info()
210 del fp
211 return url2pathname(splithost(url1)[1]), hdrs
212 except IOError, msg:
213 pass
Fred Drake316a7932000-08-24 01:01:26 +0000214 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000215 headers = fp.info()
216 if not filename:
217 import tempfile
218 garbage, path = splittype(url)
219 garbage, path = splithost(path or "")
220 path, garbage = splitquery(path or "")
221 path, garbage = splitattr(path or "")
222 suffix = os.path.splitext(path)[1]
223 filename = tempfile.mktemp(suffix)
224 self.__tempfiles.append(filename)
225 result = filename, headers
226 if self.tempcache is not None:
227 self.tempcache[url] = result
228 tfp = open(filename, 'wb')
229 bs = 1024*8
230 size = -1
231 blocknum = 1
232 if reporthook:
233 if headers.has_key("content-length"):
234 size = int(headers["Content-Length"])
235 reporthook(0, bs, size)
236 block = fp.read(bs)
237 if reporthook:
238 reporthook(1, bs, size)
239 while block:
240 tfp.write(block)
241 block = fp.read(bs)
242 blocknum = blocknum + 1
243 if reporthook:
244 reporthook(blocknum, bs, size)
245 fp.close()
246 tfp.close()
247 del fp
248 del tfp
249 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000250
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000252
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000253 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000254 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000255 import httplib
256 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000257 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000258 host, selector = splithost(url)
259 if host:
260 user_passwd, host = splituser(host)
261 host = unquote(host)
262 realhost = host
263 else:
264 host, selector = url
265 urltype, rest = splittype(selector)
266 url = rest
267 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000268 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000269 realhost = None
270 else:
271 realhost, rest = splithost(rest)
272 if realhost:
273 user_passwd, realhost = splituser(realhost)
274 if user_passwd:
275 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000276 if proxy_bypass(realhost):
277 host = realhost
278
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000279 #print "proxy via http:", host, selector
280 if not host: raise IOError, ('http error', 'no host given')
281 if user_passwd:
282 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000283 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000284 else:
285 auth = None
286 h = httplib.HTTP(host)
287 if data is not None:
288 h.putrequest('POST', selector)
289 h.putheader('Content-type', 'application/x-www-form-urlencoded')
290 h.putheader('Content-length', '%d' % len(data))
291 else:
292 h.putrequest('GET', selector)
293 if auth: h.putheader('Authorization', 'Basic %s' % auth)
294 if realhost: h.putheader('Host', realhost)
295 for args in self.addheaders: apply(h.putheader, args)
296 h.endheaders()
297 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000298 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 errcode, errmsg, headers = h.getreply()
300 fp = h.getfile()
301 if errcode == 200:
302 return addinfourl(fp, headers, "http:" + url)
303 else:
304 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000306 else:
307 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000308
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000310 """Handle http errors.
311 Derived class can override this, or provide specific handlers
312 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000313 # First check if there's a specific handler for this error
314 name = 'http_error_%d' % errcode
315 if hasattr(self, name):
316 method = getattr(self, name)
317 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000318 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000319 else:
320 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000321 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000322 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000323
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000324 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000325 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000326 void = fp.read()
327 fp.close()
328 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000329
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000330 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000331 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000332 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000333 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000334 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000335 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000336 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000337 if host:
338 user_passwd, host = splituser(host)
339 host = unquote(host)
340 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000341 else:
342 host, selector = url
343 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000344 url = rest
345 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000346 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000347 realhost = None
348 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000349 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000350 if realhost:
351 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000352 if user_passwd:
353 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000354 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000355 if not host: raise IOError, ('https error', 'no host given')
356 if user_passwd:
357 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000358 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000359 else:
360 auth = None
361 h = httplib.HTTPS(host, 0,
362 key_file=self.key_file,
363 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000364 if data is not None:
365 h.putrequest('POST', selector)
366 h.putheader('Content-type',
367 'application/x-www-form-urlencoded')
368 h.putheader('Content-length', '%d' % len(data))
369 else:
370 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000371 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000372 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000373 for args in self.addheaders: apply(h.putheader, args)
374 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000375 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000376 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000377 errcode, errmsg, headers = h.getreply()
378 fp = h.getfile()
379 if errcode == 200:
Guido van Rossumb931bf32001-12-08 17:09:07 +0000380 return addinfourl(fp, headers, "https:" + url)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000381 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000382 if data is None:
383 return self.http_error(url, fp, errcode, errmsg, headers)
384 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000385 return self.http_error(url, fp, errcode, errmsg, headers,
386 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000387
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000388 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000389 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000390 import gopherlib
391 host, selector = splithost(url)
392 if not host: raise IOError, ('gopher error', 'no host given')
393 host = unquote(host)
394 type, selector = splitgophertype(selector)
395 selector, query = splitquery(selector)
396 selector = unquote(selector)
397 if query:
398 query = unquote(query)
399 fp = gopherlib.send_query(selector, query, host)
400 else:
401 fp = gopherlib.send_selector(selector, host)
402 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000403
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000404 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000405 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000406 if url[:2] == '//' and url[2:3] != '/':
407 return self.open_ftp(url)
408 else:
409 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000410
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000412 """Use local file."""
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000413 import mimetypes, mimetools, rfc822, StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000414 host, file = splithost(url)
415 localname = url2pathname(file)
416 stats = os.stat(localname)
Walter Dörwald92b48b72002-03-22 17:30:38 +0000417 size = stats.st_size
418 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000419 mtype = mimetypes.guess_type(url)[0]
420 headers = mimetools.Message(StringIO.StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000421 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
422 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000423 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000424 urlfile = file
425 if file[:1] == '/':
426 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000427 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000428 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000429 host, port = splitport(host)
430 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000431 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000432 urlfile = file
433 if file[:1] == '/':
434 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000435 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000436 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000437 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000438
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000439 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000440 """Use FTP protocol."""
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000441 import mimetypes, mimetools, StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000442 host, path = splithost(url)
443 if not host: raise IOError, ('ftp error', 'no host given')
444 host, port = splitport(host)
445 user, host = splituser(host)
446 if user: user, passwd = splitpasswd(user)
447 else: passwd = None
448 host = unquote(host)
449 user = unquote(user or '')
450 passwd = unquote(passwd or '')
451 host = socket.gethostbyname(host)
452 if not port:
453 import ftplib
454 port = ftplib.FTP_PORT
455 else:
456 port = int(port)
457 path, attrs = splitattr(path)
458 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000459 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000460 dirs, file = dirs[:-1], dirs[-1]
461 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000462 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000463 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000464 # XXX thread unsafe!
465 if len(self.ftpcache) > MAXFTPCACHE:
466 # Prune the cache, rather arbitrarily
467 for k in self.ftpcache.keys():
468 if k != key:
469 v = self.ftpcache[k]
470 del self.ftpcache[k]
471 v.close()
472 try:
473 if not self.ftpcache.has_key(key):
474 self.ftpcache[key] = \
475 ftpwrapper(user, passwd, host, port, dirs)
476 if not file: type = 'D'
477 else: type = 'I'
478 for attr in attrs:
479 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000480 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000481 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000482 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000483 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000484 mtype = mimetypes.guess_type("ftp:" + url)[0]
485 headers = ""
486 if mtype:
487 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000488 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000489 headers += "Content-Length: %d\n" % retrlen
490 headers = mimetools.Message(StringIO.StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000491 return addinfourl(fp, headers, "ftp:" + url)
492 except ftperrors(), msg:
493 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000494
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000495 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000496 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000497 # ignore POSTed data
498 #
499 # syntax of data URLs:
500 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
501 # mediatype := [ type "/" subtype ] *( ";" parameter )
502 # data := *urlchar
503 # parameter := attribute "=" value
Neal Norwitzaad18492002-03-26 16:25:01 +0000504 import StringIO, mimetools
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000505 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000506 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000507 except ValueError:
508 raise IOError, ('data error', 'bad data URL')
509 if not type:
510 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000511 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000512 if semi >= 0 and '=' not in type[semi:]:
513 encoding = type[semi+1:]
514 type = type[:semi]
515 else:
516 encoding = ''
517 msg = []
518 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
519 time.gmtime(time.time())))
520 msg.append('Content-type: %s' % type)
521 if encoding == 'base64':
522 import base64
523 data = base64.decodestring(data)
524 else:
525 data = unquote(data)
526 msg.append('Content-length: %d' % len(data))
527 msg.append('')
528 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000529 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000530 f = StringIO.StringIO(msg)
531 headers = mimetools.Message(f, 0)
532 f.fileno = None # needed for addinfourl
533 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000534
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000535
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000536class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000537 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000538
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000539 def __init__(self, *args):
540 apply(URLopener.__init__, (self,) + args)
541 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000542 self.tries = 0
543 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000544
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000545 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000546 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000547 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000548
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000549 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000550 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000551 self.tries += 1
552 if self.maxtries and self.tries >= self.maxtries:
553 if hasattr(self, "http_error_500"):
554 meth = self.http_error_500
555 else:
556 meth = self.http_error_default
557 self.tries = 0
558 return meth(url, fp, 500,
559 "Internal Server Error: Redirect Recursion", headers)
560 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
561 data)
562 self.tries = 0
563 return result
564
565 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000566 if headers.has_key('location'):
567 newurl = headers['location']
568 elif headers.has_key('uri'):
569 newurl = headers['uri']
570 else:
571 return
572 void = fp.read()
573 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000574 # In case the server sent a relative URL, join with original:
Moshe Zadka5d87d472001-04-09 14:54:21 +0000575 newurl = basejoin(self.type + ":" + url, newurl)
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000576 if data is None:
577 return self.open(newurl)
578 else:
579 return self.open(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000580
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000581 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000582 """Error 301 -- also relocated (permanently)."""
583 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000584
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000585 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000586 """Error 401 -- authentication required.
587 See this URL for a description of the basic authentication scheme:
588 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Moshe Zadkae99bd172001-02-27 06:27:04 +0000589 if not headers.has_key('www-authenticate'):
Tim Peters85ba6732001-02-28 08:26:44 +0000590 URLopener.http_error_default(self, url, fp,
Fred Drakec680ae82001-10-13 18:37:07 +0000591 errcode, errmsg, headers)
Moshe Zadkae99bd172001-02-27 06:27:04 +0000592 stuff = headers['www-authenticate']
593 import re
594 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
595 if not match:
Tim Peters85ba6732001-02-28 08:26:44 +0000596 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000597 errcode, errmsg, headers)
598 scheme, realm = match.groups()
599 if scheme.lower() != 'basic':
Tim Peters85ba6732001-02-28 08:26:44 +0000600 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000601 errcode, errmsg, headers)
602 name = 'retry_' + self.type + '_basic_auth'
603 if data is None:
604 return getattr(self,name)(url, realm)
605 else:
606 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000607
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000608 def retry_http_basic_auth(self, url, realm, data=None):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000609 host, selector = splithost(url)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000610 i = host.find('@') + 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000611 host = host[i:]
612 user, passwd = self.get_user_passwd(host, realm, i)
613 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000614 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000615 newurl = 'http://' + host + selector
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000616 if data is None:
617 return self.open(newurl)
618 else:
619 return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000620
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000621 def retry_https_basic_auth(self, url, realm, data=None):
Tim Peterse1190062001-01-15 03:34:38 +0000622 host, selector = splithost(url)
623 i = host.find('@') + 1
624 host = host[i:]
625 user, passwd = self.get_user_passwd(host, realm, i)
626 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000627 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Tim Peterse1190062001-01-15 03:34:38 +0000628 newurl = '//' + host + selector
Guido van Rossumafc4f042001-01-15 18:31:13 +0000629 return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000630
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000631 def get_user_passwd(self, host, realm, clear_cache = 0):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000632 key = realm + '@' + host.lower()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000633 if self.auth_cache.has_key(key):
634 if clear_cache:
635 del self.auth_cache[key]
636 else:
637 return self.auth_cache[key]
638 user, passwd = self.prompt_user_passwd(host, realm)
639 if user or passwd: self.auth_cache[key] = (user, passwd)
640 return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000641
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000642 def prompt_user_passwd(self, host, realm):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000643 """Override this in a GUI environment!"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000644 import getpass
645 try:
646 user = raw_input("Enter username for %s at %s: " % (realm,
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000647 host))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000648 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
649 (user, realm, host))
650 return user, passwd
651 except KeyboardInterrupt:
652 print
653 return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000654
655
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000656# Utility functions
657
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup result is remembered in the module-level _localhost.
    """
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000665
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup result is remembered in the module-level _thishost.
    """
    global _thishost
    if _thishost:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000673
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and ftplib.all_errors is
    remembered in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000682
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once and kept in the module-level _noheaders;
    later calls return that same object.
    """
    global _noheaders
    # Compare against None rather than testing truthiness: a Message
    # with no headers has length 0 and is therefore false, which made
    # the old "if not _noheaders" guard rebuild the object every call.
    if _noheaders is None:
        import mimetools
        import StringIO
        _noheaders = mimetools.Message(StringIO.StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000693
694
695# Utility classes
696
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Holds the login parameters plus one ftplib.FTP object (self.ftp).
    self.busy is set to 1 while a transfer started by retrfile() has
    not yet been finished by endtransfer().
    """

    def __init__(self, user, passwd, host, port, dirs):
        """Remember the connection parameters and connect via init()."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect: new FTP object, login, then cwd through self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a listing, and return (fileobj, conn[1]).

        A type of 'd' or 'D' requests a directory listing; any other
        value is sent to the server as 'TYPE <type>'.  The returned file
        object is wrapped in addclosehook so that closing it calls
        endtransfer().  FTP permission errors are re-raised as IOError,
        except a reply starting with '550' on RETR, which falls back to
        a listing.
        """
        import ftplib
        # Finish any transfer that is still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: reconnect once and retry it.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with '550' is tolerated here; conn
                # stays None and the listing branch below is taken.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish a pending transfer; do nothing when none is pending."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            # Best effort: errors while reading the final reply are ignored.
            pass

    def close(self):
        """End any pending transfer and close the FTP connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: errors while closing are ignored.
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000769
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read and readline
    methods, plus readlines and fileno when the object has them.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # readlines and fileno are optional on the wrapped object.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
                                             repr(id(self)), repr(self.fp))

    def close(self):
        """Drop the re-exported methods and close the wrapped object."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000791
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    close() first performs addbase.close() and then calls
    closehook(*hookargs); the hook and its arguments are cleared
    afterwards, so a second close() does not call the hook again.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000806
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000816
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember headers and url."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000830
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000831
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    If url already has a type (scheme) it is returned unchanged.  If it
    has a host, only the type is taken from base.  Otherwise type and
    host come from base, and a non-absolute path in url is resolved
    against the path of base (with its tag and query removed), with
    leading '../' components stepping up one directory each.
    """
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            i = len(basepath)
        else:
            # else replace last component
            i = basepath.rfind('/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            # Look for the '/' that ends the parent directory, ignoring
            # the trailing '/' of basepath itself.
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                # Reached the root: stop consuming '../' components.
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    # With a host present the path must be absolute.
    if host and path and path[0] != '/':
        path = '/' + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000888
889
Guido van Rossum7c395db1994-07-04 22:14:49 +0000890# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000891# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000892# splittype('type:opaquestring') --> 'type', 'opaquestring'
893# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000894# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
895# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000896# splitport('host:port') --> 'host', 'port'
897# splitquery('/path?query') --> '/path', 'query'
898# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000899# splitattr('/path;attr1=value1;attr2=value2;...') ->
900# '/path', ['attr1=value1', 'attr2=value2', ...]
901# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000902# splitgophertype('/Xselector') --> 'X', 'selector'
903# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
905
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  A unicode URL is encoded
    as ASCII; if that fails a UnicodeError naming the URL is raised.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if type(url) is not types.UnicodeType:
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
917
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading
    'URL:' prefix are each removed when present.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000925
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased; when url has no leading
    'type:' the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily, on first use.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000939
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    When url does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily, on first use.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000951
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and returned as a list;
    when host contains no '@' the result is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile lazily, on first use.
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000963
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily, on first use.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000975
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When there is no
    port the second item is None.  A trailing ':' with an empty port
    ('host:') is stripped, giving ('host', None), so the colon does
    not end up in the host name.  Anything after the last ':' that is
    not all digits leaves host untouched.
    """
    global _portprog
    if _portprog is None:
        # Compile lazily, on first use.  The port group may be empty
        # so that 'host:' is recognised as well.
        import re
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.group(1, 2)
        if port:
            return host, port
    return host, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000988
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile lazily, on first use.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    try:
        if not port:
            raise ValueError("no digits")
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001010
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily, on first use.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001022
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily, on first use.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001034
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001040
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily, on first use.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001052
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    A selector that does not start with '/' followed by at least one
    more character is returned as (None, selector).
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001058
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two characters that int(..., 16)
    and chr() accept is left in the result unchanged.
    """
    chunks = s.split('%')
    out = [chunks[0]]
    for chunk in chunks[1:]:
        if not chunk[1:2]:
            # Fewer than two characters follow the '%'.
            out.append('%' + chunk)
            continue
        try:
            out.append(chr(int(chunk[:2], 16)) + chunk[2:])
        except ValueError:
            out.append('%' + chunk)
    return "".join(out)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001077
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' is turned into a space before unquote() is applied.
    """
    if '+' in s:
        s = s.replace('+', ' ')
    return unquote(s)
Guido van Rossum0564e121996-12-13 14:47:36 +00001084
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001085always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001086 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001087 '0123456789' '_.-')
1088
1089_fast_safe_test = always_safe + '/'
1090_fast_safe = None
1091
1092def _fast_quote(s):
1093 global _fast_safe
1094 if _fast_safe is None:
1095 _fast_safe = {}
1096 for c in _fast_safe_test:
1097 _fast_safe[c] = c
1098 res = list(s)
1099 for i in range(len(res)):
1100 c = res[i]
1101 if not _fast_safe.has_key(c):
Guido van Rossume27a7b82001-01-19 03:28:15 +00001102 res[i] = '%%%02X' % ord(c)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001103 return ''.join(res)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001104
Guido van Rossum7c395db1994-07-04 22:14:49 +00001105def quote(s, safe = '/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001106 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001107
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001108 Each part of a URL, e.g. the path info, the query, etc., has a
1109 different set of reserved characters that must be quoted.
1110
1111 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1112 the following reserved characters.
1113
1114 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1115 "$" | ","
1116
1117 Each of these characters is reserved in some component of a URL,
1118 but not necessarily in all of them.
1119
1120 By default, the quote function is intended for quoting the path
1121 section of a URL. Thus, it will not encode '/'. This character
1122 is reserved, but in typical usage the quote function is being
1123 called on a path where the existing slash characters are used as
1124 reserved characters.
1125 """
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001126 safe = always_safe + safe
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001127 if _fast_safe_test == safe:
1128 return _fast_quote(s)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001129 res = list(s)
1130 for i in range(len(res)):
1131 c = res[i]
1132 if c not in safe:
Guido van Rossume27a7b82001-01-19 03:28:15 +00001133 res[i] = '%%%02X' % ord(c)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001134 return ''.join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001135
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Each space-separated piece is passed through quote(piece, safe)
    and the pieces are joined again with '+'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(piece, safe) for piece in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001145
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001146def urlencode(query,doseq=0):
1147 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001148
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001149 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001150 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001151
1152 If the query arg is a sequence of two-element tuples, the order of the
1153 parameters in the output will match the order of parameters in the
1154 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001155 """
Tim Peters658cba62001-02-09 20:06:00 +00001156
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001157 if hasattr(query,"items"):
1158 # mapping objects
1159 query = query.items()
1160 else:
1161 # it's a bother at times that strings and string-like objects are
1162 # sequences...
1163 try:
1164 # non-sequence items should not work with len()
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001165 # non-empty strings will fail this
1166 if len(query) and type(query[0]) != types.TupleType:
1167 raise TypeError
1168 # zero-length sequences of all types will get here and succeed,
1169 # but that's a minor nit - since the original implementation
1170 # allowed empty dicts that type of behavior probably should be
1171 # preserved for consistency
1172 except TypeError:
1173 ty,va,tb = sys.exc_info()
1174 raise TypeError, "not a valid non-string sequence or mapping object", tb
1175
Guido van Rossume7b146f2000-02-04 15:28:42 +00001176 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001177 if not doseq:
1178 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001179 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001180 k = quote_plus(str(k))
1181 v = quote_plus(str(v))
1182 l.append(k + '=' + v)
1183 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001184 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001185 k = quote_plus(str(k))
1186 if type(v) == types.StringType:
1187 v = quote_plus(v)
1188 l.append(k + '=' + v)
1189 elif type(v) == types.UnicodeType:
1190 # is there a reasonable way to convert to ASCII?
1191 # encode generates a string, but "replace" or "ignore"
1192 # lose information and "strict" can raise UnicodeError
1193 v = quote_plus(v.encode("ASCII","replace"))
1194 l.append(k + '=' + v)
1195 else:
1196 try:
1197 # is this a sufficient test for sequence-ness?
1198 x = len(v)
1199 except TypeError:
1200 # not a sequence
1201 v = quote_plus(str(v))
1202 l.append(k + '=' + v)
1203 else:
1204 # loop over the sequence
1205 for elt in v:
1206 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001207 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001208
Guido van Rossum442e7201996-03-20 15:33:11 +00001209# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared lower-cased and variables with an
    empty value are skipped.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        lowered = name.lower()
        if value and lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = value
    return proxies
1225
Guido van Rossum4163e701998-08-06 13:39:09 +00001226if os.name == 'mac':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001227 def getproxies():
1228 """Return a dictionary of scheme -> proxy server URL mappings.
Guido van Rossum442e7201996-03-20 15:33:11 +00001229
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001230 By convention the mac uses Internet Config to store
1231 proxies. An HTTP proxy, for instance, is stored under
1232 the HttpProxy key.
Guido van Rossum442e7201996-03-20 15:33:11 +00001233
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001234 """
1235 try:
1236 import ic
1237 except ImportError:
1238 return {}
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001239
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001240 try:
1241 config = ic.IC()
1242 except ic.error:
1243 return {}
1244 proxies = {}
1245 # HTTP:
1246 if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
1247 try:
1248 value = config['HTTPProxyHost']
1249 except ic.error:
1250 pass
1251 else:
1252 proxies['http'] = 'http://%s' % value
1253 # FTP: XXXX To be done.
1254 # Gopher: XXXX To be done.
1255 return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001256
Tim Peters55c12d42001-08-09 18:04:14 +00001257 def proxy_bypass(x):
1258 return 0
1259
Mark Hammond4f570b92000-07-26 07:04:38 +00001260elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.  The values are read from
    the current user's 'Internet Settings' key: 'ProxyEnable' switches
    proxying on, and 'ProxyServer' holds either a single server used for
    all protocols or a ';'-separated list of 'protocol=address' pairs.

    An empty dictionary is returned when _winreg cannot be imported,
    when proxying is disabled or when the key cannot be read.  If a value
    turns out to have an unexpected format, the entries collected before
    the problem was hit are returned.

    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    import re
                    for p in proxyServer.split(';'):
                        if not p:
                            # Stray separator ('a=b;' or 'a=b;;c=d'):
                            # nothing to parse, keep going.
                            continue
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Release the handle even when a query or the parsing fails.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already holds whatever could be gathered, so there is
        # nothing more to do.
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001305
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment take precedence; the registry
    is only consulted when the environment yields nothing.

    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001314
def proxy_bypass(host):
    """Return 1 if *host* is on the registry's proxy bypass list, else 0.

    The list is the ';'-separated 'ProxyOverride' value of the current
    user's 'Internet Settings' key.  Entries may use the glob characters
    '*' and '?'; the special entry '<local>' stands for localhost,
    127.0.0.1 and this machine's own name and address.  Each entry is
    tried against both *host* and, when it can be resolved, its IP
    address.  Matching is case-insensitive and anchored at the start of
    the name only (re.match).

    0 is returned when _winreg or re cannot be imported, when the
    registry cannot be read, or when proxying is disabled or no override
    list is set.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            # Do not leave the registry handle open.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Key/value missing, or the value is in an unexpected format
        # (same set of errors getproxies_registry() tolerates).
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    candidates = [host]
    try:
        addr = socket.gethostbyname(host)
        if addr != host:
            candidates.append(addr)
    except socket.error:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    patterns = []
    for entry in proxyOverride.split(';'):
        if entry == '<local>':
            patterns.append('localhost')
            patterns.append('127.0.0.1')
            try:
                localname = socket.gethostname()
                patterns.append(localname)
                patterns.append(socket.gethostbyname(localname))
            except socket.error:
                # Own name not resolvable; the fixed entries still apply.
                pass
        elif entry:
            # Empty entries (e.g. from a trailing ';') are dropped: as a
            # pattern '' would match every host.
            patterns.append(entry)
    # now check if we match one of the registry values.
    for test in patterns:
        # Quote everything, then turn the two glob characters back
        # into their regular expression equivalents.
        test = re.escape(test)
        test = test.replace(r"\*", r".*")     # change glob sequence
        test = test.replace(r"\?", r".")      # change glob char
        for val in candidates:
            if re.match(test, val, re.I):
                return 1
    return 0
1366
Mark Hammond4f570b92000-07-26 07:04:38 +00001367else:
1368 # By default use environment variables
1369 getproxies = getproxies_environment
1370
def proxy_bypass(host):
    """Tell whether *host* should be contacted without the proxy.

    No bypass list is consulted in this branch, so the answer is
    always 0 (never bypass).
    """
    return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001373
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001374# Test and time quote() and unquote()
1375def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001376 s = ''
1377 for i in range(256): s = s + chr(i)
1378 s = s*4
1379 t0 = time.time()
1380 qs = quote(s)
1381 uqs = unquote(qs)
1382 t1 = time.time()
1383 if uqs != s:
1384 print 'Wrong!'
1385 print `s`
1386 print `qs`
1387 print `uqs`
1388 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001389
1390
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001391def reporthook(blocknum, blocksize, totalsize):
1392 # Report during remote transfers
Guido van Rossumb2493f82000-12-15 15:01:37 +00001393 print "Block number: %d, Block size: %d, Total size: %d" % (
1394 blocknum, blocksize, totalsize)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001395
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001396# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001397def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001398 if not args:
1399 args = [
1400 '/etc/passwd',
1401 'file:/etc/passwd',
1402 'file://localhost/etc/passwd',
Andrew M. Kuchling56a42352002-03-18 22:18:46 +00001403 'ftp://ftp.python.org/pub/python/README',
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001404## 'gopher://gopher.micro.umn.edu/1/',
1405 'http://www.python.org/index.html',
1406 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001407 if hasattr(URLopener, "open_https"):
1408 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001409 try:
1410 for url in args:
1411 print '-'*10, url, '-'*10
1412 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001413 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001414 if h:
1415 print '======'
1416 for k in h.keys(): print k + ':', h[k]
1417 print '======'
1418 fp = open(fn, 'rb')
1419 data = fp.read()
1420 del fp
1421 if '\r' in data:
1422 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001423 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001424 print data
1425 fn, h = None, None
1426 print '-'*40
1427 finally:
1428 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001429
Guido van Rossum23490151998-06-25 02:39:00 +00001430def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001431 import getopt, sys
1432 try:
1433 opts, args = getopt.getopt(sys.argv[1:], "th")
1434 except getopt.error, msg:
1435 print msg
1436 print "Use -h for help"
1437 return
1438 t = 0
1439 for o, a in opts:
1440 if o == '-t':
1441 t = t + 1
1442 if o == '-h':
1443 print "Usage: python urllib.py [-t] [url ...]"
1444 print "-t runs self-test;",
1445 print "otherwise, contents of urls are printed"
1446 return
1447 if t:
1448 if t > 1:
1449 test1()
1450 test(args)
1451 else:
1452 if not args:
1453 print "Use -h for help"
1454 for url in args:
1455 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001456
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001457# Run test program when run as a script
if __name__ == "__main__":
    # Executed directly rather than imported: hand over to the driver.
    main()