blob: a255956efd2914dbeb67bde87365146969515d2c [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000028import stat
29import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000030import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000031import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000032
# Names exported by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-agent header.
__version__ = '1.15'    # XXX This version is not always updated :-(

# Once the ftp connection cache holds more entries than this,
# URLopener.open_ftp() prunes it.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000044
# Helper for non-unix systems: pick the url2pathname()/pathname2url()
# pair that matches the running platform (selected by os.name).
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Every other platform: the path is the URL path itself, so the
    # conversion is just unquote()/quote().
    def url2pathname(pathname):
        """Convert the path part of a URL to a local path by unquoting it."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Convert a local path to the path part of a URL by quoting it."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000057
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000058# This really consists of two pieces:
59# (1) a class which handles opening of all sorts of URLs
60# (plus assorted utilities etc.)
61# (2) a set of functions for parsing URLs
62# XXX Should these be separated out into different modules?
63
64
65# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object

    Opens the URL through a module-wide FancyURLopener that is created
    on first use and then shared by urlopen(), urlretrieve() and
    urlcleanup().  The data argument is passed to the opener's open()
    only when it is not None.
    """
    global _urlopener
    if not _urlopener:
        # Lazily create the shared opener.
        _urlopener = FancyURLopener()
    opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url with the shared opener; returns what its retrieve() returns.

    All arguments are forwarded unchanged to the retrieve() method of
    the module-wide FancyURLopener, which is created here if it does
    not exist yet.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if not opener:
        # Nothing was ever opened through urlopen()/urlretrieve().
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000084
85
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000086ftpcache = {}
87class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000088 """Class to open URLs.
89 This is a class rather than just a subroutine because we may need
90 more than one set of global protocol-specific options.
91 Note -- this is a base class for those who don't want the
92 automatic handling of errors type 302 (relocated) and 401
93 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000094
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000095 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +000096
Guido van Rossumba311382000-08-24 16:18:04 +000097 version = "Python-urllib/%s" % __version__
98
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000099 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000100 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000101 if proxies is None:
102 proxies = getproxies()
103 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
104 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000105 self.key_file = x509.get('key_file')
106 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +0000107 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000108 self.__tempfiles = []
109 self.__unlink = os.unlink # See cleanup()
110 self.tempcache = None
111 # Undocumented feature: if you assign {} to tempcache,
112 # it is used to cache files retrieved with
113 # self.retrieve(). This is not enabled by default
114 # since it does not work for changing documents (and I
115 # haven't got the logic to check expiration headers
116 # yet).
117 self.ftpcache = ftpcache
118 # Undocumented feature: you can use a different
119 # ftp cache by assigning to the .ftpcache member;
120 # in case you want logically independent URL openers
121 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000122
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000123 def __del__(self):
124 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000125
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000126 def close(self):
127 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000128
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000129 def cleanup(self):
130 # This code sometimes runs when the rest of this module
131 # has already been deleted, so it can't use any globals
132 # or import anything.
133 if self.__tempfiles:
134 for file in self.__tempfiles:
135 try:
136 self.__unlink(file)
Martin v. Löwis58682b72001-08-11 15:02:57 +0000137 except OSError:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000138 pass
139 del self.__tempfiles[:]
140 if self.tempcache:
141 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000142
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000143 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000144 """Add a header to be used by the HTTP interface only
145 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000146 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000147
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000148 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000149 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000150 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000151 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000152 if self.tempcache and self.tempcache.has_key(fullurl):
153 filename, headers = self.tempcache[fullurl]
154 fp = open(filename, 'rb')
155 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000156 urltype, url = splittype(fullurl)
157 if not urltype:
158 urltype = 'file'
159 if self.proxies.has_key(urltype):
160 proxy = self.proxies[urltype]
161 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000162 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000163 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000164 else:
165 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000166 name = 'open_' + urltype
167 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000168 if '-' in name:
169 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000170 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000171 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000172 if proxy:
173 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000174 else:
175 return self.open_unknown(fullurl, data)
176 try:
177 if data is None:
178 return getattr(self, name)(url)
179 else:
180 return getattr(self, name)(url, data)
181 except socket.error, msg:
182 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000183
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000184 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000185 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000186 type, url = splittype(fullurl)
187 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000188
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000189 def open_unknown_proxy(self, proxy, fullurl, data=None):
190 """Overridable interface to open unknown URL type."""
191 type, url = splittype(fullurl)
192 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
193
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000194 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000195 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000196 """retrieve(url) returns (filename, None) for a local object
197 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000198 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 if self.tempcache and self.tempcache.has_key(url):
200 return self.tempcache[url]
201 type, url1 = splittype(url)
202 if not filename and (not type or type == 'file'):
203 try:
204 fp = self.open_local_file(url1)
205 hdrs = fp.info()
206 del fp
207 return url2pathname(splithost(url1)[1]), hdrs
208 except IOError, msg:
209 pass
Fred Drake316a7932000-08-24 01:01:26 +0000210 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000211 headers = fp.info()
212 if not filename:
213 import tempfile
214 garbage, path = splittype(url)
215 garbage, path = splithost(path or "")
216 path, garbage = splitquery(path or "")
217 path, garbage = splitattr(path or "")
218 suffix = os.path.splitext(path)[1]
219 filename = tempfile.mktemp(suffix)
220 self.__tempfiles.append(filename)
221 result = filename, headers
222 if self.tempcache is not None:
223 self.tempcache[url] = result
224 tfp = open(filename, 'wb')
225 bs = 1024*8
226 size = -1
227 blocknum = 1
228 if reporthook:
229 if headers.has_key("content-length"):
230 size = int(headers["Content-Length"])
231 reporthook(0, bs, size)
232 block = fp.read(bs)
233 if reporthook:
234 reporthook(1, bs, size)
235 while block:
236 tfp.write(block)
237 block = fp.read(bs)
238 blocknum = blocknum + 1
239 if reporthook:
240 reporthook(blocknum, bs, size)
241 fp.close()
242 tfp.close()
243 del fp
244 del tfp
245 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000246
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000247 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000248
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000249 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000250 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000251 import httplib
252 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000253 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000254 host, selector = splithost(url)
255 if host:
256 user_passwd, host = splituser(host)
257 host = unquote(host)
258 realhost = host
259 else:
260 host, selector = url
261 urltype, rest = splittype(selector)
262 url = rest
263 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000264 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000265 realhost = None
266 else:
267 realhost, rest = splithost(rest)
268 if realhost:
269 user_passwd, realhost = splituser(realhost)
270 if user_passwd:
271 selector = "%s://%s%s" % (urltype, realhost, rest)
Tim Peters55c12d42001-08-09 18:04:14 +0000272 if proxy_bypass(realhost):
273 host = realhost
274
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000275 #print "proxy via http:", host, selector
276 if not host: raise IOError, ('http error', 'no host given')
277 if user_passwd:
278 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000279 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000280 else:
281 auth = None
282 h = httplib.HTTP(host)
283 if data is not None:
284 h.putrequest('POST', selector)
285 h.putheader('Content-type', 'application/x-www-form-urlencoded')
286 h.putheader('Content-length', '%d' % len(data))
287 else:
288 h.putrequest('GET', selector)
289 if auth: h.putheader('Authorization', 'Basic %s' % auth)
290 if realhost: h.putheader('Host', realhost)
291 for args in self.addheaders: apply(h.putheader, args)
292 h.endheaders()
293 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000294 h.send(data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000295 errcode, errmsg, headers = h.getreply()
296 fp = h.getfile()
297 if errcode == 200:
298 return addinfourl(fp, headers, "http:" + url)
299 else:
300 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000301 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000302 else:
303 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000304
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000306 """Handle http errors.
307 Derived class can override this, or provide specific handlers
308 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 # First check if there's a specific handler for this error
310 name = 'http_error_%d' % errcode
311 if hasattr(self, name):
312 method = getattr(self, name)
313 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000314 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000315 else:
316 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000317 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000318 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000319
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000320 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000321 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000322 void = fp.read()
323 fp.close()
324 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000325
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000326 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000327 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000328 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000329 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000330 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000331 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000332 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000333 if host:
334 user_passwd, host = splituser(host)
335 host = unquote(host)
336 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000337 else:
338 host, selector = url
339 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000340 url = rest
341 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000342 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000343 realhost = None
344 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000345 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000346 if realhost:
347 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000348 if user_passwd:
349 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000350 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000351 if not host: raise IOError, ('https error', 'no host given')
352 if user_passwd:
353 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000354 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000355 else:
356 auth = None
357 h = httplib.HTTPS(host, 0,
358 key_file=self.key_file,
359 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000360 if data is not None:
361 h.putrequest('POST', selector)
362 h.putheader('Content-type',
363 'application/x-www-form-urlencoded')
364 h.putheader('Content-length', '%d' % len(data))
365 else:
366 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000367 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000368 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000369 for args in self.addheaders: apply(h.putheader, args)
370 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000371 if data is not None:
Fred Drakeec3dfde2001-07-04 05:18:29 +0000372 h.send(data)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000373 errcode, errmsg, headers = h.getreply()
374 fp = h.getfile()
375 if errcode == 200:
376 return addinfourl(fp, headers, url)
377 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000378 if data is None:
379 return self.http_error(url, fp, errcode, errmsg, headers)
380 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000381 return self.http_error(url, fp, errcode, errmsg, headers,
382 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000383
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000384 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000385 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000386 import gopherlib
387 host, selector = splithost(url)
388 if not host: raise IOError, ('gopher error', 'no host given')
389 host = unquote(host)
390 type, selector = splitgophertype(selector)
391 selector, query = splitquery(selector)
392 selector = unquote(selector)
393 if query:
394 query = unquote(query)
395 fp = gopherlib.send_query(selector, query, host)
396 else:
397 fp = gopherlib.send_selector(selector, host)
398 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000399
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000400 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000401 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000402 if url[:2] == '//' and url[2:3] != '/':
403 return self.open_ftp(url)
404 else:
405 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000406
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000407 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000408 """Use local file."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000409 import mimetypes, mimetools, StringIO
Guido van Rossumf0713d32001-08-09 17:43:35 +0000410 host, file = splithost(url)
411 localname = url2pathname(file)
412 stats = os.stat(localname)
413 size = stats[stat.ST_SIZE]
414 modified = time.gmtime(stats[stat.ST_MTIME])
415 modified = "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
416 ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][modified[6]],
417 modified[2],
418 ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
419 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][modified[1]-1],
420 modified[0], modified[3], modified[4], modified[5])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000421 mtype = mimetypes.guess_type(url)[0]
422 headers = mimetools.Message(StringIO.StringIO(
Guido van Rossumf0713d32001-08-09 17:43:35 +0000423 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
424 (mtype or 'text/plain', size, modified)))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000425 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000426 urlfile = file
427 if file[:1] == '/':
428 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000429 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000430 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000431 host, port = splitport(host)
432 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000433 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000434 urlfile = file
435 if file[:1] == '/':
436 urlfile = 'file://' + file
Guido van Rossumf0713d32001-08-09 17:43:35 +0000437 return addinfourl(open(localname, 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000438 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000439 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000440
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000441 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000442 """Use FTP protocol."""
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000443 import mimetypes, mimetools, StringIO
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000444 host, path = splithost(url)
445 if not host: raise IOError, ('ftp error', 'no host given')
446 host, port = splitport(host)
447 user, host = splituser(host)
448 if user: user, passwd = splitpasswd(user)
449 else: passwd = None
450 host = unquote(host)
451 user = unquote(user or '')
452 passwd = unquote(passwd or '')
453 host = socket.gethostbyname(host)
454 if not port:
455 import ftplib
456 port = ftplib.FTP_PORT
457 else:
458 port = int(port)
459 path, attrs = splitattr(path)
460 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000461 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000462 dirs, file = dirs[:-1], dirs[-1]
463 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000464 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000465 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000466 # XXX thread unsafe!
467 if len(self.ftpcache) > MAXFTPCACHE:
468 # Prune the cache, rather arbitrarily
469 for k in self.ftpcache.keys():
470 if k != key:
471 v = self.ftpcache[k]
472 del self.ftpcache[k]
473 v.close()
474 try:
475 if not self.ftpcache.has_key(key):
476 self.ftpcache[key] = \
477 ftpwrapper(user, passwd, host, port, dirs)
478 if not file: type = 'D'
479 else: type = 'I'
480 for attr in attrs:
481 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000482 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000483 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000484 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000486 mtype = mimetypes.guess_type("ftp:" + url)[0]
487 headers = ""
488 if mtype:
489 headers += "Content-Type: %s\n" % mtype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000490 if retrlen is not None and retrlen >= 0:
Guido van Rossum88e0b5b2001-08-23 13:38:15 +0000491 headers += "Content-Length: %d\n" % retrlen
492 headers = mimetools.Message(StringIO.StringIO(headers))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000493 return addinfourl(fp, headers, "ftp:" + url)
494 except ftperrors(), msg:
495 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000496
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000497 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000498 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000499 # ignore POSTed data
500 #
501 # syntax of data URLs:
502 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
503 # mediatype := [ type "/" subtype ] *( ";" parameter )
504 # data := *urlchar
505 # parameter := attribute "=" value
506 import StringIO, mimetools, time
507 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000508 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000509 except ValueError:
510 raise IOError, ('data error', 'bad data URL')
511 if not type:
512 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000513 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000514 if semi >= 0 and '=' not in type[semi:]:
515 encoding = type[semi+1:]
516 type = type[:semi]
517 else:
518 encoding = ''
519 msg = []
520 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
521 time.gmtime(time.time())))
522 msg.append('Content-type: %s' % type)
523 if encoding == 'base64':
524 import base64
525 data = base64.decodestring(data)
526 else:
527 data = unquote(data)
528 msg.append('Content-length: %d' % len(data))
529 msg.append('')
530 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000531 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000532 f = StringIO.StringIO(msg)
533 headers = mimetools.Message(f, 0)
534 f.fileno = None # needed for addinfourl
535 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000536
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000537
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000538class FancyURLopener(URLopener):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000539 """Derived class with handlers for errors we can handle (perhaps)."""
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000540
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000541 def __init__(self, *args):
542 apply(URLopener.__init__, (self,) + args)
543 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000544 self.tries = 0
545 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000546
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000547 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000548 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000549 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000550
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000551 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000552 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000553 self.tries += 1
554 if self.maxtries and self.tries >= self.maxtries:
555 if hasattr(self, "http_error_500"):
556 meth = self.http_error_500
557 else:
558 meth = self.http_error_default
559 self.tries = 0
560 return meth(url, fp, 500,
561 "Internal Server Error: Redirect Recursion", headers)
562 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
563 data)
564 self.tries = 0
565 return result
566
567 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000568 if headers.has_key('location'):
569 newurl = headers['location']
570 elif headers.has_key('uri'):
571 newurl = headers['uri']
572 else:
573 return
574 void = fp.read()
575 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000576 # In case the server sent a relative URL, join with original:
Moshe Zadka5d87d472001-04-09 14:54:21 +0000577 newurl = basejoin(self.type + ":" + url, newurl)
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000578 if data is None:
579 return self.open(newurl)
580 else:
581 return self.open(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000582
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000583 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000584 """Error 301 -- also relocated (permanently)."""
585 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000586
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000587 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000588 """Error 401 -- authentication required.
589 See this URL for a description of the basic authentication scheme:
590 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Moshe Zadkae99bd172001-02-27 06:27:04 +0000591 if not headers.has_key('www-authenticate'):
Tim Peters85ba6732001-02-28 08:26:44 +0000592 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000593 errmsg, headers)
594 stuff = headers['www-authenticate']
595 import re
596 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
597 if not match:
Tim Peters85ba6732001-02-28 08:26:44 +0000598 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000599 errcode, errmsg, headers)
600 scheme, realm = match.groups()
601 if scheme.lower() != 'basic':
Tim Peters85ba6732001-02-28 08:26:44 +0000602 URLopener.http_error_default(self, url, fp,
Moshe Zadkae99bd172001-02-27 06:27:04 +0000603 errcode, errmsg, headers)
604 name = 'retry_' + self.type + '_basic_auth'
605 if data is None:
606 return getattr(self,name)(url, realm)
607 else:
608 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000609
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000610 def retry_http_basic_auth(self, url, realm, data=None):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000611 host, selector = splithost(url)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000612 i = host.find('@') + 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000613 host = host[i:]
614 user, passwd = self.get_user_passwd(host, realm, i)
615 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000616 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000617 newurl = 'http://' + host + selector
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000618 if data is None:
619 return self.open(newurl)
620 else:
621 return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000622
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open an https URL with basic-auth credentials embedded in it.

    url is the '//host/selector' part of the request.  Returns whatever
    self.open_https() returns, or None when no user name or password
    could be obtained.
    """
    netloc, selector = splithost(url)
    # Strip any "user:passwd@" prefix.  The offset is non-zero only when
    # such a prefix was present, and it doubles as the clear_cache flag.
    offset = netloc.find('@') + 1
    netloc = netloc[offset:]
    user, passwd = self.get_user_passwd(netloc, realm, offset)
    if not user and not passwd:
        return None
    credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = '//' + credentials + '@' + netloc + selector
    return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000632
def get_user_passwd(self, host, realm, clear_cache = 0):
    """Return (user, passwd) for realm at host, consulting self.auth_cache.

    A cached pair is returned as-is unless clear_cache is true, in
    which case the entry is dropped and self.prompt_user_passwd() is
    asked again.  A freshly prompted pair is cached only when at least
    one of its two values is true.
    """
    key = realm + '@' + host.lower()
    try:
        cached = self.auth_cache[key]
    except KeyError:
        pass
    else:
        if not clear_cache:
            return cached
        del self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[key] = (user, passwd)
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000643
def prompt_user_passwd(self, host, realm):
    """Ask for a user name and password on the console.

    Override this in a GUI environment!  Returns (user, passwd), or
    (None, None) when the prompt is interrupted by KeyboardInterrupt.
    """
    import getpass
    try:
        user_prompt = "Enter username for %s at %s: " % (realm, host)
        user = raw_input(user_prompt)
        passwd_prompt = ("Enter password for %s in %s at %s: " %
                         (user, realm, host))
        passwd = getpass.getpass(passwd_prompt)
    except KeyboardInterrupt:
        # Finish the interrupted prompt line before giving up.
        print
        return None, None
    return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000656
657
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000658# Utility functions
659
_localhost = None

def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done on the first call and the result is kept in the
    module-level _localhost for later calls.
    """
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000667
_thishost = None

def thishost():
    """Return the IP address of the current host.

    The address of socket.gethostname() is resolved on the first call
    and kept in the module-level _thishost for later calls.
    """
    global _thishost
    if _thishost:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000675
_ftperrors = None

def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value of
    ftplib.all_errors is then kept in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000684
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000685_noheaders = None
686def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000687 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000688 global _noheaders
689 if not _noheaders:
690 import mimetools
691 import StringIO
692 _noheaders = mimetools.Message(StringIO.StringIO(), 0)
693 _noheaders.fp.close() # Recycle file descriptor
694 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000695
696
697# Utility classes
698
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000699class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000700 """Class used by open_ftp() for cache of open FTP connections."""
701
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000702 def __init__(self, user, passwd, host, port, dirs):
703 self.user = user
704 self.passwd = passwd
705 self.host = host
706 self.port = port
707 self.dirs = dirs
708 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000709
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000710 def init(self):
711 import ftplib
712 self.busy = 0
713 self.ftp = ftplib.FTP()
714 self.ftp.connect(self.host, self.port)
715 self.ftp.login(self.user, self.passwd)
716 for dir in self.dirs:
717 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000718
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000719 def retrfile(self, file, type):
720 import ftplib
721 self.endtransfer()
722 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
723 else: cmd = 'TYPE ' + type; isdir = 0
724 try:
725 self.ftp.voidcmd(cmd)
726 except ftplib.all_errors:
727 self.init()
728 self.ftp.voidcmd(cmd)
729 conn = None
730 if file and not isdir:
731 # Use nlst to see if the file exists at all
732 try:
733 self.ftp.nlst(file)
734 except ftplib.error_perm, reason:
735 raise IOError, ('ftp error', reason), sys.exc_info()[2]
736 # Restore the transfer mode!
737 self.ftp.voidcmd(cmd)
738 # Try to retrieve as a file
739 try:
740 cmd = 'RETR ' + file
741 conn = self.ftp.ntransfercmd(cmd)
742 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000743 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000744 raise IOError, ('ftp error', reason), sys.exc_info()[2]
745 if not conn:
746 # Set transfer mode to ASCII!
747 self.ftp.voidcmd('TYPE A')
748 # Try a directory listing
749 if file: cmd = 'LIST ' + file
750 else: cmd = 'LIST'
751 conn = self.ftp.ntransfercmd(cmd)
752 self.busy = 1
753 # Pass back both a suitably decorated object and a retrieval length
754 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000755 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000756 def endtransfer(self):
757 if not self.busy:
758 return
759 self.busy = 0
760 try:
761 self.ftp.voidresp()
762 except ftperrors():
763 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000764
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000765 def close(self):
766 self.endtransfer()
767 try:
768 self.ftp.close()
769 except ftperrors():
770 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000771
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its read methods directly on the wrapper."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # These two are copied only when the wrapped object has them.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, repr(id(self)), repr(self.fp))
        return '<%s at %s whose fp = %s>' % details

    def close(self):
        """Drop the borrowed methods and close the wrapped file."""
        for name in ('read', 'readline', 'readlines', 'fileno'):
            setattr(self, name, None)
        wrapped = self.fp
        if wrapped:
            wrapped.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000793
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called when it is closed."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then run the hook if one is still set."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000808
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember the headers object that goes with it."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000818
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember its headers object and URL."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000832
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000833
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    A url that already has a type is returned unchanged.  Otherwise
    the type, and if necessary the host and leading path, are taken
    from base.
    """
    scheme, path = splittype(url)
    if scheme:
        # url is complete (i.e., it contains a type)
        return url
    host, path = splithost(path)
    scheme, basepath = splittype(base)      # inherit type from base
    if host:
        if not scheme:
            # no type inherited, so url must have started with //
            return url
        # url contains a host, just inherit the type
        return scheme + '://' + host + path
    host, basepath = splithost(basepath)    # inherit host
    basepath = splittag(basepath)[0]        # remove extraneous cruft
    basepath = splitquery(basepath)[0]      # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            cut = len(basepath)
        else:
            # else replace last component
            cut = basepath.rfind('/')
        if cut >= 0:
            # remove last file component
            basepath = basepath[:cut+1]
        elif host:
            # basepath not absolute but host present, make absolute
            basepath = '/'
        else:
            # else keep non-absolute
            basepath = ''
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            cut = basepath[:-1].rfind('/')
            if cut > 0:
                basepath = basepath[:cut+1]
            elif cut == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if host and path and path[0] != '/':
        path = '/' + path
    if scheme:
        if host:
            return scheme + '://' + host + path
        return scheme + ':' + path
    if host:
        return '//' + host + path   # don't know what this means
    return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000890
891
Guido van Rossum7c395db1994-07-04 22:14:49 +0000892# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000893# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000894# splittype('type:opaquestring') --> 'type', 'opaquestring'
895# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000896# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
897# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000898# splitport('host:port') --> 'host', 'port'
899# splitquery('/path?query') --> '/path', 'query'
900# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000901# splitattr('/path;attr1=value1;attr2=value2;...') ->
902# '/path', ['attr1=value1', 'attr2=value2', ...]
903# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000904# splitgophertype('/Xselector') --> 'X', 'selector'
905# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
907
Martin v. Löwis1d994332000-12-03 18:30:10 +0000908def toBytes(url):
909 """toBytes(u"URL") --> 'URL'."""
910 # Most URL schemes require ASCII. If that changes, the conversion
911 # can be relaxed
912 if type(url) is types.UnicodeType:
913 try:
914 url = url.encode("ASCII")
915 except UnicodeError:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000916 raise UnicodeError("URL " + repr(url) +
917 " contains non-ASCII characters")
Martin v. Löwis1d994332000-12-03 18:30:10 +0000918 return url
919
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of enclosing angle brackets and a
    leading 'URL:' marker are removed, in that order.
    """
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000927
_typeprog = None

def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case.  When url has no type prefix
    the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # The match ends just past the ':' that follows the scheme.
    return scheme.lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000941
_hostprog = None

def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    When url does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1), found.group(2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000953
_userprog = None

def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and returned as a
    two-element list.  Without an '@' the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000965
_passwdprog = None

def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1), found.group(2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000977
# splitport('host:port') --> 'host', 'port'
_portprog = None

def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and must consist of digits only;
    otherwise the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1), found.group(2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000990
_nportprog = None

def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as the port if there is a ':' but no valid number.
    """
    global _nportprog
    if _nportprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    hostname, port = found.group(1, 2)
    if not port:
        # ':' present but nothing after it
        return hostname, None
    try:
        return hostname, int(port)
    except ValueError:
        return hostname, None
Guido van Rossum53725a21996-06-13 19:12:35 +00001012
_queryprog = None

def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1), found.group(2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001024
_tagprog = None

def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1), found.group(2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001036
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001042
_valueprog = None

def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # The pattern is compiled on first use and then reused.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1), found.group(2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001054
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    When selector does not start with '/' followed by at least one
    more character, the result is (None, selector).
    """
    if not selector.startswith('/') or len(selector) < 2:
        return None, selector
    gtype = selector[1]
    rest = selector[2:]
    return gtype, rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001060
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced
    by the character with that code.  Any other '%' is left in place
    together with the text that follows it.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int(code, 16) alone also accepts
        # a sign or surrounding whitespace, so '%+1' or '% a' would be
        # decoded although they are not valid escapes.
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            res.append(chr(int(code, 16)) + item[2:])
        else:
            res.append('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001079
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' is turned into a space before the result is passed to
    unquote().
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001086
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Safe set produced by quote()'s default argument, and the lookup
# table built from it on the first _fast_quote() call.
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s for the default safe set, using a cached lookup table."""
    global _fast_safe
    if _fast_safe is None:
        table = {}
        for ch in _fast_safe_test:
            table[ch] = ch
        _fast_safe = table
    pieces = []
    for ch in s:
        if ch in _fast_safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters listed in safe are left alone in addition to letters,
    digits and '_.-'.
    """
    safe = always_safe + safe
    if safe == _fast_safe_test:
        # Default safe set: use the dictionary-based fast path.
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001137
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    The pieces between spaces are quoted with quote(piece, safe) and
    then joined with '+'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(piece, safe) for piece in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001147
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001148def urlencode(query,doseq=0):
1149 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001150
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001151 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001152 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001153
1154 If the query arg is a sequence of two-element tuples, the order of the
1155 parameters in the output will match the order of parameters in the
1156 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001157 """
Tim Peters658cba62001-02-09 20:06:00 +00001158
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001159 if hasattr(query,"items"):
1160 # mapping objects
1161 query = query.items()
1162 else:
1163 # it's a bother at times that strings and string-like objects are
1164 # sequences...
1165 try:
1166 # non-sequence items should not work with len()
1167 x = len(query)
1168 # non-empty strings will fail this
1169 if len(query) and type(query[0]) != types.TupleType:
1170 raise TypeError
1171 # zero-length sequences of all types will get here and succeed,
1172 # but that's a minor nit - since the original implementation
1173 # allowed empty dicts that type of behavior probably should be
1174 # preserved for consistency
1175 except TypeError:
1176 ty,va,tb = sys.exc_info()
1177 raise TypeError, "not a valid non-string sequence or mapping object", tb
1178
Guido van Rossume7b146f2000-02-04 15:28:42 +00001179 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001180 if not doseq:
1181 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001182 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001183 k = quote_plus(str(k))
1184 v = quote_plus(str(v))
1185 l.append(k + '=' + v)
1186 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001187 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001188 k = quote_plus(str(k))
1189 if type(v) == types.StringType:
1190 v = quote_plus(v)
1191 l.append(k + '=' + v)
1192 elif type(v) == types.UnicodeType:
1193 # is there a reasonable way to convert to ASCII?
1194 # encode generates a string, but "replace" or "ignore"
1195 # lose information and "strict" can raise UnicodeError
1196 v = quote_plus(v.encode("ASCII","replace"))
1197 l.append(k + '=' + v)
1198 else:
1199 try:
1200 # is this a sufficient test for sequence-ness?
1201 x = len(v)
1202 except TypeError:
1203 # not a sequence
1204 v = quote_plus(str(v))
1205 l.append(k + '=' + v)
1206 else:
1207 # loop over the sequence
1208 for elt in v:
1209 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001210 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001211
Guido van Rossum442e7201996-03-20 15:33:11 +00001212# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are ignored.
    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            scheme = name[:-len(suffix)]
            proxies[scheme] = value
    return proxies
1228
Guido van Rossum4163e701998-08-06 13:39:09 +00001229if os.name == 'mac':
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    By convention the mac uses Internet Config to store
    proxies.  An HTTP proxy, for instance, is stored under
    the HttpProxy key.

    An empty dictionary is returned when the ic module cannot be
    imported or Internet Config cannot be opened.
    """
    try:
        import ic
    except ImportError:
        return {}

    try:
        config = ic.IC()
    except ic.error:
        return {}
    proxies = {}
    # HTTP:
    use_http = config.has_key('UseHTTPProxy') and config['UseHTTPProxy']
    if use_http:
        try:
            proxyhost = config['HTTPProxyHost']
        except ic.error:
            pass
        else:
            proxies['http'] = 'http://%s' % proxyhost
    # FTP: XXXX To be done.
    # Gopher: XXXX To be done.
    return proxies
Mark Hammond4f570b92000-07-26 07:04:38 +00001259
def proxy_bypass(x):
    """Return 0 for every argument."""
    bypass = 0
    return bypass
1262
Mark Hammond4f570b92000-07-26 07:04:38 +00001263elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    An empty dictionary is returned when _winreg cannot be imported.
    Registry errors and values in an unexpected format are swallowed;
    whatever was collected up to that point is returned.
    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        proxies[protocol] = '%s://%s' % (protocol, address)
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
        finally:
            # Release the registry handle even when a query or the
            # parsing above raises; otherwise the key stays open.
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        pass
    return proxies
Guido van Rossum442e7201996-03-20 15:33:11 +00001304
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment take precedence; the registry is
    consulted only when the environment supplies none.
    """
    from_environment = getproxies_environment()
    if from_environment:
        return from_environment
    return getproxies_registry()
Tim Peters55c12d42001-08-09 18:04:14 +00001313
def proxy_bypass(host):
    """Return 1 if *host* should be contacted directly, else 0.

    The decision is taken from the current user's Internet Settings in
    the registry.  When 'ProxyEnable' is set, 'ProxyOverride' is read
    as a ';'-separated list of host patterns that must not go through
    the proxy.  In a pattern '*' stands for any run of characters and
    '?' for a single character; the special entry '<local>' is
    replaced by 'localhost', '127.0.0.1' and this machine's own host
    name and address.  Both *host* and the address it resolves to are
    tried against every pattern, ignoring case.

    0 is returned when the needed modules or registry values are not
    available, when proxying is disabled, when the override list is
    empty, or when no pattern matches.
    """
    try:
        import _winreg
        import re
        import socket
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(
            _winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        # Release the key handle even when one of the queries fails.
        try:
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
        finally:
            internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Same failure modes getproxies_registry() tolerates: missing
        # key or value, or a value in an unexpected format.
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    candidates = [host]
    try:
        addr = socket.gethostbyname(host)
    except socket.error:
        pass
    else:
        if addr != host:
            candidates.append(addr)
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    patterns = []
    for entry in proxyOverride.split(';'):
        entry = entry.strip()
        if not entry:
            # An empty pattern would turn into an empty regular
            # expression, which matches every host.
            continue
        if entry != '<local>':
            patterns.append(entry)
            continue
        patterns.extend(['localhost', '127.0.0.1'])
        try:
            localname = socket.gethostname()
            patterns.append(localname)
            patterns.append(socket.gethostbyname(localname))
        except socket.error:
            # Own name not resolvable; keep what has been collected.
            pass
    # now check if we match one of the registry values.
    for entry in patterns:
        # Quote everything, then turn the two glob wildcards back on, so
        # that other punctuation in an entry is matched literally.
        regex = re.escape(entry).replace(r'\*', '.*').replace(r'\?', '.')
        for candidate in candidates:
            # NOTE: re.match() anchors at the start only, so a pattern
            # also accepts names that merely begin with it.
            if re.match(regex, candidate, re.I):
                return 1
    return 0
1366
Mark Hammond4f570b92000-07-26 07:04:38 +00001367else:
1368 # By default use environment variables
1369 getproxies = getproxies_environment
1370
def proxy_bypass(host):
    """Tell whether *host* should be fetched directly instead of via a proxy.

    This default variant keeps no bypass list, so the answer is 0
    (never bypass) for every *host*.
    """
    return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001373
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001374# Test and time quote() and unquote()
def test1():
    """Round-trip a string through quote() and unquote(), and time it.

    The sample is the 256 characters chr(0)..chr(255) repeated four
    times.  'Wrong!' is printed when the unquoted result differs from
    the sample; the repr of the sample, of its quoted form and of the
    unquoted result are then printed, followed by the elapsed time in
    seconds rounded to three decimals.
    """
    import time
    sample = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    quoted = quote(sample)
    restored = unquote(quoted)
    finished = time.time()
    if restored != sample:
        print('Wrong!')
    for text in (sample, quoted, restored):
        print(repr(text))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001390
1391
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a remote transfer.

    test() hands this function to urlretrieve(); the three arguments
    are formatted as integers into a single line on stdout.
    """
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001396
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001397# Test program
def test(args=None):
    """Retrieve each URL in *args* and print what came back.

    For every URL a banner line is printed, then the local file name
    returned by urlretrieve(), the response headers (when there are
    any) and the retrieved data with carriage returns removed.
    Progress is reported through reporthook().

    When *args* is empty or omitted a built-in list of sample URLs is
    used; an https URL is added to that list if URLopener has an
    open_https method.

    urlcleanup() is always called on the way out, even when a
    retrieval fails.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/etc/passwd',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s: %s' % (k, h[k]))
                print('======')
            fp = open(fn, 'rb')
            # Close explicitly instead of counting on the reference
            # going away.
            try:
                data = fp.read()
            finally:
                fp.close()
            data = data.replace('\r', '')
            print(data)
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001430
def main():
    """Command-line driver: print the contents of URLs or run the tests.

    Options, taken from sys.argv[1:]:

    -t  run test() on the remaining arguments; when given more than
        once, test1() is run first
    -h  print a usage summary and return

    Without -t, the contents of every URL argument are written to
    stdout exactly as read; when there are no URL arguments a hint to
    use -h is printed.  An unrecognized option prints the getopt error
    message followed by the same hint, and returns.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error:
        # Fetch the exception through sys.exc_info() so that this
        # clause is spelled the same way on every Python version.
        print(sys.exc_info()[1])
        print("Use -h for help")
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test; otherwise, contents of urls are printed")
            return
    if t:
        if t > 1:
            test1()
        test(args)
        return
    if not args:
        print("Use -h for help")
    for url in args:
        response = urlopen(url)
        # Write the data untouched (print would add separators between
        # documents) and always release the connection.
        try:
            sys.stdout.write(response.read())
        finally:
            response.close()
Guido van Rossum23490151998-06-25 02:39:00 +00001457
# Executed as a script (rather than imported): hand over to the
# command-line driver.
if __name__ == '__main__':
    main()