blob: cffa02673d55e72ff27dd569ca178acf914689c1 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
Fredrik Lundhb49f88b2000-09-24 18:51:25 +000019and close() methods work like those of open files.
Guido van Rossume7b146f2000-02-04 15:28:42 +000020The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
25import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000026import os
Guido van Rossumf0713d32001-08-09 17:43:35 +000027import time
Guido van Rossum3c8484e1996-11-20 22:02:24 +000028import sys
Brett Cannon69200fa2004-03-23 21:26:39 +000029from urlparse import urljoin as basejoin
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000030
Skip Montanaro40fc1602001-03-01 04:27:19 +000031__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
32 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
Skip Montanaro44d5e0c2001-03-13 19:47:16 +000033 "urlencode", "url2pathname", "pathname2url", "splittag",
34 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
35 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
36 "splitnport", "splitquery", "splitattr", "splitvalue",
37 "splitgophertype", "getproxies"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
Martin v. Löwis3e865952006-01-24 15:51:21 +000039__version__ = '1.17' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000040
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000041MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000042
Jack Jansendc3e3f61995-12-15 13:22:13 +000043# Helper for non-unix systems
if os.name not in ('mac', 'nt', 'riscos'):
    # No platform-specific converter module is used here: the
    # conversions are plain percent-decoding / percent-encoding.
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path.

        Simply returns unquote(pathname).  OS-specific; not recommended
        for general use.
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL.

        Simply returns quote(pathname).  OS-specific; not recommended
        for general use.
        """
        return quote(pathname)
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # os.name == 'riscos'
    from rourl2path import url2pathname, pathname2url
Guido van Rossum33add0a1998-12-18 15:25:22 +000060
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000061# This really consists of two pieces:
62# (1) a class which handles opening of all sorts of URLs
63# (plus assorted utilities etc.)
64# (2) a set of functions for parsing URLs
65# XXX Should these be separated out into different modules?
66
67
68# Shortcut for basic usage
# Lazily created FancyURLopener shared by urlopen(), urlretrieve()
# and urlcleanup().
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener(proxies=proxies) is
    used for this call only and is not remembered.  Otherwise the
    module-wide opener is used, being created on first need.
    If data is not None it is passed on to the opener's open().
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the module-wide opener.

    Creates the shared FancyURLopener on first use and returns whatever
    its retrieve(url, filename, reporthook, data) returns.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the module-wide opener, if one was created."""
    opener = _urlopener
    if opener:
        opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000092
Georg Brandlb9256022005-08-24 18:46:39 +000093# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised when fewer bytes arrive than Content-Length promised.

    message -- passed to IOError as its single argument
    content -- stored unchanged on the instance as .content
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000098
# Default FTP connection cache.  URLopener.__init__ stores this same
# dict on every instance as .ftpcache.
ftpcache = {}


class URLopener:
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed).
    """

    # Class-level default: keeps the truth test in cleanup() working
    # on an instance whose __init__ never created the list.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and caches.

        proxies -- mapping from URL scheme to proxy URL; when None,
                   getproxies() supplies it.
        x509    -- optional 'key_file' / 'cert_file' keyword arguments,
                   stored for open_https().
        """
        if proxies is None:
            proxies = getproxies()
        # Deliberately left as an assert so that callers keep seeing
        # AssertionError for a non-mapping argument.
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Same as cleanup()."""
        self.cleanup()

    def cleanup(self):
        """Unlink the temporary files made by retrieve(); clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for path in self.__tempfiles:
                try:
                    self.__unlink(path)
                except OSError:
                    # Best effort: failure to unlink is ignored.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        The URL scheme (default 'file') selects the method named
        open_<scheme>, with '-' mapped to '_'.  When self.proxies has
        an entry for the scheme, the proxy's scheme is used instead and
        the handler receives a (proxyhost, fullurl) tuple.  Without a
        matching method, open_unknown() or open_unknown_proxy() is
        called.  socket.error from the handler is re-raised as
        IOError('socket error', msg).
        """
        fullurl = unwrap(toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            return addinfourl(open(filename, 'rb'), headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        proxy = None
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            return self.open_unknown(fullurl, data)
        handler = getattr(self, name)
        try:
            if data is None:
                return handler(url)
            return handler(url, data)
        except socket.error as msg:
            # PEP 3109 spelling of the old three-argument raise; the
            # original traceback is kept.
            raise IOError('socket error', msg).with_traceback(
                sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to handle a proxy of unknown type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as
        reporthook(blocknum, blocksize, totalsize) once before reading
        and once after every block; totalsize is -1 when there is no
        Content-Length header.  Raises ContentTooShortError when fewer
        bytes than Content-Length were read.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                # Only the headers are needed; close the probe handle
                # instead of leaving it to garbage collection.
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not a usable local file: fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                # Parse Content-Length whether or not there is a
                # reporthook; otherwise the short-read check below
                # could never fire for hook-less callers.
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    # Truth test rather than == "" so that an empty
                    # read ends the loop for any string type.
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a string (direct request) or a
        (proxyhost, fullurl) tuple as built by open().  A non-None
        data is sent as a POST body.  A 200 reply is returned wrapped
        by addinfourl(); any other status goes to http_error().
        """
        import httplib
        import base64
        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host:
            raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None

        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth:
            h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth:
            h.putheader('Authorization', 'Basic %s' % auth)
        if realhost:
            h.putheader('Host', realhost)
        for args in self.addheaders:
            h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp:
                fp.close()
            # something went wrong with the HTTP status line
            raise IOError('http protocol error', 0,
                          'got a bad status line', None)
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.
        A specific handler's true result is returned as is; otherwise
        http_error_default() is called.
        """
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result:
                return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.read()  # read and discard the body before closing
        fp.close()
        raise IOError('http error', errcode, errmsg, headers)

    if hasattr(socket, "ssl"):
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Same flow as open_http(), but connects with httplib.HTTPS
            using self.key_file / self.cert_file and performs no
            proxy_bypass() check.
            """
            import httplib
            import base64
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # here, we determine, whether the proxy contains
                # authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
            if not host:
                raise IOError('https error', 'no host given')
            if proxy_passwd:
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth:
                h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth:
                h.putheader('Authorization', 'Basic %s' % auth)
            if realhost:
                h.putheader('Host', realhost)
            for args in self.addheaders:
                h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp:
                    fp.close()
                # something went wrong with the HTTP status line
                raise IOError('http protocol error', 0,
                              'got a bad status line', None)
            if errcode == 200:
                return addinfourl(fp, headers, "https:" + url)
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            return self.http_error(url, fp, errcode, errmsg, headers,
                                   data)

    def open_gopher(self, url):
        """Use Gopher protocol."""
        if not isinstance(url, str):
            raise IOError('gopher error', 'proxy support for gopher protocol currently not implemented')
        import gopherlib
        host, selector = splithost(url)
        if not host:
            raise IOError('gopher error', 'no host given')
        host = unquote(host)
        gophertype, selector = splitgophertype(selector)
        selector, query = splitquery(selector)
        selector = unquote(selector)
        if query:
            query = unquote(query)
            fp = gopherlib.send_query(selector, query, host)
        else:
            fp = gopherlib.send_selector(selector, host)
        return addinfourl(fp, noheaders(), "gopher:" + url)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError('file error', 'proxy support for file protocol currently not implemented')
        # '//host/...' with a host other than 'localhost' goes to FTP;
        # '///...', '//localhost/...' and everything else is local.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Builds Content-Type, Content-Length and Last-modified headers
        from os.stat() and mimetypes.  Raises IOError if the file
        cannot be stat'ed or the URL's host is not this machine.
        """
        import mimetypes
        import mimetools
        import email.utils
        from io import StringIO
        host, path = splithost(url)
        localname = url2pathname(path)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        is_local = not host
        if not is_local:
            host, port = splitport(host)
            is_local = (not port and
                        socket.gethostbyname(host) in (localhost(), thishost()))
        if is_local:
            urlfile = path
            if path[:1] == '/':
                urlfile = 'file://' + path
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise IOError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache keyed by
        (user, host, port, directory); once the cache holds more than
        MAXFTPCACHE entries every entry but the current key is closed
        and dropped.  Errors listed by ftperrors() are re-raised as
        IOError('ftp error', msg).
        """
        if not isinstance(url, str):
            raise IOError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        import mimetools
        from io import StringIO
        host, path = splithost(url)
        if not host:
            raise IOError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, filename = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        if dirs and not dirs[0]:
            dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a
            # snapshot of the keys: entries are deleted inside the loop.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' when there is no file name,
            # else 'I'; a ';type=' attribute overrides it.
            if not filename:
                ftptype = 'D'
            else:
                ftptype = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    ftptype = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(filename, ftptype)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            # PEP 3109 spelling of the old three-argument raise; the
            # original traceback is kept.
            raise IOError('ftp error', msg).with_traceback(
                sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        The data argument (POSTed data) is ignored.  Raises
        IOError('data error', ...) when url is not a string or has no
        comma.
        """
        if not isinstance(url, str):
            raise IOError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        from io import StringIO
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ';xxx' without '=' is an encoding marker (e.g.
        # ';base64'), not a media type parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        lines = []
        # '%H:%M:%S' instead of '%T': same output, but '%T' is not
        # available in every platform's strftime().
        lines.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                                time.gmtime(time.time())))
        lines.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        lines.append('Content-Length: %d' % len(data))
        lines.append('')
        lines.append(data)
        f = StringIO('\n'.join(lines))
        headers = mimetools.Message(f, 0)
        return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000595
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000596
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302/303/307) with a recursion limit and
    HTTP Basic authentication retries (401/407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries has been reached, in which
        case the response is handed to http_error_500 (if defined) or
        http_error_default as a synthetic 500.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when the redirected request raises, so one failed
            # request cannot eat into the redirect budget of the next.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the target named by the Location (or URI) header.

        Returns None when neither header is present.  *data* is not
        forwarded to the new request.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return None
        fp.read()       # drain the redirect response body before closing
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # NOTE(review): the scheme of newurl is not validated here, so a
        # server can redirect to any scheme self.open() supports -- consider
        # restricting it.
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('www-authenticate', 'retry_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._basic_auth_challenge('proxy-authenticate', 'retry_proxy_',
                                          url, fp, errcode, errmsg,
                                          headers, data)

    def _basic_auth_challenge(self, header, prefix, url, fp,
                              errcode, errmsg, headers, data):
        """Parse a Basic challenge from *header* and dispatch the retry.

        The retry method is looked up by name as
        prefix + self.type + '_basic_auth', so subclasses may override
        the individual retry_* methods.
        """
        # URLopener.http_error_default presumably raises (not visible in
        # this part of the file); 'return' keeps the flow correct either way.
        if header not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"',
                         headers[header])
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        retry = getattr(self, prefix + self.type + '_basic_auth')
        if data is None:
            return retry(url, realm)
        else:
            return retry(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd@ embedded in the host."""
        return self._retry_origin_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd@ embedded in the host."""
        return self._retry_origin_basic_auth('https', url, realm, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Rewrite self.proxies[scheme] with credentials and reopen *url*.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxyhost = splittype(self.proxies[scheme])[1]
        proxyhost, proxyselector = splithost(proxyhost)
        # i is nonzero when the proxy URL already carried a user-info part;
        # it doubles as the clear_cache flag so stale credentials are dropped.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd):
            return None
        proxyhost = (quote(user, safe='') + ':' + quote(passwd, safe='')
                     + '@' + proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def _retry_origin_basic_auth(self, scheme, url, realm, data):
        """Reopen *url* with user:passwd@ embedded in its host part.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is nonzero when the URL already carried a user-info part;
        # it doubles as the clear_cache flag so stale credentials are dropped.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd):
            return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for *realm* at *host*.

        Uses the cached pair unless *clear_cache* is true, in which case
        the cached entry is discarded and the user is prompted again.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal; returns (None, None) if the user
        interrupts with Ctrl-C.
        """
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                     (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000780
781
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000782# Utility functions
783
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the cached value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000791
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the cached value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000799
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on first use and the result is cached.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000808
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and then shared by
    all callers.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    from io import StringIO
    empty = mimetools.Message(StringIO(), 0)
    empty.fp.close()            # the backing file is no longer needed
    _noheaders = empty
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000819
820
821# Utility classes
822
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs):
        """Remember the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing).

        *type* is the FTP transfer type letter; 'd'/'D' requests a
        listing.  Returns (fileobj, length) where fileobj ends the
        transfer when closed and length is whatever ntransfercmd reports.

        Raises IOError for permanent FTP errors other than 550 on RETR;
        a 550 falls back to a LIST of the same name.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection failed; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    raise IOError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file:
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        datafile = conn[0].makefile('rb')
        # The file object keeps the connection alive; drop our own handle on
        # the data socket so it is released when the file is closed.
        conn[0].close()
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(datafile, self.endtransfer), conn[1])

    def endtransfer(self):
        """Finish a pending transfer, ignoring FTP errors; no-op when idle."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any transfer and close the connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000888
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object *fp* and re-exports its read(), readline(),
    readlines() (when present) and fileno() as instance attributes.
    Iteration is delegated to *fp* as well.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None

    # iter() and next() look special methods up on the type, not on the
    # instance, so these must be real methods rather than attributes
    # assigned in __init__.
    def __iter__(self):
        """Return whatever the wrapped file's __iter__() returns."""
        return self.fp.__iter__()

    def __next__(self):
        """Return the next item from the wrapped file."""
        return self.fp.__next__()

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Close the wrapped file and drop every reference to it."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000917
class addclosehook(addbase):
    """Wrap an open file so a callback runs when the wrapper is closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        """Close the file, then call closehook(*hookargs) at most once."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so a second close() does not fire it again.
        self.closehook = self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000932
class addinfo(addbase):
    """Wrap an open file and attach a headers object, exposed via info()."""

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given at construction."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000942
class addinfourl(addbase):
    """Wrap an open file and attach headers and a URL (info(), geturl())."""

    def __init__(self, fp, headers, url):
        self.headers, self.url = headers, url
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given at construction."""
        return self.headers

    def geturl(self):
        """Return the URL given at construction."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000956
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000957
Guido van Rossum7c395db1994-07-04 22:14:49 +0000958# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000959# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000960# splittype('type:opaquestring') --> 'type', 'opaquestring'
961# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000962# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
963# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000964# splitport('host:port') --> 'host', 'port'
965# splitquery('/path?query') --> '/path', 'query'
966# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000967# splitattr('/path;attr1=value1;attr2=value2;...') ->
968# '/path', ['attr1=value1', 'attr2=value2', ...]
969# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000970# splitgophertype('/Xselector') --> 'X', 'selector'
971# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
973
def _is_unicode(x):
    """Return true if *x* is a text string (str)."""
    # str always exists, so no NameError fallback is needed.
    return isinstance(x, str)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Text strings are encoded as ASCII; any other object is returned
    unchanged.  Raises UnicodeError if *url* contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if _is_unicode(url):
        try:
            url = url.encode("ASCII")
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url
994
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000995def unwrap(url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000996 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
Guido van Rossumb2493f82000-12-15 15:01:37 +0000997 url = url.strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000998 if url[:1] == '<' and url[-1:] == '>':
Guido van Rossumb2493f82000-12-15 15:01:37 +0000999 url = url[1:-1].strip()
1000 if url[:4] == 'URL:': url = url[4:].strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001001 return url
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001002
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased; it is None when *url* has none.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001016
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when *url* does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001028
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are unquoted.  The split happens at the last '@'; the
    user part is None when *host* contains no '@'.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match:
        # Build a real sequence: map() would hand callers a one-shot lazy
        # iterator that cannot be indexed or reused.
        return [unquote(part) for part in match.group(1, 2)]
    return None, host
Guido van Rossum7c395db1994-07-04 22:14:49 +00001040
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; passwd is None when there is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001052
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None when *host*
    does not end in ':' followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001065
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport
    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError too, so an empty port needs no
        # special case.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +00001087
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; query is None when there is none.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' is not a valid escape in an ordinary literal.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001099
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; tag is None when there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001111
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    path, sep, tail = url.partition(';')
    if not sep:
        return path, []
    return path, tail.split(';')
Guido van Rossum7c395db1994-07-04 22:14:49 +00001117
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; value is None when there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily so importing the module stays cheap.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001129
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless *selector* starts with '/' followed
    by at least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    gtype, remainder = selector[1], selector[2:]
    return gtype, remainder
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001135
# Lookup table from every two-hex-digit string, in any mix of upper and
# lower case, to the character with that code.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%XX' escape (hex digits in either case) is replaced by the
    character with that code.  A '%' not followed by two hex digits is
    left untouched.
    """
    res = s.split('%')
    # res[0] precedes the first '%'; every later item started with one.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            res[i] = '%' + item
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001151
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001156
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001157always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001158 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001159 '0123456789' '_.-')
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001160_safemaps = {}
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001161
Guido van Rossum7c395db1994-07-04 22:14:49 +00001162def quote(s, safe = '/'):
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001163 """quote('abc def') -> 'abc%20def'
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001164
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001165 Each part of a URL, e.g. the path info, the query, etc., has a
1166 different set of reserved characters that must be quoted.
1167
1168 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1169 the following reserved characters.
1170
1171 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1172 "$" | ","
1173
1174 Each of these characters is reserved in some component of a URL,
1175 but not necessarily in all of them.
1176
1177 By default, the quote function is intended for quoting the path
1178 section of a URL. Thus, it will not encode '/'. This character
1179 is reserved, but in typical usage the quote function is being
1180 called on a path where the existing slash characters are used as
1181 reserved characters.
1182 """
Raymond Hettinger199d2f72005-09-09 22:27:13 +00001183 cachekey = (safe, always_safe)
1184 try:
1185 safe_map = _safemaps[cachekey]
1186 except KeyError:
1187 safe += always_safe
1188 safe_map = {}
1189 for i in range(256):
1190 c = chr(i)
1191 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1192 _safemaps[cachekey] = safe_map
1193 res = map(safe_map.__getitem__, s)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001194 return ''.join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001195
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Works like quote(), except that spaces become '+' rather than '%20'
    and that no extra character (not even '/') is safe by default.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Let the spaces pass through quote() unescaped, then swap them for '+'.
    return quote(s, safe + ' ').replace(' ', '+')
Guido van Rossum0564e121996-12-13 14:47:36 +00001202
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError when query is neither a mapping (an object with an
    items() method) nor a sequence whose first element is a tuple; this
    includes non-empty strings.  Empty sequences of any type are accepted
    and yield ''.
    """

    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a readable message while keeping the traceback
            # of the original failure.
            raise TypeError("not a valid non-string sequence or mapping object"
                            ).with_traceback(err.__traceback__)

    pairs = []
    if not doseq:
        # preserve old behavior: every value is stringified as a whole
        for k, v in query:
            pairs.append(quote_plus(str(k)) + '=' + quote_plus(str(v)))
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                pairs.append(k + '=' + quote_plus(v))
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                pairs.append(k + '=' + quote_plus(v.encode("ASCII", "replace")))
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    pairs.append(k + '=' + quote_plus(str(v)))
                else:
                    # one 'key=value' parameter per sequence element
                    for elt in v:
                        pairs.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(pairs)
Guido van Rossum810a3391998-07-22 21:33:23 +00001265
Guido van Rossum442e7201996-03-20 15:33:11 +00001266# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The mapping is built from environment variables whose lower-cased
    name ends in '_proxy' (the usual <scheme>_proxy convention); the
    part before that suffix becomes the key.  Variables with an empty
    value are skipped.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        lowered = name.lower()
        if not value or lowered[-len(suffix):] != suffix:
            continue
        proxies[lowered[:-len(suffix)]] = value
    return proxies
1282
# Platform-specific proxy discovery.  Each branch defines getproxies() and
# proxy_bypass(host) for the rest of the module.
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        Returns an empty dictionary when the ic module is missing or the
        configuration cannot be opened.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Never bypass the proxy on this platform (always returns 0)."""
        return 0

    def getproxies():
        """Return proxies from the environment, else from Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        Returns an empty (or partially filled) dictionary when the
        registry key is missing or a value has an unexpected format.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        import re
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when a query above fails.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        host may carry a ':port' suffix; it is stripped with splitport().
        The bare name, its resolved address and its fully qualified name
        (when they differ and can be resolved) are each tested against
        every ';'-separated override entry.  In an entry, '*' and '?' act
        as glob wildcards and '<local>' stands for the local machine.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                # Release the key whether or not the queries succeeded.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        overrides = []
        for entry in proxyOverride.split(';'):
            if entry != '<local>':
                overrides.append(entry)
                continue
            localName = socket.gethostname()
            overrides.extend(['localhost', '127.0.0.1', localName])
            try:
                overrides.append(socket.gethostbyname(localName))
            except socket.error:
                # Best effort, like the lookups above: an unresolvable
                # local name just contributes no address entry.
                pass
        # now check if we match one of the registry values.
        # NOTE(review): re.match only anchors at the start, so an entry
        # also matches any longer host it is a prefix of, and regex
        # metacharacters other than '.', '*' and '?' are not escaped.
        for test in overrides:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Never bypass the proxy on this platform (always returns 0)."""
        return 0
Guido van Rossum442e7201996-03-20 15:33:11 +00001440
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001441# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 character codes through quote() and unquote().

    Prints 'Wrong!' if the round trip does not reproduce the input, then
    the repr of the input, the quoted form and the unquoted form, and
    finally the elapsed time in seconds.
    """
    s = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    finished = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print(round(finished - started, 3), 'sec')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001456
1457
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a block of a remote transfer.

    Passed to urlretrieve() by the test program in this module.
    """
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001462
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001463# Test program
def test(args=None):
    """Retrieve each URL in args and print its headers and contents.

    When args is empty or omitted, a built-in list of sample URLs is
    used (plus an https URL if URLopener has an open_https method).
    Whatever happens, urlcleanup() is called at the end.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('-'*10, url, '-'*10)
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys(): print(k + ':', h[k])
                print('======')
            # Close the file deterministically instead of relying on 'del'.
            with open(fn, 'rb') as fp:
                data = fp.read()
            # The file was read in binary mode, so strip carriage returns
            # with a bytes operation.
            data = data.replace(b'\r', b'')
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001497
def main():
    """Command-line entry point: python urllib.py [-t] [url ...]

    -h prints usage.  One -t runs test() on the given URLs; two or more
    also run test1() first.  Without -t, the contents of each URL are
    printed.
    """
    import getopt
    import sys
    try:
        options, urls = getopt.getopt(sys.argv[1:], "th")
    except getopt.error as msg:
        print(msg)
        print("Use -h for help")
        return
    selftest = 0
    for flag, _value in options:
        if flag == '-t':
            selftest += 1
        elif flag == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test;", end=' ')
            print("otherwise, contents of urls are printed")
            return
    if selftest:
        if selftest > 1:
            test1()
        test(urls)
        return
    if not urls:
        print("Use -h for help")
    for url in urls:
        print(urlopen(url).read(), end=' ')

# Run test program when run as a script
if __name__ == '__main__':
    main()