blob: 2eb90f9d7f783f77bc834802d34de155f3adca83 [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossum3c8484e1996-11-20 22:02:24 +000028import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000029import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000030
Skip Montanaro40fc1602001-03-01 04:27:19 +000031__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
32 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
33 "urlencode"]
34
Guido van Rossumb2493f82000-12-15 15:01:37 +000035__version__ = '1.15' # XXX This version is not always updated :-(
Guido van Rossumf668d171997-06-06 21:11:11 +000036
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000037MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000038
Jack Jansendc3e3f61995-12-15 13:22:13 +000039# Helper for non-unix systems
if os.name == 'mac':
    # Platform-specific converters live in their own module.
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    # Same arrangement for Windows.
    from nturl2path import url2pathname, pathname2url
else:
    # On every other platform the conversion is plain URL (un)quoting.
    def url2pathname(pathname):
        """Return the local pathname for the path component of a URL."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Return the URL path component for a local pathname."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000049
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000050# This really consists of two pieces:
51# (1) a class which handles opening of all sorts of URLs
52# (plus assorted utilities etc.)
53# (2) a set of functions for parsing URLs
54# XXX Should these be separated out into different modules?
55
56
57# Shortcut for basic usage
# Module-wide opener shared by urlopen(), urlretrieve() and urlcleanup();
# created lazily on first use.
_urlopener = None

def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through the shared module-level FancyURLopener, creating
    that opener on first use.  *data* is forwarded to the opener's
    open() method only when it is not None.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* via the shared module-level opener.

    All arguments are passed straight to the opener's retrieve()
    method and its result is returned unchanged.  The shared
    FancyURLopener is created on first use.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000076
77
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000078ftpcache = {}
79class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000080 """Class to open URLs.
81 This is a class rather than just a subroutine because we may need
82 more than one set of global protocol-specific options.
83 Note -- this is a base class for those who don't want the
84 automatic handling of errors type 302 (relocated) and 401
85 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000086
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000087 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +000088
Guido van Rossumba311382000-08-24 16:18:04 +000089 version = "Python-urllib/%s" % __version__
90
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000091 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000092 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000093 if proxies is None:
94 proxies = getproxies()
95 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
96 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000097 self.key_file = x509.get('key_file')
98 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +000099 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000100 self.__tempfiles = []
101 self.__unlink = os.unlink # See cleanup()
102 self.tempcache = None
103 # Undocumented feature: if you assign {} to tempcache,
104 # it is used to cache files retrieved with
105 # self.retrieve(). This is not enabled by default
106 # since it does not work for changing documents (and I
107 # haven't got the logic to check expiration headers
108 # yet).
109 self.ftpcache = ftpcache
110 # Undocumented feature: you can use a different
111 # ftp cache by assigning to the .ftpcache member;
112 # in case you want logically independent URL openers
113 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000114
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000115 def __del__(self):
116 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000117
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000118 def close(self):
119 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000120
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000121 def cleanup(self):
122 # This code sometimes runs when the rest of this module
123 # has already been deleted, so it can't use any globals
124 # or import anything.
125 if self.__tempfiles:
126 for file in self.__tempfiles:
127 try:
128 self.__unlink(file)
129 except:
130 pass
131 del self.__tempfiles[:]
132 if self.tempcache:
133 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000134
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000135 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000136 """Add a header to be used by the HTTP interface only
137 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000138 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000139
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000140 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000141 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000142 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000143 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000144 if self.tempcache and self.tempcache.has_key(fullurl):
145 filename, headers = self.tempcache[fullurl]
146 fp = open(filename, 'rb')
147 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000148 urltype, url = splittype(fullurl)
149 if not urltype:
150 urltype = 'file'
151 if self.proxies.has_key(urltype):
152 proxy = self.proxies[urltype]
153 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000154 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000155 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000156 else:
157 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000158 name = 'open_' + urltype
159 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000160 if '-' in name:
161 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000162 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000163 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000164 if proxy:
165 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000166 else:
167 return self.open_unknown(fullurl, data)
168 try:
169 if data is None:
170 return getattr(self, name)(url)
171 else:
172 return getattr(self, name)(url, data)
173 except socket.error, msg:
174 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000175
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000176 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000177 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000178 type, url = splittype(fullurl)
179 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000180
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000181 def open_unknown_proxy(self, proxy, fullurl, data=None):
182 """Overridable interface to open unknown URL type."""
183 type, url = splittype(fullurl)
184 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
185
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000186 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000187 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000188 """retrieve(url) returns (filename, None) for a local object
189 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000190 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000191 if self.tempcache and self.tempcache.has_key(url):
192 return self.tempcache[url]
193 type, url1 = splittype(url)
194 if not filename and (not type or type == 'file'):
195 try:
196 fp = self.open_local_file(url1)
197 hdrs = fp.info()
198 del fp
199 return url2pathname(splithost(url1)[1]), hdrs
200 except IOError, msg:
201 pass
Fred Drake316a7932000-08-24 01:01:26 +0000202 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000203 headers = fp.info()
204 if not filename:
205 import tempfile
206 garbage, path = splittype(url)
207 garbage, path = splithost(path or "")
208 path, garbage = splitquery(path or "")
209 path, garbage = splitattr(path or "")
210 suffix = os.path.splitext(path)[1]
211 filename = tempfile.mktemp(suffix)
212 self.__tempfiles.append(filename)
213 result = filename, headers
214 if self.tempcache is not None:
215 self.tempcache[url] = result
216 tfp = open(filename, 'wb')
217 bs = 1024*8
218 size = -1
219 blocknum = 1
220 if reporthook:
221 if headers.has_key("content-length"):
222 size = int(headers["Content-Length"])
223 reporthook(0, bs, size)
224 block = fp.read(bs)
225 if reporthook:
226 reporthook(1, bs, size)
227 while block:
228 tfp.write(block)
229 block = fp.read(bs)
230 blocknum = blocknum + 1
231 if reporthook:
232 reporthook(blocknum, bs, size)
233 fp.close()
234 tfp.close()
235 del fp
236 del tfp
237 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000238
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000239 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000240
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000241 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000242 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000243 import httplib
244 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000245 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000246 host, selector = splithost(url)
247 if host:
248 user_passwd, host = splituser(host)
249 host = unquote(host)
250 realhost = host
251 else:
252 host, selector = url
253 urltype, rest = splittype(selector)
254 url = rest
255 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000256 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000257 realhost = None
258 else:
259 realhost, rest = splithost(rest)
260 if realhost:
261 user_passwd, realhost = splituser(realhost)
262 if user_passwd:
263 selector = "%s://%s%s" % (urltype, realhost, rest)
264 #print "proxy via http:", host, selector
265 if not host: raise IOError, ('http error', 'no host given')
266 if user_passwd:
267 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000268 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000269 else:
270 auth = None
271 h = httplib.HTTP(host)
272 if data is not None:
273 h.putrequest('POST', selector)
274 h.putheader('Content-type', 'application/x-www-form-urlencoded')
275 h.putheader('Content-length', '%d' % len(data))
276 else:
277 h.putrequest('GET', selector)
278 if auth: h.putheader('Authorization', 'Basic %s' % auth)
279 if realhost: h.putheader('Host', realhost)
280 for args in self.addheaders: apply(h.putheader, args)
281 h.endheaders()
282 if data is not None:
283 h.send(data + '\r\n')
284 errcode, errmsg, headers = h.getreply()
285 fp = h.getfile()
286 if errcode == 200:
287 return addinfourl(fp, headers, "http:" + url)
288 else:
289 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000291 else:
292 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000293
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000294 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000295 """Handle http errors.
296 Derived class can override this, or provide specific handlers
297 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000298 # First check if there's a specific handler for this error
299 name = 'http_error_%d' % errcode
300 if hasattr(self, name):
301 method = getattr(self, name)
302 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000303 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000304 else:
305 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000306 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000307 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000308
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000309 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000310 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000311 void = fp.read()
312 fp.close()
313 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000314
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000315 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000316 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000317 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000318 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000319 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000320 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000321 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000322 if host:
323 user_passwd, host = splituser(host)
324 host = unquote(host)
325 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000326 else:
327 host, selector = url
328 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000329 url = rest
330 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000331 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000332 realhost = None
333 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000334 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000335 if realhost:
336 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000337 if user_passwd:
338 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000339 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000340 if not host: raise IOError, ('https error', 'no host given')
341 if user_passwd:
342 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000343 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000344 else:
345 auth = None
346 h = httplib.HTTPS(host, 0,
347 key_file=self.key_file,
348 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000349 if data is not None:
350 h.putrequest('POST', selector)
351 h.putheader('Content-type',
352 'application/x-www-form-urlencoded')
353 h.putheader('Content-length', '%d' % len(data))
354 else:
355 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000356 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000357 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000358 for args in self.addheaders: apply(h.putheader, args)
359 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000360 if data is not None:
361 h.send(data + '\r\n')
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000362 errcode, errmsg, headers = h.getreply()
363 fp = h.getfile()
364 if errcode == 200:
365 return addinfourl(fp, headers, url)
366 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000367 if data is None:
368 return self.http_error(url, fp, errcode, errmsg, headers)
369 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000370 return self.http_error(url, fp, errcode, errmsg, headers,
371 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000372
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000373 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000374 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000375 import gopherlib
376 host, selector = splithost(url)
377 if not host: raise IOError, ('gopher error', 'no host given')
378 host = unquote(host)
379 type, selector = splitgophertype(selector)
380 selector, query = splitquery(selector)
381 selector = unquote(selector)
382 if query:
383 query = unquote(query)
384 fp = gopherlib.send_query(selector, query, host)
385 else:
386 fp = gopherlib.send_selector(selector, host)
387 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000388
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000389 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000390 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000391 if url[:2] == '//' and url[2:3] != '/':
392 return self.open_ftp(url)
393 else:
394 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000395
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000396 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000397 """Use local file."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000398 import mimetypes, mimetools, StringIO
399 mtype = mimetypes.guess_type(url)[0]
400 headers = mimetools.Message(StringIO.StringIO(
401 'Content-Type: %s\n' % (mtype or 'text/plain')))
402 host, file = splithost(url)
403 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000404 urlfile = file
405 if file[:1] == '/':
406 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000407 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000408 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000409 host, port = splitport(host)
410 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000411 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000412 urlfile = file
413 if file[:1] == '/':
414 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000415 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000416 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000417 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000418
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000419 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000420 """Use FTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000421 host, path = splithost(url)
422 if not host: raise IOError, ('ftp error', 'no host given')
423 host, port = splitport(host)
424 user, host = splituser(host)
425 if user: user, passwd = splitpasswd(user)
426 else: passwd = None
427 host = unquote(host)
428 user = unquote(user or '')
429 passwd = unquote(passwd or '')
430 host = socket.gethostbyname(host)
431 if not port:
432 import ftplib
433 port = ftplib.FTP_PORT
434 else:
435 port = int(port)
436 path, attrs = splitattr(path)
437 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000438 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000439 dirs, file = dirs[:-1], dirs[-1]
440 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000441 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000442 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000443 # XXX thread unsafe!
444 if len(self.ftpcache) > MAXFTPCACHE:
445 # Prune the cache, rather arbitrarily
446 for k in self.ftpcache.keys():
447 if k != key:
448 v = self.ftpcache[k]
449 del self.ftpcache[k]
450 v.close()
451 try:
452 if not self.ftpcache.has_key(key):
453 self.ftpcache[key] = \
454 ftpwrapper(user, passwd, host, port, dirs)
455 if not file: type = 'D'
456 else: type = 'I'
457 for attr in attrs:
458 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000459 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000460 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000461 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000462 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
463 if retrlen is not None and retrlen >= 0:
464 import mimetools, StringIO
465 headers = mimetools.Message(StringIO.StringIO(
466 'Content-Length: %d\n' % retrlen))
467 else:
468 headers = noheaders()
469 return addinfourl(fp, headers, "ftp:" + url)
470 except ftperrors(), msg:
471 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000472
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000473 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000474 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000475 # ignore POSTed data
476 #
477 # syntax of data URLs:
478 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
479 # mediatype := [ type "/" subtype ] *( ";" parameter )
480 # data := *urlchar
481 # parameter := attribute "=" value
482 import StringIO, mimetools, time
483 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000484 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000485 except ValueError:
486 raise IOError, ('data error', 'bad data URL')
487 if not type:
488 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000489 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000490 if semi >= 0 and '=' not in type[semi:]:
491 encoding = type[semi+1:]
492 type = type[:semi]
493 else:
494 encoding = ''
495 msg = []
496 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
497 time.gmtime(time.time())))
498 msg.append('Content-type: %s' % type)
499 if encoding == 'base64':
500 import base64
501 data = base64.decodestring(data)
502 else:
503 data = unquote(data)
504 msg.append('Content-length: %d' % len(data))
505 msg.append('')
506 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000507 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000508 f = StringIO.StringIO(msg)
509 headers = mimetools.Message(f, 0)
510 f.fileno = None # needed for addinfourl
511 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000512
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000513
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302, limited by self.maxtries) and
    HTTP basic authentication (401) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as URLopener.__init__.

        Keyword arguments (e.g. key_file, cert_file) are forwarded too.
        """
        apply(URLopener.__init__, (self,) + args, kwargs)
        # Maps 'realm@host' to a (user, passwd) pair.
        self.auth_cache = {}
        # Redirect counter and limit used by http_error_302().
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect via redirect_internal().  Once self.tries
        reaches self.maxtries the chain is abandoned and reported as a
        500 error through http_error_500 (if defined) or
        http_error_default.
        """
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        # Drain and close the redirect response; its body is discarded.
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin("http:" + url, newurl)
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.

        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

        Only the 'basic' scheme is handled; anything else is reported
        through URLopener.http_error_default(), which raises IOError.
        """
        if not headers.has_key('www-authenticate'):
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # self.type was recorded by open(); it selects
        # retry_http_basic_auth or retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-open an http URL with user:passwd@ added to the host part."""
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed as clear_cache so a cached pair is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-open an https URL with user:passwd@ added to the host part."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = '//' + host + selector
        return self.open_https(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is dropped and the user is prompted again.
        Only non-empty answers are cached.
        """
        key = realm + '@' + host.lower()
        if self.auth_cache.has_key(key):
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal; returns (None, None) if interrupted.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000632
633
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000634# Utility functions
635
# Cached result of localhost(); filled in on first call.
_localhost = None

def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and remembered in the module global.
    """
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000643
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The name reported by socket.gethostname() is resolved once with
    socket.gethostbyname(); the result is kept in _thishost for reuse.
    """
    global _thishost
    if _thishost:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000651
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    This is ftplib.all_errors; ftplib is imported only on first use and
    the value is then kept in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000660
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built once from an empty StringIO and then shared by all
    callers through the module-level _noheaders.
    """
    global _noheaders
    # Compare against None instead of testing truth: a Message without any
    # headers supports len() and is expected to report 0, which would make
    # a truth test fail and rebuild the object on every call.
    if _noheaders is None:
        import mimetools
        import StringIO
        _noheaders = mimetools.Message(StringIO.StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000671
672
673# Utility classes
674
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000675class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000676 """Class used by open_ftp() for cache of open FTP connections."""
677
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000678 def __init__(self, user, passwd, host, port, dirs):
679 self.user = user
680 self.passwd = passwd
681 self.host = host
682 self.port = port
683 self.dirs = dirs
684 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000685
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000686 def init(self):
687 import ftplib
688 self.busy = 0
689 self.ftp = ftplib.FTP()
690 self.ftp.connect(self.host, self.port)
691 self.ftp.login(self.user, self.passwd)
692 for dir in self.dirs:
693 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000694
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000695 def retrfile(self, file, type):
696 import ftplib
697 self.endtransfer()
698 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
699 else: cmd = 'TYPE ' + type; isdir = 0
700 try:
701 self.ftp.voidcmd(cmd)
702 except ftplib.all_errors:
703 self.init()
704 self.ftp.voidcmd(cmd)
705 conn = None
706 if file and not isdir:
707 # Use nlst to see if the file exists at all
708 try:
709 self.ftp.nlst(file)
710 except ftplib.error_perm, reason:
711 raise IOError, ('ftp error', reason), sys.exc_info()[2]
712 # Restore the transfer mode!
713 self.ftp.voidcmd(cmd)
714 # Try to retrieve as a file
715 try:
716 cmd = 'RETR ' + file
717 conn = self.ftp.ntransfercmd(cmd)
718 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000719 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000720 raise IOError, ('ftp error', reason), sys.exc_info()[2]
721 if not conn:
722 # Set transfer mode to ASCII!
723 self.ftp.voidcmd('TYPE A')
724 # Try a directory listing
725 if file: cmd = 'LIST ' + file
726 else: cmd = 'LIST'
727 conn = self.ftp.ntransfercmd(cmd)
728 self.busy = 1
729 # Pass back both a suitably decorated object and a retrieval length
730 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000731 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000732 def endtransfer(self):
733 if not self.busy:
734 return
735 self.busy = 0
736 try:
737 self.ftp.voidresp()
738 except ftperrors():
739 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000740
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000741 def close(self):
742 self.endtransfer()
743 try:
744 self.ftp.close()
745 except ftperrors():
746 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000747
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object 'fp' and re-exports its read() and readline()
    methods, plus readlines() and fileno() when 'fp' has them.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # These two are optional on the wrapped object
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, repr(id(self)), repr(self.fp))
        return '<%s at %s whose fp = %s>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file, if any."""
        self.read = self.readline = None
        self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000769
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    'closehook' is called with 'hookargs' the first time close() is
    invoked, after the wrapped file has been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so that it runs only once
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000784
class addinfo(addbase):
    """Class to add an info() method to an open file.

    'headers' is stored unchanged and handed back by info().
    """

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000794
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file.

    'headers' and 'url' are stored unchanged and handed back by info()
    and geturl() respectively.
    """

    def __init__(self, fp, headers, url):
        self.headers, self.url = headers, url
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000808
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000809
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    A 'url' that has its own type is returned unchanged.  Otherwise the
    type, then the host, then the directory part of the path are inherited
    from 'base' as far as 'url' does not supply them; the tag and query of
    'base' are dropped and leading '../' components of 'url' are resolved
    against the base path.
    """
    scheme, rest = splittype(url)
    if scheme:
        # url is complete (i.e., it contains a type)
        return url
    host, path = splithost(rest)
    scheme, basepath = splittype(base)      # inherit type from base
    if host:
        # url contains a host, so only the type can be inherited
        if scheme:
            return scheme + '://' + host + path
        # no type inherited, so url must have started with //
        return url
    host, basepath = splithost(basepath)    # inherit host
    basepath = splittag(basepath)[0]        # remove extraneous cruft
    basepath = splitquery(basepath)[0]      # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            cut = len(basepath)
        else:
            # else replace last component
            cut = basepath.rfind('/')
        if cut >= 0:
            # remove last file component
            basepath = basepath[:cut+1]
        elif host:
            # basepath not absolute but host present, make absolute
            basepath = '/'
        else:
            # else keep non-absolute
            basepath = ''
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            cut = basepath[:-1].rfind('/')
            if cut > 0:
                basepath = basepath[:cut+1]
            elif cut == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if scheme:
        if host:
            return scheme + '://' + host + path
        return scheme + ':' + path
    if host:
        return '//' + host + path   # don't know what this means
    return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000864
865
Guido van Rossum7c395db1994-07-04 22:14:49 +0000866# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000867# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000868# splittype('type:opaquestring') --> 'type', 'opaquestring'
869# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000870# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
871# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000872# splitport('host:port') --> 'host', 'port'
873# splitquery('/path?query') --> '/path', 'query'
874# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000875# splitattr('/path;attr1=value1;attr2=value2;...') ->
876# '/path', ['attr1=value1', 'attr2=value2', ...]
877# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000878# splitgophertype('/Xselector') --> 'X', 'selector'
879# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
881
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode objects are encoded as ASCII; anything else is returned
    unchanged.  Raises UnicodeError if the URL contains non-ASCII
    characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if type(url) is not types.UnicodeType:
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
893
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of enclosing angle brackets and a
    leading 'URL:' marker are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000901
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case.  Without a leading 'type:' the
    result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match covers exactly the scheme plus its ':' separator
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000915
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    If 'url' does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000927
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  Without an '@' the result
    is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000939
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +0000951
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and is only recognized when it is
    made up of digits; otherwise the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000964
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as the port if there is a ':' but no valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.groups()
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            # Something follows the ':' but it is not a number
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +0000986
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000998
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001010
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when 'url' contains no ';'."""
    pieces = url.split(';')
    path = pieces[0]
    del pieces[0]
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001016
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compile on first use and keep the pattern for later calls
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001028
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless the selector starts with '/' and has
    at least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001034
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  A '%' followed by anything else is
    left in the result unchanged.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int() would also accept a sign or
        # surrounding whitespace, so '%+1' or '% 1' must not be decoded.
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            try:
                res.append(chr(int(code, 16)) + item[2:])
            except ValueError:
                # Keep the escape as it is if the decoded character cannot
                # be combined with the rest of the string.
                res.append('%' + item)
        else:
            res.append('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001053
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first."""
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001060
Fredrik Lundhb49f88b2000-09-24 18:51:25 +00001061always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Jeremy Hylton6102e292000-08-31 15:48:10 +00001062 'abcdefghijklmnopqrstuvwxyz'
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001063 '0123456789' '_.-')
1064
1065_fast_safe_test = always_safe + '/'
1066_fast_safe = None
1067
1068def _fast_quote(s):
1069 global _fast_safe
1070 if _fast_safe is None:
1071 _fast_safe = {}
1072 for c in _fast_safe_test:
1073 _fast_safe[c] = c
1074 res = list(s)
1075 for i in range(len(res)):
1076 c = res[i]
1077 if not _fast_safe.has_key(c):
Guido van Rossume27a7b82001-01-19 03:28:15 +00001078 res[i] = '%%%02X' % ord(c)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001079 return ''.join(res)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +00001080
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in 'safe' are left alone in addition to those in
    always_safe; everything else becomes '%XX'.
    """
    unescaped = always_safe + safe
    if unescaped == _fast_safe_test:
        # Default safe set: use the table-driven implementation
        return _fast_quote(s)
    out = []
    for ch in s:
        if ch not in unescaped:
            ch = '%%%02X' % ord(ch)
        out.append(ch)
    return ''.join(out)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001111
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    The pieces between spaces are each passed to quote() together with
    'safe', and then joined again with '+'."""
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(chunk, safe) for chunk in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001121
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001122def urlencode(query,doseq=0):
1123 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001124
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001125 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001126 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001127
1128 If the query arg is a sequence of two-element tuples, the order of the
1129 parameters in the output will match the order of parameters in the
1130 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001131 """
Tim Peters658cba62001-02-09 20:06:00 +00001132
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001133 if hasattr(query,"items"):
1134 # mapping objects
1135 query = query.items()
1136 else:
1137 # it's a bother at times that strings and string-like objects are
1138 # sequences...
1139 try:
1140 # non-sequence items should not work with len()
1141 x = len(query)
1142 # non-empty strings will fail this
1143 if len(query) and type(query[0]) != types.TupleType:
1144 raise TypeError
1145 # zero-length sequences of all types will get here and succeed,
1146 # but that's a minor nit - since the original implementation
1147 # allowed empty dicts that type of behavior probably should be
1148 # preserved for consistency
1149 except TypeError:
1150 ty,va,tb = sys.exc_info()
1151 raise TypeError, "not a valid non-string sequence or mapping object", tb
1152
Guido van Rossume7b146f2000-02-04 15:28:42 +00001153 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001154 if not doseq:
1155 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001156 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001157 k = quote_plus(str(k))
1158 v = quote_plus(str(v))
1159 l.append(k + '=' + v)
1160 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001161 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001162 k = quote_plus(str(k))
1163 if type(v) == types.StringType:
1164 v = quote_plus(v)
1165 l.append(k + '=' + v)
1166 elif type(v) == types.UnicodeType:
1167 # is there a reasonable way to convert to ASCII?
1168 # encode generates a string, but "replace" or "ignore"
1169 # lose information and "strict" can raise UnicodeError
1170 v = quote_plus(v.encode("ASCII","replace"))
1171 l.append(k + '=' + v)
1172 else:
1173 try:
1174 # is this a sufficient test for sequence-ness?
1175 x = len(v)
1176 except TypeError:
1177 # not a sequence
1178 v = quote_plus(str(v))
1179 l.append(k + '=' + v)
1180 else:
1181 # loop over the sequence
1182 for elt in v:
1183 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001184 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001185
Guido van Rossum442e7201996-03-20 15:33:11 +00001186# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are ignored.

    """
    suffix = '_proxy'
    found = {}
    for variable, setting in os.environ.items():
        if not setting:
            continue
        variable = variable.lower()
        if variable.endswith(suffix):
            found[variable[:-len(suffix)]] = setting
    return found
1202
if os.name == 'mac':
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the ic module or its
        configuration cannot be loaded.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        found = {}
        # HTTP:
        if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
            try:
                http_host = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                found['http'] = 'http://%s' % http_host
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return found

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        The dictionary is empty when _winreg is unavailable or the proxy
        is not enabled.  If a registry value is missing or malformed, the
        entries collected up to that point are returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            settings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            enabled = _winreg.QueryValueEx(settings, 'ProxyEnable')[0]
            if enabled:
                # Returned as Unicode but problems if not converted to ASCII
                server = str(_winreg.QueryValueEx(settings, 'ProxyServer')[0])
                if '=' not in server:
                    # Use one setting for all protocols
                    if server[:5] == 'http:':
                        proxies['http'] = server
                    else:
                        proxies['http'] = 'http://%s' % server
                        proxies['ftp'] = 'ftp://%s' % server
                else:
                    # Per-protocol settings
                    for entry in server.split(';'):
                        protocol, address = entry.split('=', 1)
                        proxies[protocol] = '%s://%s' % (protocol, address)
            settings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # Nothing more is added to proxies in that case.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()
else:
    # By default use environment variables
    getproxies = getproxies_environment
1287
Guido van Rossum442e7201996-03-20 15:33:11 +00001288
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001289# Test and time quote() and unquote()
def test1():
    """Quote and unquote every 8-bit character four times over and time it.

    Prints 'Wrong!' if the round trip does not give back the input, then
    the input, the quoted and the unquoted strings, and the time taken.
    """
    import time
    s = ''.join(map(chr, range(256))) * 4
    started = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    finished = time.time()
    if uqs != s:
        print('Wrong!')
    for text in (s, qs, uqs):
        print(repr(text))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001305
1306
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print the three numbers on one line."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001311
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001312# Test program
def test(args=[]):
    """Retrieve each URL in 'args' and print its headers and contents.

    With no arguments a built-in list of sample URLs is used.  Each URL is
    fetched with urlretrieve() (progress is reported through reporthook);
    urlcleanup() is always called at the end.
    """
    if not args:
        # A fresh list is bound here, so the default argument object is
        # not the one being appended to below.
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/etc/passwd',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        # NOTE(review): nesting under "if not args" is reconstructed from
        # flattened text - confirm against the original file.
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                # Dump the headers between two marker lines
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            fp = open(fn, 'rb')
            data = fp.read()
            # The file is not closed explicitly; only the reference is dropped
            del fp
            if '\r' in data:
                # Delete every carriage return before printing
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        # NOTE(review): placement after the loop (rather than inside it) is
        # reconstructed from flattened text - confirm.
        print '-'*40
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001345
Guido van Rossum23490151998-06-25 02:39:00 +00001346def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001347 import getopt, sys
1348 try:
1349 opts, args = getopt.getopt(sys.argv[1:], "th")
1350 except getopt.error, msg:
1351 print msg
1352 print "Use -h for help"
1353 return
1354 t = 0
1355 for o, a in opts:
1356 if o == '-t':
1357 t = t + 1
1358 if o == '-h':
1359 print "Usage: python urllib.py [-t] [url ...]"
1360 print "-t runs self-test;",
1361 print "otherwise, contents of urls are printed"
1362 return
1363 if t:
1364 if t > 1:
1365 test1()
1366 test(args)
1367 else:
1368 if not args:
1369 print "Use -h for help"
1370 for url in args:
1371 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001372
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001373# Run test program when run as a script
1374if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001375 main()