blob: 9a2c0baebbd56e749cd9c8726be4a991f869fe0f [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossum3c8484e1996-11-20 22:02:24 +000028import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000029import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000030
# Version string; URLopener embeds it in its default User-agent header.
__version__ = '1.15' # XXX This version is not always updated :-(

# URLopener.open_ftp() prunes its connection cache once the cache holds
# more entries than this.
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000034
# Select the URL <-> local pathname converters for this platform.
# Mac and NT have dedicated modules; everywhere else the conversion is
# plain URL (un)quoting.
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Return pathname with its URL quoting removed (see unquote())."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Return pathname with URL quoting applied (see quote())."""
        return quote(pathname)
Guido van Rossum33add0a1998-12-18 15:25:22 +000045
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000046# This really consists of two pieces:
47# (1) a class which handles opening of all sorts of URLs
48# (plus assorted utilities etc.)
49# (2) a set of functions for parsing URLs
50# XXX Should these be separated out into different modules?
51
52
53# Shortcut for basic usage
# Opener shared by urlopen(), urlretrieve() and urlcleanup(); it is
# created the first time one of the first two is called.
_urlopener = None
def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    # Only pass data along when the caller supplied it, so that open()
    # is invoked with exactly the arguments the caller gave.
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url via the shared opener's retrieve() method.

    The shared FancyURLopener is created on first use.  All arguments
    are handed to its retrieve() method unchanged and its result is
    returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000072
73
# Module-level FTP connection cache.  URLopener.__init__() stores a
# reference to this dictionary in self.ftpcache, so openers share it
# unless their .ftpcache attribute is reassigned.
ftpcache = {}
75class URLopener:
Guido van Rossume7b146f2000-02-04 15:28:42 +000076 """Class to open URLs.
77 This is a class rather than just a subroutine because we may need
78 more than one set of global protocol-specific options.
79 Note -- this is a base class for those who don't want the
80 automatic handling of errors type 302 (relocated) and 401
81 (authorization needed)."""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000082
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000083 __tempfiles = None
Guido van Rossum29e77811996-11-27 19:39:58 +000084
Guido van Rossumba311382000-08-24 16:18:04 +000085 version = "Python-urllib/%s" % __version__
86
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000087 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000088 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000089 if proxies is None:
90 proxies = getproxies()
91 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
92 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000093 self.key_file = x509.get('key_file')
94 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +000095 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000096 self.__tempfiles = []
97 self.__unlink = os.unlink # See cleanup()
98 self.tempcache = None
99 # Undocumented feature: if you assign {} to tempcache,
100 # it is used to cache files retrieved with
101 # self.retrieve(). This is not enabled by default
102 # since it does not work for changing documents (and I
103 # haven't got the logic to check expiration headers
104 # yet).
105 self.ftpcache = ftpcache
106 # Undocumented feature: you can use a different
107 # ftp cache by assigning to the .ftpcache member;
108 # in case you want logically independent URL openers
109 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000110
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000111 def __del__(self):
112 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000114 def close(self):
115 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000116
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000117 def cleanup(self):
118 # This code sometimes runs when the rest of this module
119 # has already been deleted, so it can't use any globals
120 # or import anything.
121 if self.__tempfiles:
122 for file in self.__tempfiles:
123 try:
124 self.__unlink(file)
125 except:
126 pass
127 del self.__tempfiles[:]
128 if self.tempcache:
129 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000130
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000131 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000132 """Add a header to be used by the HTTP interface only
133 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000134 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000135
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000136 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000137 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000138 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000139 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000140 if self.tempcache and self.tempcache.has_key(fullurl):
141 filename, headers = self.tempcache[fullurl]
142 fp = open(filename, 'rb')
143 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000144 urltype, url = splittype(fullurl)
145 if not urltype:
146 urltype = 'file'
147 if self.proxies.has_key(urltype):
148 proxy = self.proxies[urltype]
149 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000150 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000151 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000152 else:
153 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000154 name = 'open_' + urltype
155 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 if '-' in name:
157 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000158 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000159 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000160 if proxy:
161 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000162 else:
163 return self.open_unknown(fullurl, data)
164 try:
165 if data is None:
166 return getattr(self, name)(url)
167 else:
168 return getattr(self, name)(url, data)
169 except socket.error, msg:
170 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000171
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000173 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000174 type, url = splittype(fullurl)
175 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000176
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000177 def open_unknown_proxy(self, proxy, fullurl, data=None):
178 """Overridable interface to open unknown URL type."""
179 type, url = splittype(fullurl)
180 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
181
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000182 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000183 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000184 """retrieve(url) returns (filename, None) for a local object
185 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000186 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000187 if self.tempcache and self.tempcache.has_key(url):
188 return self.tempcache[url]
189 type, url1 = splittype(url)
190 if not filename and (not type or type == 'file'):
191 try:
192 fp = self.open_local_file(url1)
193 hdrs = fp.info()
194 del fp
195 return url2pathname(splithost(url1)[1]), hdrs
196 except IOError, msg:
197 pass
Fred Drake316a7932000-08-24 01:01:26 +0000198 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 headers = fp.info()
200 if not filename:
201 import tempfile
202 garbage, path = splittype(url)
203 garbage, path = splithost(path or "")
204 path, garbage = splitquery(path or "")
205 path, garbage = splitattr(path or "")
206 suffix = os.path.splitext(path)[1]
207 filename = tempfile.mktemp(suffix)
208 self.__tempfiles.append(filename)
209 result = filename, headers
210 if self.tempcache is not None:
211 self.tempcache[url] = result
212 tfp = open(filename, 'wb')
213 bs = 1024*8
214 size = -1
215 blocknum = 1
216 if reporthook:
217 if headers.has_key("content-length"):
218 size = int(headers["Content-Length"])
219 reporthook(0, bs, size)
220 block = fp.read(bs)
221 if reporthook:
222 reporthook(1, bs, size)
223 while block:
224 tfp.write(block)
225 block = fp.read(bs)
226 blocknum = blocknum + 1
227 if reporthook:
228 reporthook(blocknum, bs, size)
229 fp.close()
230 tfp.close()
231 del fp
232 del tfp
233 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000234
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000235 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000236
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000237 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000238 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000239 import httplib
240 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000241 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000242 host, selector = splithost(url)
243 if host:
244 user_passwd, host = splituser(host)
245 host = unquote(host)
246 realhost = host
247 else:
248 host, selector = url
249 urltype, rest = splittype(selector)
250 url = rest
251 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000252 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000253 realhost = None
254 else:
255 realhost, rest = splithost(rest)
256 if realhost:
257 user_passwd, realhost = splituser(realhost)
258 if user_passwd:
259 selector = "%s://%s%s" % (urltype, realhost, rest)
260 #print "proxy via http:", host, selector
261 if not host: raise IOError, ('http error', 'no host given')
262 if user_passwd:
263 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000264 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000265 else:
266 auth = None
267 h = httplib.HTTP(host)
268 if data is not None:
269 h.putrequest('POST', selector)
270 h.putheader('Content-type', 'application/x-www-form-urlencoded')
271 h.putheader('Content-length', '%d' % len(data))
272 else:
273 h.putrequest('GET', selector)
274 if auth: h.putheader('Authorization', 'Basic %s' % auth)
275 if realhost: h.putheader('Host', realhost)
276 for args in self.addheaders: apply(h.putheader, args)
277 h.endheaders()
278 if data is not None:
279 h.send(data + '\r\n')
280 errcode, errmsg, headers = h.getreply()
281 fp = h.getfile()
282 if errcode == 200:
283 return addinfourl(fp, headers, "http:" + url)
284 else:
285 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000287 else:
288 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000289
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000291 """Handle http errors.
292 Derived class can override this, or provide specific handlers
293 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000294 # First check if there's a specific handler for this error
295 name = 'http_error_%d' % errcode
296 if hasattr(self, name):
297 method = getattr(self, name)
298 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000300 else:
301 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000302 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000303 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000304
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000306 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000307 void = fp.read()
308 fp.close()
309 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000310
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000311 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000312 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000313 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000314 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000315 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000316 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000317 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000318 if host:
319 user_passwd, host = splituser(host)
320 host = unquote(host)
321 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000322 else:
323 host, selector = url
324 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000325 url = rest
326 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000327 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000328 realhost = None
329 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000330 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000331 if realhost:
332 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000333 if user_passwd:
334 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000335 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000336 if not host: raise IOError, ('https error', 'no host given')
337 if user_passwd:
338 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000339 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000340 else:
341 auth = None
342 h = httplib.HTTPS(host, 0,
343 key_file=self.key_file,
344 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000345 if data is not None:
346 h.putrequest('POST', selector)
347 h.putheader('Content-type',
348 'application/x-www-form-urlencoded')
349 h.putheader('Content-length', '%d' % len(data))
350 else:
351 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000352 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000353 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000354 for args in self.addheaders: apply(h.putheader, args)
355 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000356 if data is not None:
357 h.send(data + '\r\n')
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000358 errcode, errmsg, headers = h.getreply()
359 fp = h.getfile()
360 if errcode == 200:
361 return addinfourl(fp, headers, url)
362 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000363 if data is None:
364 return self.http_error(url, fp, errcode, errmsg, headers)
365 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000366 return self.http_error(url, fp, errcode, errmsg, headers,
367 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000368
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000369 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000370 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000371 import gopherlib
372 host, selector = splithost(url)
373 if not host: raise IOError, ('gopher error', 'no host given')
374 host = unquote(host)
375 type, selector = splitgophertype(selector)
376 selector, query = splitquery(selector)
377 selector = unquote(selector)
378 if query:
379 query = unquote(query)
380 fp = gopherlib.send_query(selector, query, host)
381 else:
382 fp = gopherlib.send_selector(selector, host)
383 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000384
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000385 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000386 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000387 if url[:2] == '//' and url[2:3] != '/':
388 return self.open_ftp(url)
389 else:
390 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000391
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000392 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000393 """Use local file."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000394 import mimetypes, mimetools, StringIO
395 mtype = mimetypes.guess_type(url)[0]
396 headers = mimetools.Message(StringIO.StringIO(
397 'Content-Type: %s\n' % (mtype or 'text/plain')))
398 host, file = splithost(url)
399 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000400 urlfile = file
401 if file[:1] == '/':
402 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000403 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000404 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000405 host, port = splitport(host)
406 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000407 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000408 urlfile = file
409 if file[:1] == '/':
410 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000412 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000413 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000414
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000415 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000416 """Use FTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000417 host, path = splithost(url)
418 if not host: raise IOError, ('ftp error', 'no host given')
419 host, port = splitport(host)
420 user, host = splituser(host)
421 if user: user, passwd = splitpasswd(user)
422 else: passwd = None
423 host = unquote(host)
424 user = unquote(user or '')
425 passwd = unquote(passwd or '')
426 host = socket.gethostbyname(host)
427 if not port:
428 import ftplib
429 port = ftplib.FTP_PORT
430 else:
431 port = int(port)
432 path, attrs = splitattr(path)
433 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000434 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000435 dirs, file = dirs[:-1], dirs[-1]
436 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000437 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000438 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000439 # XXX thread unsafe!
440 if len(self.ftpcache) > MAXFTPCACHE:
441 # Prune the cache, rather arbitrarily
442 for k in self.ftpcache.keys():
443 if k != key:
444 v = self.ftpcache[k]
445 del self.ftpcache[k]
446 v.close()
447 try:
448 if not self.ftpcache.has_key(key):
449 self.ftpcache[key] = \
450 ftpwrapper(user, passwd, host, port, dirs)
451 if not file: type = 'D'
452 else: type = 'I'
453 for attr in attrs:
454 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000455 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000456 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000457 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000458 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
459 if retrlen is not None and retrlen >= 0:
460 import mimetools, StringIO
461 headers = mimetools.Message(StringIO.StringIO(
462 'Content-Length: %d\n' % retrlen))
463 else:
464 headers = noheaders()
465 return addinfourl(fp, headers, "ftp:" + url)
466 except ftperrors(), msg:
467 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000468
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000469 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000470 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000471 # ignore POSTed data
472 #
473 # syntax of data URLs:
474 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
475 # mediatype := [ type "/" subtype ] *( ";" parameter )
476 # data := *urlchar
477 # parameter := attribute "=" value
478 import StringIO, mimetools, time
479 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000480 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000481 except ValueError:
482 raise IOError, ('data error', 'bad data URL')
483 if not type:
484 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000485 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000486 if semi >= 0 and '=' not in type[semi:]:
487 encoding = type[semi+1:]
488 type = type[:semi]
489 else:
490 encoding = ''
491 msg = []
492 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
493 time.gmtime(time.time())))
494 msg.append('Content-type: %s' % type)
495 if encoding == 'base64':
496 import base64
497 data = base64.decodestring(data)
498 else:
499 data = unquote(data)
500 msg.append('Content-length: %d' % len(data))
501 msg.append('')
502 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000503 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000504 f = StringIO.StringIO(msg)
505 headers = mimetools.Message(f, 0)
506 f.fileno = None # needed for addinfourl
507 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000508
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000509
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as URLopener.__init__().

        Keyword arguments (e.g. key_file, cert_file) are forwarded too.
        """
        apply(URLopener.__init__, (self,) + args, kwargs)
        # (user, passwd) pairs keyed by 'realm@host', see get_user_passwd()
        self.auth_cache = {}
        # Number of redirects followed for the request in progress, and
        # the limit at which http_error_302() gives up.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        After self.maxtries redirects in a row the reply is treated as
        error 500 ("Redirect Recursion") instead of being followed.
        """
        self.tries += 1
        # Reset the counter on every way out, including exceptions, so
        # that a failed redirect does not count against later requests.
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when the reply carries neither header.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        fp.read()       # Read the rest of the reply before closing
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin("http:" + url, newurl)
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
        # Each URLopener.http_error_default() call below raises IOError,
        # so control never continues past it.
        if not headers.has_key('www-authenticate'):
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # self.type was recorded by open(); it selects
        # retry_http_basic_auth or retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd@ added to the host.

        Returns None when no user name or password was obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; those
        # evidently failed, so the cached entry is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd@ added to the host.

        Returns None when no user name or password was obtained.
        """
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = '//' + host + selector
        return self.open_https(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached answer is used unless clear_cache is true, in which
        case the cached entry is dropped and the user is asked again.
        """
        key = realm + '@' + host.lower()
        if self.auth_cache.has_key(key):
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000628
629
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000630# Utility functions
631
# Result of the first localhost() lookup, reused by later calls.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000639
# Result of the first thishost() lookup, reused by later calls.
_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000647
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is only imported on first use; the result (ftplib.all_errors)
    is kept in the module-level _ftperrors variable.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000656
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000657_noheaders = None
658def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000659 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000660 global _noheaders
661 if not _noheaders:
662 import mimetools
663 import StringIO
664 _noheaders = mimetools.Message(StringIO.StringIO(), 0)
665 _noheaders.fp.close() # Recycle file descriptor
666 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000667
668
669# Utility classes
670
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs):
        """Remember the connection parameters and connect right away."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)open the FTP connection, log in and cwd through self.dirs.

        Also clears the busy flag, so any transfer that was pending on
        a previous connection is forgotten.
        """
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start a transfer and return (file object, length).

        type 'd' or 'D' requests a directory listing; any other value
        is sent to the server as 'TYPE <type>' and the file is fetched
        with RETR.  If RETR is refused with a 550 reply, or no file
        name is given, a LIST is performed instead.  Other permanent
        FTP errors are re-raised as IOError('ftp error', reason).

        The returned file object calls endtransfer() when it is closed;
        the length is the second item returned by ftp.ntransfercmd().
        """
        import ftplib
        # Finish off any transfer still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: rebuild the connection and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with 550 leaves conn unset, so the
                # directory listing below is tried instead.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Read the server's end-of-transfer reply if a transfer is open.

        Does nothing when no transfer is marked busy; FTP errors while
        reading the reply are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the connection.

        FTP errors raised while closing are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000743
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object: read() and readline() are always
    forwarded, readlines() and fileno() only when the wrapped object
    provides them.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        # These two are optional on the wrapped object.
        for optional in ("readlines", "fileno"):
            if hasattr(self.fp, optional):
                setattr(self, optional, getattr(self.fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, repr(id(self)), repr(self.fp))
        return '<%s at %s whose fp = %s>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        for forwarded in ("read", "readline", "readlines", "fileno"):
            setattr(self, forwarded, None)
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000765
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has
    been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so that a second close() does not run it again.
        self.closehook = None
        self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000780
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000790
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000804
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000805
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    url is returned unchanged when it already carries a type (scheme),
    or when it starts with '//host' and base has no type to inherit.
    Otherwise the type, host and directory part of base are filled in
    as needed; the tag and query of base are always dropped.
    """
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            i = len(basepath)
        else:
            # else replace last component
            i = basepath.rfind('/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            # Find the slash that ends the parent directory, ignoring
            # the trailing slash of basepath itself.
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                # Reached the root; further '../' are left in path.
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000860
861
Guido van Rossum7c395db1994-07-04 22:14:49 +0000862# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000863# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000864# splittype('type:opaquestring') --> 'type', 'opaquestring'
865# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000866# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
867# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000868# splitport('host:port') --> 'host', 'port'
869# splitquery('/path?query') --> '/path', 'query'
870# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000871# splitattr('/path;attr1=value1;attr2=value2;...') ->
872# '/path', ['attr1=value1', 'attr2=value2', ...]
873# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000874# splitgophertype('/Xselector') --> 'X', 'selector'
875# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
877
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode URLs are encoded to ASCII; anything else is returned
    unchanged.  Raises UnicodeError when the URL contains non-ASCII
    characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if type(url) is not type(u''):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
889
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading
    'URL:' marker are each removed when present.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000897
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url has no
    leading 'type:' part.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily so that importing the module stays cheap.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its colon.
    return found.group(1).lower(), url[found.end():]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000911
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000923
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() when an '@' is present;
    otherwise (None, host) is returned.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.groups()]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000935
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; passwd is None when there is
    no ':' at all.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +0000947
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None (and host
    is returned unchanged) unless host ends in ':' plus digits.
    """
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000960
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.groups()
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +0000982
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; query is None when url has none.
    """
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000994
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; tag is None when url has none.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001006
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
Guido van Rossum7c395db1994-07-04 22:14:49 +00001012
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; value is None when attr has
    no '='.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
Guido van Rossum7c395db1994-07-04 22:14:49 +00001024
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector is a '/' followed by at
    least one more character.
    """
    if len(selector) < 2 or selector[0] != '/':
        return None, selector
    return selector[1], selector[2:]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001030
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by two hexadecimal digits is replaced by the
    character with that code.  A '%' that is not followed by a valid
    two-digit hex number is left in the result unchanged.
    """
    pieces = s.split('%')
    res = [pieces[0]]
    myappend = res.append
    # Every remaining piece was preceded by a '%' in the input.
    for item in pieces[1:]:
        if item[1:2]:
            try:
                myappend(chr(int(item[:2], 16)) + item[2:])
            except ValueError:
                # Not a hex escape (or not decodable): keep it literally.
                # Only ValueError is caught so that KeyboardInterrupt
                # and other unrelated errors are not swallowed.
                myappend('%' + item)
        else:
            # Fewer than two characters follow the '%'.
            myappend('%' + item)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001049
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    if '+' in s:
        s = s.replace('+', ' ')
    return unquote(s)
Guido van Rossum0564e121996-12-13 14:47:36 +00001056
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set used by quote() when called with its default arguments,
# and a lookup table for it that is filled in on first use.
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s using the default safe set (always_safe plus '/')."""
    global _fast_safe
    if _fast_safe is None:
        table = {}
        for ch in _fast_safe_test:
            table[ch] = ch
        _fast_safe = table
    out = []
    for ch in s:
        if ch in _fast_safe:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character that is neither in always_safe nor in safe is
    replaced by '%XX', XX being its code in upper-case hexadecimal.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        # Default safe set: use the dictionary-based fast path.
        return _fast_quote(s)
    out = []
    for ch in s:
        if ch in safe:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001107
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Each space-separated piece of s is passed to quote() with the
    given safe characters and the pieces are rejoined with '+'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(word, safe) for word in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001117
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001118def urlencode(query,doseq=0):
1119 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001120
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001121 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001122 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001123
1124 If the query arg is a sequence of two-element tuples, the order of the
1125 parameters in the output will match the order of parameters in the
1126 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001127 """
Tim Peters658cba62001-02-09 20:06:00 +00001128
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001129 if hasattr(query,"items"):
1130 # mapping objects
1131 query = query.items()
1132 else:
1133 # it's a bother at times that strings and string-like objects are
1134 # sequences...
1135 try:
1136 # non-sequence items should not work with len()
1137 x = len(query)
1138 # non-empty strings will fail this
1139 if len(query) and type(query[0]) != types.TupleType:
1140 raise TypeError
1141 # zero-length sequences of all types will get here and succeed,
1142 # but that's a minor nit - since the original implementation
1143 # allowed empty dicts that type of behavior probably should be
1144 # preserved for consistency
1145 except TypeError:
1146 ty,va,tb = sys.exc_info()
1147 raise TypeError, "not a valid non-string sequence or mapping object", tb
1148
Guido van Rossume7b146f2000-02-04 15:28:42 +00001149 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001150 if not doseq:
1151 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001152 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001153 k = quote_plus(str(k))
1154 v = quote_plus(str(v))
1155 l.append(k + '=' + v)
1156 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001157 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001158 k = quote_plus(str(k))
1159 if type(v) == types.StringType:
1160 v = quote_plus(v)
1161 l.append(k + '=' + v)
1162 elif type(v) == types.UnicodeType:
1163 # is there a reasonable way to convert to ASCII?
1164 # encode generates a string, but "replace" or "ignore"
1165 # lose information and "strict" can raise UnicodeError
1166 v = quote_plus(v.encode("ASCII","replace"))
1167 l.append(k + '=' + v)
1168 else:
1169 try:
1170 # is this a sufficient test for sequence-ness?
1171 x = len(v)
1172 except TypeError:
1173 # not a sequence
1174 v = quote_plus(str(v))
1175 l.append(k + '=' + v)
1176 else:
1177 # loop over the sequence
1178 for elt in v:
1179 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001180 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001181
Guido van Rossum442e7201996-03-20 15:33:11 +00001182# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  Variables
    with an empty value are ignored.  If you need a different way, you
    can pass a proxies dictionary to the [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered.endswith('_proxy'):
            # Strip the 6-character '_proxy' suffix to get the scheme.
            proxies[lowered[:-6]] = value
    return proxies
1198
# Pick the getproxies() implementation for the current platform.
if os.name == 'mac':
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the ic module or its
        configuration cannot be opened.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        The dictionary is empty when _winreg is unavailable, when
        ProxyEnable is false, or when the registry cannot be read.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        proxies[protocol] = '%s://%s' % (protocol, address)
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            # NOTE(review): not reached when one of the calls above
            # raises, so the key is left open in that case.
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        # The registry is only consulted when the environment yields
        # no proxies at all.
        return getproxies_environment() or getproxies_registry()
else:
    # By default use environment variables
    getproxies = getproxies_environment
1283
Guido van Rossum442e7201996-03-20 15:33:11 +00001284
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001285# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 characters through quote() and unquote().

    Prints 'Wrong!' when the round trip changes the data, then the
    repr of the input, of the quoted and of the unquoted string, and
    finally the elapsed time in seconds.
    """
    import time
    s = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    finished = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(finished - started, 3))
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001301
1302
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print one progress line."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001307
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001308# Test program
def test(args=[]):
    """Retrieve each URL in args and print its headers and contents.

    When args is empty a built-in list of sample URLs is used (plus an
    https URL if URLopener has an open_https method).  urlcleanup() is
    always called at the end.
    """
    if not args:
        # Rebinds the local name only; the shared default list is
        # never modified.
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/etc/passwd',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            fp = open(fn, 'rb')
            data = fp.read()
            del fp
            if '\r' in data:
                # Delete every carriage return before printing.
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        print '-'*40
    finally:
        urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001341
def main():
    """Command-line entry point.

    -h prints a usage message.  Each -t counts toward the self-test:
    one -t runs test(args), two or more also run test1() first.
    Without -t the contents of every URL argument are printed.
    """
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    # Number of -t options seen.
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001368
# Run test program when run as a script
# (main() parses sys.argv; see its -h option for usage).
if __name__ == '__main__':
    main()