blob: 62b0787a58b3ef95305af3578f431f60f65ad23b [file] [log] [blame]
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000024
Guido van Rossum7c395db1994-07-04 22:14:49 +000025import string
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000026import socket
Jack Jansendc3e3f61995-12-15 13:22:13 +000027import os
Guido van Rossum3c8484e1996-11-20 22:02:24 +000028import sys
Martin v. Löwis1d994332000-12-03 18:30:10 +000029import types
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000030
# Interpolated into URLopener.version, which becomes the default
# 'User-agent' header value.
__version__ = '1.15'    # XXX This version is not always updated :-(

# open_ftp() prunes its connection cache once it holds more than this
# many entries.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
Guido van Rossum6cb15a01995-06-22 19:00:13 +000034
# Helper for non-unix systems
if os.name not in ('mac', 'nt'):
    # Generic platforms: the mapping between a URL path and a local
    # path is plain percent-(un)quoting.  quote() and unquote() are
    # looked up at call time, so they only need to exist by then.
    def url2pathname(pathname):
        """Convert the path part of a URL into a local file name."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Convert a local file name into the path part of a URL."""
        return quote(pathname)
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
else:
    from nturl2path import url2pathname, pathname2url
Guido van Rossum33add0a1998-12-18 15:25:22 +000045
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000046# This really consists of two pieces:
47# (1) a class which handles opening of all sorts of URLs
48# (plus assorted utilities etc.)
49# (2) a set of functions for parsing URLs
50# XXX Should these be separated out into different modules?
51
52
53# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    opener = _urlopener
    if not opener:
        # Create the shared module-level opener on first use.
        opener = _urlopener = FancyURLopener()
    # Pass data only when it was supplied, so open() is called with
    # exactly the arguments the caller gave.
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Call retrieve() on the shared opener and return its result.

    The shared FancyURLopener is created on first use; all four
    arguments are passed through unchanged.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +000072
73
# Module-level FTP connection cache; URLopener.__init__ stores a
# reference to this same dictionary as self.ftpcache.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default; __init__ replaces it with a per-instance
    # list.  Presumably here so that cleanup() still finds the
    # attribute when __init__ did not complete.
    __tempfiles = None

    # Value of the initial 'User-agent' entry in self.addheaders.
    version = "Python-urllib/%s" % __version__
86
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000087 # Constructor
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000088 def __init__(self, proxies=None, **x509):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000089 if proxies is None:
90 proxies = getproxies()
91 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
92 self.proxies = proxies
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000093 self.key_file = x509.get('key_file')
94 self.cert_file = x509.get('cert_file')
Guido van Rossumba311382000-08-24 16:18:04 +000095 self.addheaders = [('User-agent', self.version)]
Jeremy Hyltonf90b0021999-02-25 16:12:12 +000096 self.__tempfiles = []
97 self.__unlink = os.unlink # See cleanup()
98 self.tempcache = None
99 # Undocumented feature: if you assign {} to tempcache,
100 # it is used to cache files retrieved with
101 # self.retrieve(). This is not enabled by default
102 # since it does not work for changing documents (and I
103 # haven't got the logic to check expiration headers
104 # yet).
105 self.ftpcache = ftpcache
106 # Undocumented feature: you can use a different
107 # ftp cache by assigning to the .ftpcache member;
108 # in case you want logically independent URL openers
109 # XXX This is not threadsafe. Bah.
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000110
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000111 def __del__(self):
112 self.close()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000113
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000114 def close(self):
115 self.cleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000116
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000117 def cleanup(self):
118 # This code sometimes runs when the rest of this module
119 # has already been deleted, so it can't use any globals
120 # or import anything.
121 if self.__tempfiles:
122 for file in self.__tempfiles:
123 try:
124 self.__unlink(file)
125 except:
126 pass
127 del self.__tempfiles[:]
128 if self.tempcache:
129 self.tempcache.clear()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000130
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000131 def addheader(self, *args):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000132 """Add a header to be used by the HTTP interface only
133 e.g. u.addheader('Accept', 'sound/basic')"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000134 self.addheaders.append(args)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000135
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000136 # External interface
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000137 def open(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000138 """Use URLopener().open(file) instead of open(file, 'r')."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000139 fullurl = unwrap(toBytes(fullurl))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000140 if self.tempcache and self.tempcache.has_key(fullurl):
141 filename, headers = self.tempcache[fullurl]
142 fp = open(filename, 'rb')
143 return addinfourl(fp, headers, fullurl)
Martin v. Löwis1d994332000-12-03 18:30:10 +0000144 urltype, url = splittype(fullurl)
145 if not urltype:
146 urltype = 'file'
147 if self.proxies.has_key(urltype):
148 proxy = self.proxies[urltype]
149 urltype, proxyhost = splittype(proxy)
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000150 host, selector = splithost(proxyhost)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000151 url = (host, fullurl) # Signal special case to open_*()
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000152 else:
153 proxy = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000154 name = 'open_' + urltype
155 self.type = urltype
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000156 if '-' in name:
157 # replace - with _
Guido van Rossumb2493f82000-12-15 15:01:37 +0000158 name = '_'.join(name.split('-'))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000159 if not hasattr(self, name):
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000160 if proxy:
161 return self.open_unknown_proxy(proxy, fullurl, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000162 else:
163 return self.open_unknown(fullurl, data)
164 try:
165 if data is None:
166 return getattr(self, name)(url)
167 else:
168 return getattr(self, name)(url, data)
169 except socket.error, msg:
170 raise IOError, ('socket error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000171
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000172 def open_unknown(self, fullurl, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000173 """Overridable interface to open unknown URL type."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000174 type, url = splittype(fullurl)
175 raise IOError, ('url error', 'unknown url type', type)
Guido van Rossumca445401995-08-29 19:19:12 +0000176
Jeremy Hyltond52755f2000-10-02 23:04:02 +0000177 def open_unknown_proxy(self, proxy, fullurl, data=None):
178 """Overridable interface to open unknown URL type."""
179 type, url = splittype(fullurl)
180 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
181
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000182 # External interface
Sjoerd Mullenderd7b86f02000-08-25 11:23:36 +0000183 def retrieve(self, url, filename=None, reporthook=None, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000184 """retrieve(url) returns (filename, None) for a local object
185 or (tempfilename, headers) for a remote object."""
Martin v. Löwis1d994332000-12-03 18:30:10 +0000186 url = unwrap(toBytes(url))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000187 if self.tempcache and self.tempcache.has_key(url):
188 return self.tempcache[url]
189 type, url1 = splittype(url)
190 if not filename and (not type or type == 'file'):
191 try:
192 fp = self.open_local_file(url1)
193 hdrs = fp.info()
194 del fp
195 return url2pathname(splithost(url1)[1]), hdrs
196 except IOError, msg:
197 pass
Fred Drake316a7932000-08-24 01:01:26 +0000198 fp = self.open(url, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000199 headers = fp.info()
200 if not filename:
201 import tempfile
202 garbage, path = splittype(url)
203 garbage, path = splithost(path or "")
204 path, garbage = splitquery(path or "")
205 path, garbage = splitattr(path or "")
206 suffix = os.path.splitext(path)[1]
207 filename = tempfile.mktemp(suffix)
208 self.__tempfiles.append(filename)
209 result = filename, headers
210 if self.tempcache is not None:
211 self.tempcache[url] = result
212 tfp = open(filename, 'wb')
213 bs = 1024*8
214 size = -1
215 blocknum = 1
216 if reporthook:
217 if headers.has_key("content-length"):
218 size = int(headers["Content-Length"])
219 reporthook(0, bs, size)
220 block = fp.read(bs)
221 if reporthook:
222 reporthook(1, bs, size)
223 while block:
224 tfp.write(block)
225 block = fp.read(bs)
226 blocknum = blocknum + 1
227 if reporthook:
228 reporthook(blocknum, bs, size)
229 fp.close()
230 tfp.close()
231 del fp
232 del tfp
233 return result
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000234
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000235 # Each method named open_<type> knows how to open that type of URL
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000236
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000237 def open_http(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000238 """Use HTTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000239 import httplib
240 user_passwd = None
Martin v. Löwis1d994332000-12-03 18:30:10 +0000241 if type(url) is types.StringType:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000242 host, selector = splithost(url)
243 if host:
244 user_passwd, host = splituser(host)
245 host = unquote(host)
246 realhost = host
247 else:
248 host, selector = url
249 urltype, rest = splittype(selector)
250 url = rest
251 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000252 if urltype.lower() != 'http':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000253 realhost = None
254 else:
255 realhost, rest = splithost(rest)
256 if realhost:
257 user_passwd, realhost = splituser(realhost)
258 if user_passwd:
259 selector = "%s://%s%s" % (urltype, realhost, rest)
260 #print "proxy via http:", host, selector
261 if not host: raise IOError, ('http error', 'no host given')
262 if user_passwd:
263 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000264 auth = base64.encodestring(user_passwd).strip()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000265 else:
266 auth = None
267 h = httplib.HTTP(host)
268 if data is not None:
269 h.putrequest('POST', selector)
270 h.putheader('Content-type', 'application/x-www-form-urlencoded')
271 h.putheader('Content-length', '%d' % len(data))
272 else:
273 h.putrequest('GET', selector)
274 if auth: h.putheader('Authorization', 'Basic %s' % auth)
275 if realhost: h.putheader('Host', realhost)
276 for args in self.addheaders: apply(h.putheader, args)
277 h.endheaders()
278 if data is not None:
279 h.send(data + '\r\n')
280 errcode, errmsg, headers = h.getreply()
281 fp = h.getfile()
282 if errcode == 200:
283 return addinfourl(fp, headers, "http:" + url)
284 else:
285 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000286 return self.http_error(url, fp, errcode, errmsg, headers)
Guido van Rossum29aab751999-03-09 19:31:21 +0000287 else:
288 return self.http_error(url, fp, errcode, errmsg, headers, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000289
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000290 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000291 """Handle http errors.
292 Derived class can override this, or provide specific handlers
293 named http_error_DDD where DDD is the 3-digit error code."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000294 # First check if there's a specific handler for this error
295 name = 'http_error_%d' % errcode
296 if hasattr(self, name):
297 method = getattr(self, name)
298 if data is None:
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000299 result = method(url, fp, errcode, errmsg, headers)
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000300 else:
301 result = method(url, fp, errcode, errmsg, headers, data)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000302 if result: return result
Jeremy Hyltonb30f52a1999-02-25 16:14:58 +0000303 return self.http_error_default(url, fp, errcode, errmsg, headers)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000304
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000305 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000306 """Default error handler: close the connection and raise IOError."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000307 void = fp.read()
308 fp.close()
309 raise IOError, ('http error', errcode, errmsg, headers)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000310
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000311 if hasattr(socket, "ssl"):
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000312 def open_https(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000313 """Use HTTPS protocol."""
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000314 import httplib
Fred Drake567ca8e2000-08-21 21:42:42 +0000315 user_passwd = None
Moshe Zadkab2a0a832001-01-08 07:09:25 +0000316 if type(url) is types.StringType:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000317 host, selector = splithost(url)
Fred Drake567ca8e2000-08-21 21:42:42 +0000318 if host:
319 user_passwd, host = splituser(host)
320 host = unquote(host)
321 realhost = host
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000322 else:
323 host, selector = url
324 urltype, rest = splittype(selector)
Fred Drake567ca8e2000-08-21 21:42:42 +0000325 url = rest
326 user_passwd = None
Guido van Rossumb2493f82000-12-15 15:01:37 +0000327 if urltype.lower() != 'https':
Fred Drake567ca8e2000-08-21 21:42:42 +0000328 realhost = None
329 else:
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000330 realhost, rest = splithost(rest)
Fred Drake567ca8e2000-08-21 21:42:42 +0000331 if realhost:
332 user_passwd, realhost = splituser(realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000333 if user_passwd:
334 selector = "%s://%s%s" % (urltype, realhost, rest)
Andrew M. Kuchling7ad47922000-06-10 01:41:48 +0000335 #print "proxy via https:", host, selector
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000336 if not host: raise IOError, ('https error', 'no host given')
337 if user_passwd:
338 import base64
Guido van Rossumb2493f82000-12-15 15:01:37 +0000339 auth = base64.encodestring(user_passwd).strip()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000340 else:
341 auth = None
342 h = httplib.HTTPS(host, 0,
343 key_file=self.key_file,
344 cert_file=self.cert_file)
Andrew M. Kuchling141e9892000-04-23 02:53:11 +0000345 if data is not None:
346 h.putrequest('POST', selector)
347 h.putheader('Content-type',
348 'application/x-www-form-urlencoded')
349 h.putheader('Content-length', '%d' % len(data))
350 else:
351 h.putrequest('GET', selector)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000352 if auth: h.putheader('Authorization: Basic %s' % auth)
Fred Drake567ca8e2000-08-21 21:42:42 +0000353 if realhost: h.putheader('Host', realhost)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000354 for args in self.addheaders: apply(h.putheader, args)
355 h.endheaders()
Andrew M. Kuchling43c5af02000-04-24 14:17:06 +0000356 if data is not None:
357 h.send(data + '\r\n')
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000358 errcode, errmsg, headers = h.getreply()
359 fp = h.getfile()
360 if errcode == 200:
361 return addinfourl(fp, headers, url)
362 else:
Fred Drake567ca8e2000-08-21 21:42:42 +0000363 if data is None:
364 return self.http_error(url, fp, errcode, errmsg, headers)
365 else:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000366 return self.http_error(url, fp, errcode, errmsg, headers,
367 data)
Fred Drake567ca8e2000-08-21 21:42:42 +0000368
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000369 def open_gopher(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000370 """Use Gopher protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000371 import gopherlib
372 host, selector = splithost(url)
373 if not host: raise IOError, ('gopher error', 'no host given')
374 host = unquote(host)
375 type, selector = splitgophertype(selector)
376 selector, query = splitquery(selector)
377 selector = unquote(selector)
378 if query:
379 query = unquote(query)
380 fp = gopherlib.send_query(selector, query, host)
381 else:
382 fp = gopherlib.send_selector(selector, host)
383 return addinfourl(fp, noheaders(), "gopher:" + url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000384
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000385 def open_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000386 """Use local file or FTP depending on form of URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000387 if url[:2] == '//' and url[2:3] != '/':
388 return self.open_ftp(url)
389 else:
390 return self.open_local_file(url)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000391
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000392 def open_local_file(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000393 """Use local file."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000394 import mimetypes, mimetools, StringIO
395 mtype = mimetypes.guess_type(url)[0]
396 headers = mimetools.Message(StringIO.StringIO(
397 'Content-Type: %s\n' % (mtype or 'text/plain')))
398 host, file = splithost(url)
399 if not host:
Guido van Rossum336a2011999-06-24 15:27:36 +0000400 urlfile = file
401 if file[:1] == '/':
402 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000403 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000404 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000405 host, port = splitport(host)
406 if not port \
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000407 and socket.gethostbyname(host) in (localhost(), thishost()):
Guido van Rossum336a2011999-06-24 15:27:36 +0000408 urlfile = file
409 if file[:1] == '/':
410 urlfile = 'file://' + file
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000411 return addinfourl(open(url2pathname(file), 'rb'),
Guido van Rossum336a2011999-06-24 15:27:36 +0000412 headers, urlfile)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000413 raise IOError, ('local file error', 'not on local host')
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000414
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000415 def open_ftp(self, url):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000416 """Use FTP protocol."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000417 host, path = splithost(url)
418 if not host: raise IOError, ('ftp error', 'no host given')
419 host, port = splitport(host)
420 user, host = splituser(host)
421 if user: user, passwd = splitpasswd(user)
422 else: passwd = None
423 host = unquote(host)
424 user = unquote(user or '')
425 passwd = unquote(passwd or '')
426 host = socket.gethostbyname(host)
427 if not port:
428 import ftplib
429 port = ftplib.FTP_PORT
430 else:
431 port = int(port)
432 path, attrs = splitattr(path)
433 path = unquote(path)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000434 dirs = path.split('/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000435 dirs, file = dirs[:-1], dirs[-1]
436 if dirs and not dirs[0]: dirs = dirs[1:]
Guido van Rossum5e006a31999-08-18 17:40:33 +0000437 if dirs and not dirs[0]: dirs[0] = '/'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000438 key = user, host, port, '/'.join(dirs)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000439 # XXX thread unsafe!
440 if len(self.ftpcache) > MAXFTPCACHE:
441 # Prune the cache, rather arbitrarily
442 for k in self.ftpcache.keys():
443 if k != key:
444 v = self.ftpcache[k]
445 del self.ftpcache[k]
446 v.close()
447 try:
448 if not self.ftpcache.has_key(key):
449 self.ftpcache[key] = \
450 ftpwrapper(user, passwd, host, port, dirs)
451 if not file: type = 'D'
452 else: type = 'I'
453 for attr in attrs:
454 attr, value = splitvalue(attr)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000455 if attr.lower() == 'type' and \
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000456 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000457 type = value.upper()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000458 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
459 if retrlen is not None and retrlen >= 0:
460 import mimetools, StringIO
461 headers = mimetools.Message(StringIO.StringIO(
462 'Content-Length: %d\n' % retrlen))
463 else:
464 headers = noheaders()
465 return addinfourl(fp, headers, "ftp:" + url)
466 except ftperrors(), msg:
467 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000468
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000469 def open_data(self, url, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000470 """Use "data" URL."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000471 # ignore POSTed data
472 #
473 # syntax of data URLs:
474 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
475 # mediatype := [ type "/" subtype ] *( ";" parameter )
476 # data := *urlchar
477 # parameter := attribute "=" value
478 import StringIO, mimetools, time
479 try:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000480 [type, data] = url.split(',', 1)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000481 except ValueError:
482 raise IOError, ('data error', 'bad data URL')
483 if not type:
484 type = 'text/plain;charset=US-ASCII'
Guido van Rossumb2493f82000-12-15 15:01:37 +0000485 semi = type.rfind(';')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000486 if semi >= 0 and '=' not in type[semi:]:
487 encoding = type[semi+1:]
488 type = type[:semi]
489 else:
490 encoding = ''
491 msg = []
492 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
493 time.gmtime(time.time())))
494 msg.append('Content-type: %s' % type)
495 if encoding == 'base64':
496 import base64
497 data = base64.decodestring(data)
498 else:
499 data = unquote(data)
500 msg.append('Content-length: %d' % len(data))
501 msg.append('')
502 msg.append(data)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000503 msg = '\n'.join(msg)
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000504 f = StringIO.StringIO(msg)
505 headers = mimetools.Message(f, 0)
506 f.fileno = None # needed for addinfourl
507 return addinfourl(f, headers, url)
Guido van Rossum6d4d1c21998-03-12 14:32:55 +0000508
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000509
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Handles HTTP 301 and 302 by following the redirect and 401 by
    retrying with basic authentication; other HTTP error replies are
    returned as ordinary response objects instead of raising IOError.
    """
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000512
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000513 def __init__(self, *args):
514 apply(URLopener.__init__, (self,) + args)
515 self.auth_cache = {}
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000516 self.tries = 0
517 self.maxtries = 10
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000518
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000519 def http_error_default(self, url, fp, errcode, errmsg, headers):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000520 """Default error handling -- don't raise an exception."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000521 return addinfourl(fp, headers, "http:" + url)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000522
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000523 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000524 """Error 302 -- relocated (temporarily)."""
Skip Montanaroc3e11d62001-02-15 16:56:36 +0000525 self.tries += 1
526 if self.maxtries and self.tries >= self.maxtries:
527 if hasattr(self, "http_error_500"):
528 meth = self.http_error_500
529 else:
530 meth = self.http_error_default
531 self.tries = 0
532 return meth(url, fp, 500,
533 "Internal Server Error: Redirect Recursion", headers)
534 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
535 data)
536 self.tries = 0
537 return result
538
539 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000540 if headers.has_key('location'):
541 newurl = headers['location']
542 elif headers.has_key('uri'):
543 newurl = headers['uri']
544 else:
545 return
546 void = fp.read()
547 fp.close()
Guido van Rossum3527f591999-03-29 20:23:41 +0000548 # In case the server sent a relative URL, join with original:
549 newurl = basejoin("http:" + url, newurl)
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000550 if data is None:
551 return self.open(newurl)
552 else:
553 return self.open(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000554
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000555 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000556 """Error 301 -- also relocated (permanently)."""
557 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
Guido van Rossume6ad8911996-09-10 17:02:56 +0000558
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000559 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000560 """Error 401 -- authentication required.
561 See this URL for a description of the basic authentication scheme:
562 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000563 if headers.has_key('www-authenticate'):
564 stuff = headers['www-authenticate']
565 import re
566 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
567 if match:
568 scheme, realm = match.groups()
Guido van Rossumb2493f82000-12-15 15:01:37 +0000569 if scheme.lower() == 'basic':
Tim Peterse1190062001-01-15 03:34:38 +0000570 name = 'retry_' + self.type + '_basic_auth'
571 if data is None:
572 return getattr(self,name)(url, realm)
573 else:
574 return getattr(self,name)(url, realm, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000575
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000576 def retry_http_basic_auth(self, url, realm, data=None):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000577 host, selector = splithost(url)
Guido van Rossumb2493f82000-12-15 15:01:37 +0000578 i = host.find('@') + 1
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000579 host = host[i:]
580 user, passwd = self.get_user_passwd(host, realm, i)
581 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000582 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000583 newurl = 'http://' + host + selector
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000584 if data is None:
585 return self.open(newurl)
586 else:
587 return self.open(newurl, data)
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000588
Guido van Rossum3c8baed2000-02-01 23:36:55 +0000589 def retry_https_basic_auth(self, url, realm, data=None):
Tim Peterse1190062001-01-15 03:34:38 +0000590 host, selector = splithost(url)
591 i = host.find('@') + 1
592 host = host[i:]
593 user, passwd = self.get_user_passwd(host, realm, i)
594 if not (user or passwd): return None
Guido van Rossumafc4f042001-01-15 18:31:13 +0000595 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
Tim Peterse1190062001-01-15 03:34:38 +0000596 newurl = '//' + host + selector
Guido van Rossumafc4f042001-01-15 18:31:13 +0000597 return self.open_https(newurl, data)
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000598
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000599 def get_user_passwd(self, host, realm, clear_cache = 0):
Guido van Rossumb2493f82000-12-15 15:01:37 +0000600 key = realm + '@' + host.lower()
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000601 if self.auth_cache.has_key(key):
602 if clear_cache:
603 del self.auth_cache[key]
604 else:
605 return self.auth_cache[key]
606 user, passwd = self.prompt_user_passwd(host, realm)
607 if user or passwd: self.auth_cache[key] = (user, passwd)
608 return user, passwd
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000609
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000610 def prompt_user_passwd(self, host, realm):
Guido van Rossume7b146f2000-02-04 15:28:42 +0000611 """Override this in a GUI environment!"""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000612 import getpass
613 try:
614 user = raw_input("Enter username for %s at %s: " % (realm,
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000615 host))
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000616 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
617 (user, realm, host))
618 return user, passwd
619 except KeyboardInterrupt:
620 print
621 return None, None
Guido van Rossumbbb0a051995-08-04 04:29:05 +0000622
623
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000624# Utility functions
625
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost:
        # Looked up on an earlier call.
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000633
_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost:
        # Looked up on an earlier call.
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000641
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    This is ftplib.all_errors; ftplib is imported only on the first
    call and the value is then kept in the module global _ftperrors.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000650
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000651_noheaders = None
652def noheaders():
Guido van Rossume7b146f2000-02-04 15:28:42 +0000653 """Return an empty mimetools.Message object."""
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000654 global _noheaders
655 if not _noheaders:
656 import mimetools
657 import StringIO
658 _noheaders = mimetools.Message(StringIO.StringIO(), 0)
659 _noheaders.fp.close() # Recycle file descriptor
660 return _noheaders
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000661
662
663# Utility classes
664
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000665class ftpwrapper:
Guido van Rossume7b146f2000-02-04 15:28:42 +0000666 """Class used by open_ftp() for cache of open FTP connections."""
667
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000668 def __init__(self, user, passwd, host, port, dirs):
669 self.user = user
670 self.passwd = passwd
671 self.host = host
672 self.port = port
673 self.dirs = dirs
674 self.init()
Guido van Rossume7b146f2000-02-04 15:28:42 +0000675
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000676 def init(self):
677 import ftplib
678 self.busy = 0
679 self.ftp = ftplib.FTP()
680 self.ftp.connect(self.host, self.port)
681 self.ftp.login(self.user, self.passwd)
682 for dir in self.dirs:
683 self.ftp.cwd(dir)
Guido van Rossume7b146f2000-02-04 15:28:42 +0000684
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000685 def retrfile(self, file, type):
686 import ftplib
687 self.endtransfer()
688 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
689 else: cmd = 'TYPE ' + type; isdir = 0
690 try:
691 self.ftp.voidcmd(cmd)
692 except ftplib.all_errors:
693 self.init()
694 self.ftp.voidcmd(cmd)
695 conn = None
696 if file and not isdir:
697 # Use nlst to see if the file exists at all
698 try:
699 self.ftp.nlst(file)
700 except ftplib.error_perm, reason:
701 raise IOError, ('ftp error', reason), sys.exc_info()[2]
702 # Restore the transfer mode!
703 self.ftp.voidcmd(cmd)
704 # Try to retrieve as a file
705 try:
706 cmd = 'RETR ' + file
707 conn = self.ftp.ntransfercmd(cmd)
708 except ftplib.error_perm, reason:
Guido van Rossumb2493f82000-12-15 15:01:37 +0000709 if str(reason)[:3] != '550':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000710 raise IOError, ('ftp error', reason), sys.exc_info()[2]
711 if not conn:
712 # Set transfer mode to ASCII!
713 self.ftp.voidcmd('TYPE A')
714 # Try a directory listing
715 if file: cmd = 'LIST ' + file
716 else: cmd = 'LIST'
717 conn = self.ftp.ntransfercmd(cmd)
718 self.busy = 1
719 # Pass back both a suitably decorated object and a retrieval length
720 return (addclosehook(conn[0].makefile('rb'),
Fredrik Lundhb49f88b2000-09-24 18:51:25 +0000721 self.endtransfer), conn[1])
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000722 def endtransfer(self):
723 if not self.busy:
724 return
725 self.busy = 0
726 try:
727 self.ftp.voidresp()
728 except ftperrors():
729 pass
Guido van Rossume7b146f2000-02-04 15:28:42 +0000730
Jeremy Hyltonf90b0021999-02-25 16:12:12 +0000731 def close(self):
732 self.endtransfer()
733 try:
734 self.ftp.close()
735 except ftperrors():
736 pass
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000737
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read() and readline()
    methods, plus readlines() and fileno() when the wrapped object has
    them, as attributes of the wrapper.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # These two are optional on the wrapped object.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        description = (self.__class__.__name__, repr(id(self)), repr(self.fp))
        return '<%s at %s whose fp = %s>' % description

    def close(self):
        """Disable the forwarded methods, then close and forget the file."""
        for forwarded in ('read', 'readline', 'readlines', 'fileno'):
            setattr(self, forwarded, None)
        if self.fp:
            self.fp.close()
        self.fp = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000759
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    close() first closes the wrapped file and then calls
    closehook(*hookargs).  After the call returns, the hook is cleared
    so that a later close() does not run it again.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000774
class addinfo(addbase):
    """Class to add an info() method to an open file.

    info() hands back the headers object given to the constructor.
    """

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object stored at construction time."""
        return self.headers
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000784
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file.

    Both methods simply hand back the values given to the constructor.
    """

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.url = url
        self.headers = headers

    def info(self):
        """Return the headers object stored at construction time."""
        return self.headers

    def geturl(self):
        """Return the URL stored at construction time."""
        return self.url
Guido van Rossume6ad8911996-09-10 17:02:56 +0000798
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000799
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    A url that carries its own type (scheme) is returned unchanged.
    Otherwise the type, and where the url lacks them the host and the
    leading part of the path, are taken from base.
    """
    urltype, rest = splittype(url)
    if urltype:
        # url is complete (i.e., it contains a type): nothing to inherit
        return url
    host, path = splithost(rest)
    basetype, basepath = splittype(base)    # inherit type from base
    if host:
        # url names its own host, so only the type is inherited
        if basetype:
            return basetype + '://' + host + path
        # no type inherited, so url must have started with //;
        # just return it
        return url
    host, basepath = splithost(basepath)    # inherit host
    # remove extraneous cruft (#tag, then ?query) from the base path
    basepath = splittag(basepath)[0]
    basepath = splitquery(basepath)[0]
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            cut = len(basepath)
        else:
            # else replace last component
            cut = basepath.rfind('/')
        if cut >= 0:
            # remove last file component
            basepath = basepath[:cut+1]
        elif host:
            # basepath not absolute but host present: make absolute
            basepath = '/'
        else:
            # basepath not absolute, no host: keep non-absolute
            basepath = ''
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            cut = basepath[:-1].rfind('/')
            if cut > 0:
                basepath = basepath[:cut+1]
            elif cut == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if basetype and host:
        return basetype + '://' + host + path
    if basetype:
        return basetype + ':' + path
    if host:
        return '//' + host + path   # don't know what this means
    return path
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000854
855
Guido van Rossum7c395db1994-07-04 22:14:49 +0000856# Utilities to parse URLs (most of these return None for missing parts):
Sjoerd Mullendere0371b81995-11-10 10:36:07 +0000857# unwrap('<URL:type://host/path>') --> 'type://host/path'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000858# splittype('type:opaquestring') --> 'type', 'opaquestring'
859# splithost('//host[:port]/path') --> 'host[:port]', '/path'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000860# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
861# splitpasswd('user:passwd') -> 'user', 'passwd'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000862# splitport('host:port') --> 'host', 'port'
863# splitquery('/path?query') --> '/path', 'query'
864# splittag('/path#tag') --> '/path', 'tag'
Guido van Rossum7c395db1994-07-04 22:14:49 +0000865# splitattr('/path;attr1=value1;attr2=value2;...') ->
866# '/path', ['attr1=value1', 'attr2=value2', ...]
867# splitvalue('attr=value') --> 'attr', 'value'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000868# splitgophertype('/Xselector') --> 'X', 'selector'
869# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
871
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A unicode URL is converted to its ASCII encoding; any other object
    is returned untouched.  Raises UnicodeError when the URL contains
    non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if type(url) is not type(u''):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
883
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of enclosing angle brackets and a
    leading 'URL:' marker are each removed when present.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000891
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case.  When url has no leading
    'type:' part the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    remainder = url[len(scheme) + 1:]
    return scheme.lower(), remainder
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000905
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    When url does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000917
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the first '@' and both parts are passed
    through unquote().  Without an '@' the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
Guido van Rossum7c395db1994-07-04 22:14:49 +0000929
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without a ':' the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +0000941
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When host does not
    end in ':' followed by one or more digits the result is
    (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000954
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        # An empty port stays None, just like a non-numeric one.
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
Guido van Rossum53725a21996-06-13 19:12:35 +0000976
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +0000988
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001000
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
Guido van Rossum7c395db1994-07-04 22:14:49 +00001006
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without an '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
Guido van Rossum7c395db1994-07-04 22:14:49 +00001018
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is the single character after a leading '/'.  When the
    selector does not start with '/' followed by at least one
    character, the result is (None, selector).
    """
    if len(selector) >= 2 and selector[0] == '/':
        gophertype = selector[1]
        return gophertype, selector[2:]
    return None, selector
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001024
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape, where XX are two hexadecimal digits in either
    case, is replaced by the character with that code.  A '%' that is
    not followed by two hexadecimal digits is copied through unchanged.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        decoded = None
        # Check the two digits explicitly: int() alone also accepts a
        # sign or surrounding whitespace, so '%+f' or '% 1' would be
        # decoded as if they were escapes.
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            try:
                decoded = chr(int(code, 16)) + item[2:]
            except ValueError:
                # Stay best-effort and keep the escape as written.
                # NOTE(review): presumably this can only still happen
                # when the decoded byte cannot be joined to a unicode
                # remainder -- confirm.
                decoded = None
        if decoded is None:
            decoded = '%' + item
        res.append(decoded)
    return "".join(res)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001043
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    # replace '+' with ' '
    return unquote(s.replace('+', ' '))
Guido van Rossum0564e121996-12-13 14:47:36 +00001050
# Characters that quote() never escapes, whatever its 'safe' argument is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set that results from quote()'s default safe='/'.
_fast_safe_test = always_safe + '/'
# Dictionary form of _fast_safe_test; built on first use by _fast_quote().
_fast_safe = None

def _fast_quote(s):
    """Quote s using the default safe set, with dictionary lookups."""
    global _fast_safe
    if _fast_safe is None:
        table = {}
        for ch in _fast_safe_test:
            table[ch] = ch
        _fast_safe = table
    lookup = _fast_safe.get
    pieces = []
    for ch in s:
        if lookup(ch) is None:
            pieces.append('%%%02X' % ord(ch))
        else:
            pieces.append(ch)
    return ''.join(pieces)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    the safe argument by a '%XX' escape (two upper-case hex digits).

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        # Default safe set: take the dictionary-based path.
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in safe:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001101
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Everything between the spaces is quoted with quote(), using the
    given safe characters.
    """
    # Without a space, split() yields [s] and the join is just quote(s, safe).
    return '+'.join([quote(word, safe) for word in s.split(' ')])
Guido van Rossum0564e121996-12-13 14:47:36 +00001111
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001112def urlencode(query,doseq=0):
1113 """Encode a sequence of two-element tuples or dictionary into a URL query string.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001114
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001115 If any values in the query arg are sequences and doseq is true, each
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001116 sequence element is converted to a separate parameter.
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001117
1118 If the query arg is a sequence of two-element tuples, the order of the
1119 parameters in the output will match the order of parameters in the
1120 input.
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001121 """
Tim Peters658cba62001-02-09 20:06:00 +00001122
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001123 if hasattr(query,"items"):
1124 # mapping objects
1125 query = query.items()
1126 else:
1127 # it's a bother at times that strings and string-like objects are
1128 # sequences...
1129 try:
1130 # non-sequence items should not work with len()
1131 x = len(query)
1132 # non-empty strings will fail this
1133 if len(query) and type(query[0]) != types.TupleType:
1134 raise TypeError
1135 # zero-length sequences of all types will get here and succeed,
1136 # but that's a minor nit - since the original implementation
1137 # allowed empty dicts that type of behavior probably should be
1138 # preserved for consistency
1139 except TypeError:
1140 ty,va,tb = sys.exc_info()
1141 raise TypeError, "not a valid non-string sequence or mapping object", tb
1142
Guido van Rossume7b146f2000-02-04 15:28:42 +00001143 l = []
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001144 if not doseq:
1145 # preserve old behavior
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001146 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001147 k = quote_plus(str(k))
1148 v = quote_plus(str(v))
1149 l.append(k + '=' + v)
1150 else:
Skip Montanaro14f1ad42001-01-28 21:11:12 +00001151 for k, v in query:
Skip Montanaroa5d23a12001-01-20 15:56:39 +00001152 k = quote_plus(str(k))
1153 if type(v) == types.StringType:
1154 v = quote_plus(v)
1155 l.append(k + '=' + v)
1156 elif type(v) == types.UnicodeType:
1157 # is there a reasonable way to convert to ASCII?
1158 # encode generates a string, but "replace" or "ignore"
1159 # lose information and "strict" can raise UnicodeError
1160 v = quote_plus(v.encode("ASCII","replace"))
1161 l.append(k + '=' + v)
1162 else:
1163 try:
1164 # is this a sufficient test for sequence-ness?
1165 x = len(v)
1166 except TypeError:
1167 # not a sequence
1168 v = quote_plus(str(v))
1169 l.append(k + '=' + v)
1170 else:
1171 # loop over the sequence
1172 for elt in v:
1173 l.append(k + '=' + quote_plus(str(elt)))
Guido van Rossumb2493f82000-12-15 15:01:37 +00001174 return '&'.join(l)
Guido van Rossum810a3391998-07-22 21:33:23 +00001175
Guido van Rossum442e7201996-03-20 15:33:11 +00001176# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  Variables
    with an empty value are skipped.  If you need a different way, you
    can pass a proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            scheme = name[:-len(suffix)]
            proxies[scheme] = value
    return proxies
1192
if os.name == 'mac':
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the ic module cannot be
        imported or ic.IC() fails.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        The dictionary is empty when _winreg is unavailable or proxies
        are disabled.  A registry or parsing error stops the scan and
        returns whatever was collected up to that point.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # Nested try/finally so the key is closed even when a query
            # or the parsing below raises.
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            proxies[protocol] = '%s://%s' % (protocol, address)
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # Nothing more to do: return what has been collected.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()
else:
    # By default use environment variables
    getproxies = getproxies_environment
1277
Guido van Rossum442e7201996-03-20 15:33:11 +00001278
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001279# Test and time quote() and unquote()
1280def test1():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001281 import time
1282 s = ''
1283 for i in range(256): s = s + chr(i)
1284 s = s*4
1285 t0 = time.time()
1286 qs = quote(s)
1287 uqs = unquote(qs)
1288 t1 = time.time()
1289 if uqs != s:
1290 print 'Wrong!'
1291 print `s`
1292 print `qs`
1293 print `uqs`
1294 print round(t1 - t0, 3), 'sec'
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001295
1296
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line; test() passes this hook to urlretrieve()."""
    # Report during remote transfers
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
Guido van Rossum9ab96d41998-09-28 14:07:00 +00001301
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001302# Test program
Guido van Rossum23490151998-06-25 02:39:00 +00001303def test(args=[]):
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001304 if not args:
1305 args = [
1306 '/etc/passwd',
1307 'file:/etc/passwd',
1308 'file://localhost/etc/passwd',
1309 'ftp://ftp.python.org/etc/passwd',
1310## 'gopher://gopher.micro.umn.edu/1/',
1311 'http://www.python.org/index.html',
1312 ]
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001313 if hasattr(URLopener, "open_https"):
1314 args.append('https://synergy.as.cmu.edu/~geek/')
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001315 try:
1316 for url in args:
1317 print '-'*10, url, '-'*10
1318 fn, h = urlretrieve(url, None, reporthook)
Guido van Rossumb2493f82000-12-15 15:01:37 +00001319 print fn
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001320 if h:
1321 print '======'
1322 for k in h.keys(): print k + ':', h[k]
1323 print '======'
1324 fp = open(fn, 'rb')
1325 data = fp.read()
1326 del fp
1327 if '\r' in data:
1328 table = string.maketrans("", "")
Guido van Rossumb2493f82000-12-15 15:01:37 +00001329 data = data.translate(table, "\r")
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001330 print data
1331 fn, h = None, None
1332 print '-'*40
1333 finally:
1334 urlcleanup()
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001335
Guido van Rossum23490151998-06-25 02:39:00 +00001336def main():
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001337 import getopt, sys
1338 try:
1339 opts, args = getopt.getopt(sys.argv[1:], "th")
1340 except getopt.error, msg:
1341 print msg
1342 print "Use -h for help"
1343 return
1344 t = 0
1345 for o, a in opts:
1346 if o == '-t':
1347 t = t + 1
1348 if o == '-h':
1349 print "Usage: python urllib.py [-t] [url ...]"
1350 print "-t runs self-test;",
1351 print "otherwise, contents of urls are printed"
1352 return
1353 if t:
1354 if t > 1:
1355 test1()
1356 test(args)
1357 else:
1358 if not args:
1359 print "Use -h for help"
1360 for url in args:
1361 print urlopen(url).read(),
Guido van Rossum23490151998-06-25 02:39:00 +00001362
Guido van Rossum7c6ebb51994-03-22 12:05:32 +00001363# Run test program when run as a script
1364if __name__ == '__main__':
Jeremy Hyltonf90b0021999-02-25 16:12:12 +00001365 main()