blob: 640c6616ca7ae15341a6b70607dd0a885cf580ac [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Georg Brandlfa42bd72006-04-30 07:06:11 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urllib2.urlopen() and urllib2.parse_http_list()."""

    def test_trivial(self):
        # A couple of trivial tests: a malformed URL is rejected, and a
        # file: URL built from urllib2's own source file can be opened and
        # read.

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')

        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')
        elif os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib2.urlopen(file_url)
        try:
            # Only checking that reading works; the content is not inspected.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        # Each case pairs a raw header value with the list of elements
        # that parse_http_list() is expected to split it into.
        tests = [('a,b,c', ['a', 'b', 'c']),
                 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
                 ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
                 ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        # Names chosen so the builtin ``list`` and the ``string`` module
        # are not shadowed.
        for header_value, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header_value), expected)
51
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000052
def test_request_headers_dict():
    """Doctest: header names stored in Request.headers are .capitalize()-cased.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
    # NOTE(review): the function has no body; its docstring is presumably
    # collected by a doctest runner elsewhere in this file -- confirm.
82
def test_request_headers_methods():
    """Doctest: Request.has_header(), .get_header() and .header_items().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
    # NOTE(review): the function has no body; its docstring is presumably
    # collected by a doctest runner elsewhere in this file -- confirm.
114
115
def test_password_manager(self):
    """Doctest: HTTPPasswordMgr lookups by realm and URI.

    The ``self`` parameter is never used by this function.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    # NOTE(review): the docstring above is the whole test; it is presumably
    # collected by a doctest runner elsewhere in this file -- confirm.
    pass
179
180
def test_password_manager_default_port(self):
    """Doctest: HTTPPasswordMgr handling of explicit versus default ports.

    The ``self`` parameter is never used by this function.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
    # NOTE(review): the docstring above is the whole test; it is presumably
    # collected by a doctest runner elsewhere in this file -- confirm.
227
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the call arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Record the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
234
class MockFile:
    """File-like object whose methods accept calls and return None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
239
class MockHeaders(dict):
    """dict subclass offering a minimal getheaders() method."""

    def getheaders(self, name):
        # ``name` is not consulted: all stored values are returned.
        stored_values = self.values()
        return stored_values
243
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000244class MockResponse(StringIO.StringIO):
245 def __init__(self, code, msg, headers, data, url=None):
246 StringIO.StringIO.__init__(self, data)
247 self.code, self.msg, self.headers, self.url = code, msg, headers, url
248 def info(self):
249 return self.headers
250 def geturl(self):
251 return self.url
252
class MockCookieJar:
    """Cookie jar stand-in that remembers the objects passed to it."""

    def add_cookie_header(self, request):
        # Keep the request that was offered for cookie decoration.
        self.ach_req = request

    def extract_cookies(self, response, request):
        # Keep both objects so a test can check what was handed over.
        self.ec_req = request
        self.ec_r = response
258
class FakeMethod:
    """Callable that forwards to ``handle`` with a method name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.action = action
        self.handle = handle

    def __call__(self, *args):
        # Prefix the stored name and action to the caller's arguments.
        target = self.handle
        return target(self.meth_name, self.action, *args)
266
class MockHTTPResponse:
    """Response stand-in holding fp, msg, status and reason; its body is empty."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        # There is never any body data.
        return ''
275
class MockHTTPClass:
    """HTTP connection stand-in that records how it is used.

    Calling an instance returns that same instance, so one object can be
    passed where a connection class is expected and inspected afterwards.
    """

    def __init__(self):
        self._tunnel_headers = {}
        self.raise_on_endheaders = False
        self.data = None
        self.req_headers = []

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # "Instantiation": remember the target and hand back ourselves.
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def _set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the dict currently held.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers += headers.items()
        # Sorted so tests can compare against a fixed ordering.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            # Simulate a socket-level failure while sending the request.
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
311
class MockHandler:
    """Handler whose methods are generated from a list of method specs.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.  Every
    generated method records its call in ``self.parent.calls`` and then
    performs the action named in its spec (see handle()).
    """
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of ``methods``.

        An entry is either a bare method-name string (no action) or a
        two-item ``(name, action)`` sequence.
        """
        for spec in methods:
            # A string is always a bare method name.  Checking only
            # len(spec) == 2 would split a two-character name into
            # (name, action).
            if not isinstance(spec, str) and len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            meth = FakeMethod(name, action, self.handle)
            # The method is set on the class, not the instance, and is
            # bound to this instance's handle().
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then carry out ``action``.

        Supported actions: None, "return self", "return response",
        "return request", "error <code>" and "raise" (which raises
        urllib2.URLError).  Any other action raises AssertionError.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # The error code is whatever follows the last space.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                # Not numeric: pass the text through unchanged.
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib2.URLError("blah")
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("unknown action: %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        """Attach to ``parent`` and reset its ``calls`` list to empty."""
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
355
def add_ordered_mock_handlers(opener, meth_spec):
    """Build one MockHandler per spec entry and add each to ``opener``.

    meth_spec is a list of lists; each inner list describes the methods of
    one handler, as strings and/or tuples.  For example:

    [["http_error", "ftp_open"], ["http_open"]]

    gives one handler with .http_error() and .ftp_open() and a second with
    .http_open().  Methods named by a plain string just record their
    arguments and return None.  A tuple adds an action that the method
    performs (see MockHandler.handle()), for example:

    [["http_error"], [("http_open", "return request")]]

    gives .http_error() (returning None) on one handler and an .http_open()
    that returns a Request object on another.

    Returns the list of handlers in the order they were created.
    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # generated methods on the class.
        class MockHandlerSubclass(MockHandler): pass
        handler = MockHandlerSubclass(meths)
        # Later handlers get a larger handler_order.
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
386
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with each given handler added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
392
class MockHTTPHandler(urllib2.BaseHandler):
    """Handler for testing redirections and auth.

    The first request is answered through the parent's error() with the
    supplied code and headers; every later request gets a 200 OK
    MockResponse.
    """

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        # Forget recorded requests and answer with the error again.
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import mimetools, httplib, copy
        from StringIO import StringIO
        # Store a deep copy of each request received.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Any request after the first: plain 200 OK.
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())
        # First request: report the configured status via the parent.
        self._count += 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
418
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    """HTTPS handler that opens requests through a MockHTTPClass.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of ``httpconn`` afterwards.
    """

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        # Pass the recording connection object to do_open().
        connection = self.httpconn
        return self.do_open(connection, req)
429
class MockPasswordManager:
    """Password manager stand-in that holds one credential set.

    It records what was asked for and returns the last stored user and
    password whatever realm or URI is queried.
    """

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Remember the query so a test can check what was looked up.
        self.target_realm, self.target_url = realm, authuri
        credentials = (self.user, self.password)
        return credentials
440
441
class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to the handlers added to it."""

    def test_add_non_handler(self):
        # an object that is not a handler is rejected with TypeError
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(urllib2.URLError,
                              o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so pin the length first;
        # otherwise missing or extra calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        # handlers are consulted in handler_order order, not insertion order
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands for "don't care".
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # A real assertion rather than a bare ``assert``, which would be
        # stripped under ``python -O``.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # ``args`` is kept on the left so Unknown.__eq__ is consulted.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this, the loop below would pass vacuously if fewer
        # calls than expected were recorded.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
593
594
Tim Peters58eb11c2004-01-18 20:29:55 +0000595def sanepathname2url(path):
596 import urllib
597 urlpath = urllib.pathname2url(path)
598 if os.name == "nt" and urlpath.startswith("///"):
599 urlpath = urlpath[2:]
600 # XXX don't ask me about the mac...
601 return urlpath
602
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603class HandlerTests(unittest.TestCase):
604
605 def test_ftp(self):
606 class MockFTPWrapper:
607 def __init__(self, data): self.data = data
608 def retrfile(self, filename, filetype):
609 self.filename, self.filetype = filename, filetype
610 return StringIO.StringIO(self.data), len(self.data)
611
612 class NullFTPHandler(urllib2.FTPHandler):
613 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000614 def connect_ftp(self, user, passwd, host, port, dirs,
615 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 self.user, self.passwd = user, passwd
617 self.host, self.port = host, port
618 self.dirs = dirs
619 self.ftpwrapper = MockFTPWrapper(self.data)
620 return self.ftpwrapper
621
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000622 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623 data = "rheum rhaponicum"
624 h = NullFTPHandler(data)
625 o = h.parent = MockOpener()
626
627 for url, host, port, type_, dirs, filename, mimetype in [
628 ("ftp://localhost/foo/bar/baz.html",
629 "localhost", ftplib.FTP_PORT, "I",
630 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000631 ("ftp://localhost:80/foo/bar/",
632 "localhost", 80, "D",
633 ["foo", "bar"], "", None),
634 ("ftp://localhost/baz.gif;type=a",
635 "localhost", ftplib.FTP_PORT, "A",
636 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000637 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000638 req = Request(url)
639 req.timeout = None
640 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 # ftp authentication not yet implemented by FTPHandler
642 self.assert_(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000643 self.assertEqual(h.host, socket.gethostbyname(host))
644 self.assertEqual(h.port, port)
645 self.assertEqual(h.dirs, dirs)
646 self.assertEqual(h.ftpwrapper.filename, filename)
647 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000649 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000650 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651
652 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000653 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000654 h = urllib2.FileHandler()
655 o = h.parent = MockOpener()
656
Tim Peters58eb11c2004-01-18 20:29:55 +0000657 TESTFN = test_support.TESTFN
658 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000659 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000660 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000661 "file://localhost%s" % urlpath,
662 "file://%s" % urlpath,
663 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000664 ]
665 try:
Tim Peters480725d2006-04-03 02:46:44 +0000666 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000667 except socket.gaierror:
668 localaddr = ''
669 if localaddr:
670 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000671
Georg Brandldd2245f2006-03-31 17:18:06 +0000672 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000673 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000674 try:
675 try:
676 f.write(towrite)
677 finally:
678 f.close()
679
680 r = h.file_open(Request(url))
681 try:
682 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000683 headers = r.info()
Senthil Kumaran2add1842010-05-08 03:14:33 +0000684 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000685 finally:
686 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000687 stats = os.stat(TESTFN)
688 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000689 finally:
690 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000691 self.assertEqual(data, towrite)
692 self.assertEqual(headers["Content-type"], "text/plain")
693 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000694 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran2add1842010-05-08 03:14:33 +0000695 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696
697 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000698 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000699 "file:///file_does_not_exist.txt",
700 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
701 os.getcwd(), TESTFN),
702 "file://somerandomhost.ontheinternet.com%s/%s" %
703 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000704 ]:
705 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000706 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000707 try:
708 f.write(towrite)
709 finally:
710 f.close()
711
712 self.assertRaises(urllib2.URLError,
713 h.file_open, Request(url))
714 finally:
715 os.remove(TESTFN)
716
717 h = urllib2.FileHandler()
718 o = h.parent = MockOpener()
719 # XXXX why does // mean ftp (and /// mean not ftp!), and where
720 # is file: scheme specified? I think this is really a bug, and
721 # what was intended was to distinguish between URLs like:
722 # file:/blah.txt (a file)
723 # file://localhost/blah.txt (a file)
724 # file:///blah.txt (a file)
725 # file://ftp.example.com/blah.txt (an ftp URL)
726 for url, ftp in [
727 ("file://ftp.example.com//foo.txt", True),
728 ("file://ftp.example.com///foo.txt", False),
729# XXXX bug: fails with OSError, should be URLError
730 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran9d3c5c82010-07-11 03:33:38 +0000731 ("file://somehost//foo/something.txt", True),
732 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000733 ]:
734 req = Request(url)
735 try:
736 h.file_open(req)
737 # XXXX remove OSError when bug fixed
738 except (urllib2.URLError, OSError):
739 self.assert_(not ftp)
740 else:
741 self.assert_(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000742 self.assertEqual(req.type, "ftp")
Senthil Kumaran9d3c5c82010-07-11 03:33:38 +0000743 self.assertEqual(req.type is "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000744
745 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000746
747 h = urllib2.AbstractHTTPHandler()
748 o = h.parent = MockOpener()
749
750 url = "http://example.com/"
751 for method, data in [("GET", None), ("POST", "blah")]:
752 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000753 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000754 req.add_unredirected_header("Spam", "eggs")
755 http = MockHTTPClass()
756 r = h.do_open(http, req)
757
758 # result attributes
759 r.read; r.readline # wrapped MockFile methods
760 r.info; r.geturl # addinfourl methods
761 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
762 hdrs = r.info()
763 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000764 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000765
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000766 self.assertEqual(http.host, "example.com")
767 self.assertEqual(http.level, 0)
768 self.assertEqual(http.method, method)
769 self.assertEqual(http.selector, "/")
770 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000771 [("Connection", "close"),
772 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000773 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000774
775 # check socket.error converted to URLError
776 http.raise_on_endheaders = True
777 self.assertRaises(urllib2.URLError, h.do_open, http, req)
778
779 # check adding of standard headers
780 o.addheaders = [("Spam", "eggs")]
781 for data in "", None: # POST, GET
782 req = Request("http://example.com/", data)
783 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000784 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000785 if data is None: # GET
Georg Brandl8c036cc2006-08-20 13:15:39 +0000786 self.assert_("Content-length" not in req.unredirected_hdrs)
787 self.assert_("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000789 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
790 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000791 "application/x-www-form-urlencoded")
792 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000793 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
794 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000795
796 # don't clobber existing headers
797 req.add_unredirected_header("Content-length", "foo")
798 req.add_unredirected_header("Content-type", "bar")
799 req.add_unredirected_header("Host", "baz")
800 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000801 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000802 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
803 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000804 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
805 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000806
def test_http_doubleslash(self):
    """Check the Host header for URLs containing a redundant double slash.

    Previously, a double slash directly after the host could cause
    incorrect parsing of the URL.  The Host header is checked both
    without and with a proxy set on the request.
    """
    handler = urllib2.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    urls = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for url in urls:
        request = Request(url, body)

        # No proxy: the host must be determined correctly from the URL.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # Proxy set: the Host header must still be the URL's host.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
832
def test_errors(self):
    """HTTPErrorProcessor returns 2xx responses as-is and reports others."""
    processor = urllib2.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    request = Request(url)

    # all 2xx are passed through
    passed_through = (
        (200, "OK"),
        (202, "Accepted"),
        (206, "Partial content"),
    )
    for status, reason in passed_through:
        response = MockResponse(status, reason, {}, "", url)
        result = processor.http_response(request, response)
        self.assertTrue(response is result)
        # opener.error not called
        self.assertFalse(hasattr(opener, "proto"))

    # anything else calls opener.error (and MockOpener returns None, here)
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(processor.http_response(request, response) is None)
    self.assertEqual(opener.proto, "http")  # opener.error called
    self.assertEqual(opener.args,
                     (request, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000857
def test_cookies(self):
    """HTTPCookieProcessor passes requests and responses to its jar."""
    jar = MockCookieJar()
    processor = urllib2.HTTPCookieProcessor(jar)
    processor.parent = MockOpener()

    request = Request("http://example.com/")
    response = MockResponse(200, "OK", {}, "")

    # Request side: the very same request object reaches the jar and is
    # handed back by the processor.
    returned_request = processor.http_request(request)
    self.assertTrue(jar.ach_req is request is returned_request)
    self.assertEqual(request.get_origin_req_host(), "example.com")
    self.assertFalse(request.is_unverifiable())

    # Response side: the jar sees the request/response pair and the
    # response comes back unchanged.
    returned_response = processor.http_response(request, response)
    self.assertTrue(jar.ec_req is request)
    self.assertTrue(jar.ec_r is response is returned_response)
872
def test_redirect(self):
    """Exercise HTTPRedirectHandler: plain redirects and loop detection."""
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    handler = urllib2.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()

    # ordinary redirect behaviour
    for code in (301, 302, 303, 307):
        for data in (None, "blah\nblah\n"):
            error_method = getattr(handler, "http_error_%s" % code)
            request = Request(from_url, data)
            request.add_header("Nonsense", "viking=withhold")
            request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            if data is not None:
                request.add_header("Content-Length", str(len(data)))
            request.add_unredirected_header("Spam", "spam")
            try:
                error_method(request, MockFile(), code, "Blah",
                             MockHeaders({"location": to_url}))
            except urllib2.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)

            redirected = opener.req
            self.assertEqual(redirected.get_full_url(), to_url)
            try:
                self.assertEqual(redirected.get_method(), "GET")
            except AttributeError:
                self.assertFalse(redirected.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            lowered = [name.lower() for name in redirected.headers]
            self.assertFalse("content-length" in lowered)
            self.assertFalse("content-type" in lowered)

            self.assertEqual(redirected.headers["Nonsense"],
                             "viking=withhold")
            self.assertFalse("Spam" in redirected.headers)
            self.assertFalse("Spam" in redirected.unredirected_hdrs)

    # loop detection
    request = Request(from_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    request = Request(from_url, origin_req_host="example.com")
    hops = 0
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, request, "http://example.com/")
            hops += 1
    except urllib2.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(hops, urllib2.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    request = Request(from_url, origin_req_host="example.com")
    hops = 0
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, request, "http://example.com/%d" % hops)
            hops += 1
    except urllib2.HTTPError:
        self.assertEqual(hops,
                         urllib2.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000944
def test_invalid_redirect(self):
    """file/imap/ldap redirects raise HTTPError; http/https/ftp work."""
    from_url = "http://example.com/a.html"
    schemeless_url = "example.com/b.html"
    handler = urllib2.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()
    request = Request(from_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    # A redirect to any of these schemes must be refused.
    for scheme in ('file', 'imap', 'ldap'):
        target = scheme + '://' + schemeless_url
        self.assertRaises(urllib2.HTTPError, handler.http_error_302,
                          request, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": target}))

    # A redirect to any of these schemes is followed to the new URL.
    for scheme in ('http', 'https', 'ftp'):
        target = scheme + '://' + schemeless_url
        handler.http_error_302(request, MockFile(), 302, "That's fine",
                               MockHeaders({"location": target}))
        self.assertEqual(opener.req.get_full_url(), target)
966
def test_cookie_redirect(self):
    """Cookies shouldn't leak into redirected requests."""
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The first response is a 302 pointing at a different host.
    redirecting_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")
    self.assertFalse(redirecting_handler.req.has_header("Cookie"))
982
def test_proxy(self):
    """Opening an http URL via ProxyHandler re-targets it at the proxy."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # The only recorded call is http_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
999
def test_proxy_no_proxy(self):
    """Hosts matching the ``no_proxy`` environment variable skip the proxy.

    ``no_proxy`` is set only for the duration of this test.  Its previous
    state (a value, or being unset) is restored in a ``finally`` clause so
    that a failing assertion cannot leak the variable into later tests and
    a value configured by the caller's environment is not destroyed.
    """
    previous = os.environ.get('no_proxy')
    os.environ['no_proxy'] = 'python.org'
    try:
        opener = OpenerDirector()
        opener.add_handler(
            urllib2.ProxyHandler(dict(http="proxy.example.com")))

        # www.perl.org is not listed in no_proxy: request goes to the proxy.
        request = Request("http://www.perl.org/")
        self.assertEqual(request.get_host(), "www.perl.org")
        opener.open(request)
        self.assertEqual(request.get_host(), "proxy.example.com")

        # www.python.org matches no_proxy: the host is left untouched.
        request = Request("http://www.python.org")
        self.assertEqual(request.get_host(), "www.python.org")
        opener.open(request)
        self.assertEqual(request.get_host(), "www.python.org")
    finally:
        if previous is None:
            del os.environ['no_proxy']
        else:
            os.environ['no_proxy'] = previous
1014
1015
def test_proxy_https(self):
    """Opening an https URL via ProxyHandler re-targets it at the proxy."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # The only recorded call is https_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1030
def test_proxy_https_proxy_authorization(self):
    """Proxy-Authorization stays on the request, not the connection."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    request = Request("https://www.example.com/")
    request.add_header("Proxy-Authorization", "FooBar")
    request.add_header("User-Agent", "Grail")
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertTrue(request._tunnel_host is None)

    opener.open(request)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertFalse(("Proxy-Authorization", "FooBar") in sent_headers)
    self.assertTrue(("User-Agent", "Grail") in sent_headers)
    self.assertFalse(request._tunnel_host is None)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
1053
def test_basic_auth(self, quote_char='"'):
    """Run the shared basic-auth scenario against a 401 challenge.

    *quote_char* is the character placed on both sides of the realm in
    the WWW-Authenticate header sent by the mock HTTP handler.
    """
    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected,
                          protected,
                          )
1069
def test_basic_auth_with_single_quoted_realm(self):
    """Repeat test_basic_auth with the realm wrapped in single quotes."""
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1072
def test_proxy_basic_auth(self):
    """Run the shared basic-auth scenario against a 407 proxy challenge."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(407, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128",
                          )
1089
def test_basic_and_digest_auth_handlers(self):
    """Basic auth still works, and digest is tried first, with both set."""
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    def recording_subclass(base, label):
        # Build a subclass of *base* whose http_error_401 first records
        # *label* on the opener and then calls the real implementation.
        class RecordingAuthHandler(base):
            def http_error_401(self, *args, **kwds):
                self.parent.record(label)
                # NOTE(review): the base-class return value is discarded
                # here -- confirm that is intended.
                base.http_error_401(self, *args, **kwds)
        return RecordingAuthHandler

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = recording_subclass(
        urllib2.HTTPDigestAuthHandler, "digest")(password_manager)
    basic_handler = recording_subclass(
        urllib2.HTTPBasicAuthHandler, "basic")(password_manager)
    realm = "ACME Networks"
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected,
                          protected,
                          )
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001136
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    """Shared basic-auth scenario used by the auth tests.

    Registers a user/password on *auth_handler*, opens *request_url*
    through *opener*, and checks that the second request recorded by
    *http_handler* carries the expected *auth_header*.  It then clears
    the credentials on *password_manager* and checks that only a single,
    unauthenticated request is made.
    """
    import base64
    user = "wile"
    password = "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    self.assertEqual(realm, password_manager.realm)
    self.assertEqual(request_url, password_manager.url)
    self.assertEqual(user, password_manager.user)
    self.assertEqual(password, password_manager.password)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = '%s:%s' % (user, password)
    expected = 'Basic ' + base64.encodestring(credentials).strip()
    retried = http_handler.requests[1]
    self.assertEqual(retried.get_header(auth_header), expected)
    self.assertEqual(retried.unredirected_hdrs[auth_header], expected)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1172
class MiscTests(unittest.TestCase):
    """Tests for urllib2.build_opener()."""

    def test_build_opener(self):
        """build_opener() accepts handler classes and instances."""
        class MyHTTPHandler(urllib2.HTTPHandler):
            pass

        class FooHandler(urllib2.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib2.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib2.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler)
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler())
        self.opener_has_handler(o, urllib2.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib2.HTTPHandler):
            pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Fail unless *opener* has a handler of exactly *handler_class*.

        The comparison is on the handler's own class, so an instance of a
        subclass of *handler_class* does not count.  On failure the
        message names the class that was not found.
        """
        for handler in opener.handlers:
            if handler.__class__ == handler_class:
                return
        self.fail("opener has no handler of class %r" % (handler_class,))
1218
class RequestTests(unittest.TestCase):
    """Tests for the accessor methods of urllib2.Request."""

    def setUp(self):
        # One request without data and one with data plus an extra header.
        url = "http://www.python.org/~jeremy/"
        self.get = urllib2.Request(url)
        self.post = urllib2.Request(url, "data", headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Adding data to a data-less request turns it from GET into POST.
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root_request = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root_request.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        # A percent-escape in the host part is decoded by get_host().
        escaped = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        # The origin host is unchanged; the host is now the proxy.
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
1263
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001264
def test_main(verbose=None):
    """Run the doctests of this module and of urllib2, then the unit tests.

    *verbose* is passed through to test_support.run_doctest().
    """
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001275
# Script entry point: run the whole suite verbosely.
if "__main__" == __name__:
    test_main(verbose=True)