blob: 01c214a252dc4cd1674ab1fe94f709b2948a3905 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Georg Brandlfa42bd72006-04-30 07:06:11 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urllib2.urlopen() and urllib2.parse_http_list()."""

    def test_trivial(self):
        # A couple trivial tests: a malformed URL is rejected with
        # ValueError, and a file: URL pointing at urllib2's own source file
        # can be opened and read.

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]
        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')
        elif os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        file_url = "file://%s" % fname
        f = urllib2.urlopen(file_url)
        try:
            # Only checking that reading works; the content is irrelevant.
            f.read()
        finally:
            # Close the response even when read() fails.
            f.close()

    def test_parse_http_list(self):
        # Each entry pairs a raw header value with the list of elements
        # parse_http_list() is expected to split it into.
        tests = [('a,b,c', ['a', 'b', 'c']),
                 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
                 ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
                 ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        for header_value, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header_value), expected)
48
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000049
def test_request_headers_dict():
    """
    Doctest holder: case normalization of keys in the Request.headers dict.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
79
def test_request_headers_methods():
    """
    Doctest holder: the has_header() / header_items() / add_header() /
    get_header() methods of Request.

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
111
112
def test_password_manager(self):
    """
    Doctest holder: credential lookup in urllib2.HTTPPasswordMgr by realm
    and URI.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
176
177
def test_password_manager_default_port(self):
    """
    Doctest holder: how urllib2.HTTPPasswordMgr treats explicit and default
    ports, with and without a URI scheme.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
224
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None,timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the call arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Same idea: remember the protocol and the remaining positionals.
        self.proto = proto
        self.args = args
231
class MockFile:
    """File-like stub: every method accepts the call and returns None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
236
class MockHeaders(dict):
    """dict-backed header stub whose getheaders() ignores the name asked for."""

    def getheaders(self, name):
        # Every stored value is handed back, whatever *name* is.
        all_values = self.values()
        return all_values
240
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000241class MockResponse(StringIO.StringIO):
242 def __init__(self, code, msg, headers, data, url=None):
243 StringIO.StringIO.__init__(self, data)
244 self.code, self.msg, self.headers, self.url = code, msg, headers, url
245 def info(self):
246 return self.headers
247 def geturl(self):
248 return self.url
249
class MockCookieJar:
    """Cookie-jar stub that records the objects passed to its two hooks."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
255
class FakeMethod:
    """Callable that forwards calls to *handle* with a fixed name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.action = action
        self.handle = handle

    def __call__(self, *args):
        # Prefix the caller's positionals with the stored name and action.
        dispatch = self.handle
        return dispatch(self.meth_name, self.action, *args)
263
class MockHTTPResponse:
    """Response stub holding fp, msg, status and reason; read() yields ''."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        # There is never a body.
        return ''
272
class MockHTTPClass:
    """Mock of an HTTP connection class that records how it is driven.

    An instance is callable and returns itself, so it can be passed where a
    connection *class* is expected: "instantiating" it stores the host and
    timeout on the mock.  The request details are kept on the instance
    (method, selector, req_headers, data) for tests to inspect.  Setting
    raise_on_endheaders to True makes request() raise socket.error.
    """

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Pretend to be a freshly constructed connection object.
        self.host = host
        self.timeout = timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            self._tunnel_headers.clear()

    def request(self, method, url, body=None, headers=None):
        """Record one request; headers accumulate, sorted, in req_headers.

        *headers* defaults to None (meaning "no headers") rather than to a
        shared mutable dict.  Raises socket.error when raise_on_endheaders
        is set.
        """
        self.method = method
        self.selector = url
        if headers is not None:
            self.req_headers += headers.items()
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            # socket is imported at module level; no local import needed.
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
307
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        # Each spec is either a bare method name or a (name, action) pair.
        # The FakeMethod is attached to the *class* of this instance, not to
        # the instance itself.
        for spec in methods:
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Log the call on the parent first, then carry out the action.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # Whatever follows the last space is the error code; it is used
            # as an int when it parses as one, otherwise left as a string.
            code = action[action.rfind(" ") + 1:]
            try:
                code = int(code)
            except ValueError:
                pass
            response = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], response, code, "", {})
        if action == "raise":
            raise urllib2.URLError("blah")
        # Unknown action string: a mistake in the test's method spec.
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        # Attaching to a parent also resets that parent's call log.
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
351
def add_ordered_mock_handlers(opener, meth_spec):
    """Build one MockHandler per entry of meth_spec and add each to opener.

    meth_spec is a list of lists; every inner list names the methods to
    define on one handler.  For example

      [["http_error", "ftp_open"], ["http_open"]]

    gives a first handler with .http_error() and .ftp_open() and a second
    one with .http_open().  A method named by a plain string just records
    its arguments and returns None.  Naming it with a (name, action) tuple
    makes it perform that action as well (see MockHandler.handle()), so

      [["http_error"], [("http_open", "return request")]]

    gives a .http_error() that returns None on the first handler and a
    .http_open() that returns a Request object on the second.

    Returns the list of handlers created, in meth_spec order.

    """
    handlers = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # fake methods on the class rather than on the instance.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        # Later entries get a larger handler_order than earlier ones.
        handler.handler_order += position
        handler.add_parent(opener)
        handlers.append(handler)
        opener.add_handler(handler)
    return handlers
382
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with every given handler added, in order."""
    opener = OpenerDirector()
    register = opener.add_handler
    for handler in handler_instances:
        register(handler)
    return opener
388
class MockHTTPHandler(urllib2.BaseHandler):
    # useful for testing redirections and auth
    # sends supplied headers and code as first response
    # sends 200 OK as second response
    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        # Forget recorded requests; the next http_open() counts as the first.
        self.requests = []
        self._count = 0

    def http_open(self, req):
        import mimetools, httplib, copy
        from StringIO import StringIO
        # Keep a deep copy of each request as it was when it arrived.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Any call after the first: plain 200 OK with empty headers.
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())
        # First call: report the configured code through the parent's
        # error machinery, with the configured headers.
        self._count += 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
414
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpconn

    def __init__(self, *args, **kwargs):
        # Constructor arguments are passed straight through to the base
        # class so existing ways of creating the handler keep working.
        urllib2.AbstractHTTPHandler.__init__(self, *args, **kwargs)
        # One mock connection per handler instance.  As a class attribute
        # the same MockHTTPClass would be shared by every handler, and the
        # request headers it records would accumulate from one test into
        # the next.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        return self.do_open(self.httpconn, req)
421
class MockPasswordManager:
    """Password-manager stub that remembers one credential and each lookup."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for, then answer with the stored credential
        # whatever the realm or URI.
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
432
433
class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to the methods of its handlers.

    Most tests register MockHandler instances (see
    add_ordered_mock_handlers()) and then inspect the call log that those
    handlers keep on the opener as ``o.calls``.
    """

    def test_add_non_handler(self):
        # adding an object that is not a handler raises TypeError
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib2 import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            # One subclass per handler: MockHandler defines its fake methods
            # on the class.
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            # A plain method name; the parentheses that used to surround it
            # did not make it a tuple.
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: placeholder for the response
            # argument, whose identity the test does not care about.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # A unittest assertion rather than a bare assert, so the check is
        # not stripped when running under -O.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
585
586
Tim Peters58eb11c2004-01-18 20:29:55 +0000587def sanepathname2url(path):
588 import urllib
589 urlpath = urllib.pathname2url(path)
590 if os.name == "nt" and urlpath.startswith("///"):
591 urlpath = urlpath[2:]
592 # XXX don't ask me about the mac...
593 return urlpath
594
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000595class HandlerTests(unittest.TestCase):
596
597 def test_ftp(self):
598 class MockFTPWrapper:
599 def __init__(self, data): self.data = data
600 def retrfile(self, filename, filetype):
601 self.filename, self.filetype = filename, filetype
602 return StringIO.StringIO(self.data), len(self.data)
603
604 class NullFTPHandler(urllib2.FTPHandler):
605 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000606 def connect_ftp(self, user, passwd, host, port, dirs,
607 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000608 self.user, self.passwd = user, passwd
609 self.host, self.port = host, port
610 self.dirs = dirs
611 self.ftpwrapper = MockFTPWrapper(self.data)
612 return self.ftpwrapper
613
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000614 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000615 data = "rheum rhaponicum"
616 h = NullFTPHandler(data)
617 o = h.parent = MockOpener()
618
619 for url, host, port, type_, dirs, filename, mimetype in [
620 ("ftp://localhost/foo/bar/baz.html",
621 "localhost", ftplib.FTP_PORT, "I",
622 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000623 ("ftp://localhost:80/foo/bar/",
624 "localhost", 80, "D",
625 ["foo", "bar"], "", None),
626 ("ftp://localhost/baz.gif;type=a",
627 "localhost", ftplib.FTP_PORT, "A",
628 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000629 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000630 req = Request(url)
631 req.timeout = None
632 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000633 # ftp authentication not yet implemented by FTPHandler
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000634 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000635 self.assertEqual(h.host, socket.gethostbyname(host))
636 self.assertEqual(h.port, port)
637 self.assertEqual(h.dirs, dirs)
638 self.assertEqual(h.ftpwrapper.filename, filename)
639 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000640 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000641 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000642 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000643
644 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000645 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000646 h = urllib2.FileHandler()
647 o = h.parent = MockOpener()
648
Tim Peters58eb11c2004-01-18 20:29:55 +0000649 TESTFN = test_support.TESTFN
650 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000652 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000653 "file://localhost%s" % urlpath,
654 "file://%s" % urlpath,
655 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000656 ]
657 try:
Tim Peters480725d2006-04-03 02:46:44 +0000658 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000659 except socket.gaierror:
660 localaddr = ''
661 if localaddr:
662 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000663
Georg Brandldd2245f2006-03-31 17:18:06 +0000664 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000665 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000666 try:
667 try:
668 f.write(towrite)
669 finally:
670 f.close()
671
672 r = h.file_open(Request(url))
673 try:
674 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000675 headers = r.info()
676 newurl = r.geturl()
677 finally:
678 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000679 stats = os.stat(TESTFN)
680 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000681 finally:
682 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000683 self.assertEqual(data, towrite)
684 self.assertEqual(headers["Content-type"], "text/plain")
685 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000686 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000687
688 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000689 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000690 "file:///file_does_not_exist.txt",
691 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
692 os.getcwd(), TESTFN),
693 "file://somerandomhost.ontheinternet.com%s/%s" %
694 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000695 ]:
696 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000697 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000698 try:
699 f.write(towrite)
700 finally:
701 f.close()
702
703 self.assertRaises(urllib2.URLError,
704 h.file_open, Request(url))
705 finally:
706 os.remove(TESTFN)
707
708 h = urllib2.FileHandler()
709 o = h.parent = MockOpener()
710 # XXXX why does // mean ftp (and /// mean not ftp!), and where
711 # is file: scheme specified? I think this is really a bug, and
712 # what was intended was to distinguish between URLs like:
713 # file:/blah.txt (a file)
714 # file://localhost/blah.txt (a file)
715 # file:///blah.txt (a file)
716 # file://ftp.example.com/blah.txt (an ftp URL)
717 for url, ftp in [
718 ("file://ftp.example.com//foo.txt", True),
719 ("file://ftp.example.com///foo.txt", False),
720# XXXX bug: fails with OSError, should be URLError
721 ("file://ftp.example.com/foo.txt", False),
722 ]:
723 req = Request(url)
724 try:
725 h.file_open(req)
726 # XXXX remove OSError when bug fixed
727 except (urllib2.URLError, OSError):
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000728 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000729 else:
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000730 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000731 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000732
733 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000734
735 h = urllib2.AbstractHTTPHandler()
736 o = h.parent = MockOpener()
737
738 url = "http://example.com/"
739 for method, data in [("GET", None), ("POST", "blah")]:
740 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000741 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000742 req.add_unredirected_header("Spam", "eggs")
743 http = MockHTTPClass()
744 r = h.do_open(http, req)
745
746 # result attributes
747 r.read; r.readline # wrapped MockFile methods
748 r.info; r.geturl # addinfourl methods
749 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
750 hdrs = r.info()
751 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000752 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000753
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000754 self.assertEqual(http.host, "example.com")
755 self.assertEqual(http.level, 0)
756 self.assertEqual(http.method, method)
757 self.assertEqual(http.selector, "/")
758 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000759 [("Connection", "close"),
760 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000761 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000762
763 # check socket.error converted to URLError
764 http.raise_on_endheaders = True
765 self.assertRaises(urllib2.URLError, h.do_open, http, req)
766
767 # check adding of standard headers
768 o.addheaders = [("Spam", "eggs")]
769 for data in "", None: # POST, GET
770 req = Request("http://example.com/", data)
771 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000772 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000773 if data is None: # GET
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000774 self.assertTrue("Content-length" not in req.unredirected_hdrs)
775 self.assertTrue("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000776 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000777 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
778 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000779 "application/x-www-form-urlencoded")
780 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000781 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
782 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000783
784 # don't clobber existing headers
785 req.add_unredirected_header("Content-length", "foo")
786 req.add_unredirected_header("Content-type", "bar")
787 req.add_unredirected_header("Host", "baz")
788 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000789 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000790 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
791 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000792 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
793 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000794
def test_http_doubleslash(self):
    # A redundant double slash anywhere in the path must not disturb host
    # detection.  Previously, a double slash directly after the host could
    # cause incorrect parsing of the url.
    handler = urllib2.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    for url in ("http://example.com/foo/bar/baz.html",
                "http://example.com//foo/bar/baz.html",
                "http://example.com/foo//bar/baz.html",
                "http://example.com/foo/bar//baz.html"):
        request = Request(url, body)

        # No proxy: the Host header must name the server from the URL.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # With a proxy set on the request, the Host header must still
        # name the origin server.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
820
def test_errors(self):
    # HTTPErrorProcessor hands 2xx responses back untouched and routes
    # every other status through the parent opener's error().
    processor = urllib2.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    request = Request(url)

    # all 2xx are passed through
    for status, reason in ((200, "OK"),
                           (202, "Accepted"),
                           (206, "Partial content")):
        response = MockResponse(status, reason, {}, "", url)
        returned = processor.http_response(request, response)
        self.assertTrue(response is returned)
        # opener.error not called
        self.assertTrue(not hasattr(opener, "proto"))

    # anything else calls opener.error (and MockOpener returns None, here)
    bad_gateway = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(processor.http_response(request, bad_gateway) is None)
    # opener.error called
    self.assertEqual(opener.proto, "http")
    self.assertEqual(opener.args,
                     (request, bad_gateway, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000845
def test_cookies(self):
    # HTTPCookieProcessor must pass the very same request/response objects
    # to its cookie jar and hand them back unchanged.  The ach_*/ec_*
    # attributes are whatever MockCookieJar recorded (presumably from its
    # add_cookie_header / extract_cookies hooks -- confirm against the
    # mock's definition).
    cj = MockCookieJar()
    h = urllib2.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")

    # Request side: the jar sees the request, which is returned as-is.
    newreq = h.http_request(req)
    self.assertTrue(cj.ach_req is req is newreq)
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())

    # Response side: the jar sees both objects, response is returned as-is.
    newr = h.http_response(req, r)
    self.assertTrue(cj.ec_req is req)
    self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000860
def test_redirect(self):
    # Exercises HTTPRedirectHandler in two parts: ordinary redirects for
    # each supported status code, then detection of redirect loops.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib2.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.add_header("Nonsense", "viking=withhold")
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib2.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                self.assertTrue(not o.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertTrue("content-length" not in headers)
            self.assertTrue("content-type" not in headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertTrue("Spam" not in o.req.headers)
            self.assertTrue("Spam" not in o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    limit = urllib2.HTTPRedirectHandler.max_repeats
    req = Request(from_url, origin_req_host="example.com")
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    count = 0
    try:
        # Bounded at one call past the limit so that a handler which stops
        # detecting loops produces a failure instead of hanging the suite.
        while count <= limit:
            redirect(h, req, "http://example.com/")
            count = count + 1
    except urllib2.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(count, limit)
    else:
        self.fail("redirect of a URL to itself was never detected")

    # detect endless non-repeating chain of redirects
    limit = urllib2.HTTPRedirectHandler.max_redirections
    req = Request(from_url, origin_req_host="example.com")
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    count = 0
    try:
        while count <= limit:
            redirect(h, req, "http://example.com/%d" % count)
            count = count + 1
    except urllib2.HTTPError:
        self.assertEqual(count, limit)
    else:
        self.fail("endless chain of redirects was never detected")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000932
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock HTTP handler answers with a 302 pointing at another host.
    http_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        http_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # The request recorded by the mock handler must carry no Cookie header.
    self.assertFalse(http_handler.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000948
def test_proxy(self):
    # An http request opened through a ProxyHandler ends up addressed to
    # the proxy host, and only the mock handler's http_open is called.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler({"http": "proxy.example.com:3128"}))
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
965
def test_proxy_no_proxy(self):
    # Hosts matching the no_proxy environment variable must bypass the
    # configured proxy; all other hosts are rewritten to the proxy host.
    previous = os.environ.get('no_proxy')
    os.environ['no_proxy'] = 'python.org'
    try:
        o = OpenerDirector()
        ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
        o.add_handler(ph)

        # Not covered by no_proxy: request is redirected to the proxy.
        req = Request("http://www.perl.org/")
        self.assertEqual(req.get_host(), "www.perl.org")
        o.open(req)
        self.assertEqual(req.get_host(), "proxy.example.com")

        # Covered by no_proxy: request keeps its original host.
        req = Request("http://www.python.org")
        self.assertEqual(req.get_host(), "www.python.org")
        o.open(req)
        self.assertEqual(req.get_host(), "www.python.org")
    finally:
        # Always put the environment back, even when an assertion fails,
        # so the setting cannot leak into later tests; a value that was
        # present before the test is restored rather than deleted.
        if previous is None:
            del os.environ['no_proxy']
        else:
            os.environ['no_proxy'] = previous
980
981
def test_proxy_https(self):
    # An https request opened through a ProxyHandler ends up addressed to
    # the proxy host, and only the mock handler's https_open is called.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler({"https": "proxy.example.com:3128"}))
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
996
def test_proxy_https_proxy_authorization(self):
    # When an https request goes through a proxy, Proxy-Authorization must
    # stay on the request object and not appear among the headers recorded
    # by the mock https connection.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler({"https": "proxy.example.com:3128"}))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    request = Request("https://www.example.com/")
    request.add_header("Proxy-Authorization", "FooBar")
    request.add_header("User-Agent", "Grail")
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertIsNone(request._tunnel_host)

    opener.open(request)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertFalse(("Proxy-Authorization", "FooBar") in sent_headers)
    self.assertTrue(("User-Agent", "Grail") in sent_headers)
    self.assertIsNotNone(request._tunnel_host)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
1019
def test_basic_auth(self, quote_char='"'):
    # Runs the shared basic-auth scenario against a 401 challenge whose
    # realm is wrapped in quote_char (double quote by default).
    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
    realm = "ACME Widget Store"
    challenge = 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % (
        quote_char, realm, quote_char)
    http_handler = MockHTTPHandler(401, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    # Request URL and protected URL are the same here.
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
1035
def test_basic_auth_with_single_quoted_realm(self):
    # Same scenario as test_basic_auth, with the realm in single quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1038
def test_proxy_basic_auth(self):
    # Runs the shared basic-auth scenario against a 407 challenge, with
    # the request routed through a ProxyHandler.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler({"http": "proxy.example.com:3128"}))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(407, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    # The password lookup is expected to be made for the proxy host, not
    # for the URL that was requested.
    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1055
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    # The two recording handlers note that they were called and then defer
    # to the real implementation.  The base result is returned so the
    # wrappers stay transparent: dropping it would discard a successfully
    # retried response.
    class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            return urllib2.HTTPDigestAuthHandler.http_error_401(
                self, *args, **kwds)

    class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            return urllib2.HTTPBasicAuthHandler.http_error_401(
                self, *args, **kwds)

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    # Basic is registered before digest on purpose: the recorded call order
    # asserted below must therefore not depend on registration order.
    opener.add_handler(basic_handler)
    opener.add_handler(digest_handler)
    opener.add_handler(http_handler)

    # check basic auth isn't blocked by digest handler failing
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com/protected",
                          "http://acme.example.com/protected",
                          )
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001102
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    """Shared basic-auth scenario used by the *_auth tests.

    Registers a password on auth_handler, opens request_url through
    opener, and checks that the password manager was consulted for
    (realm, protected_url) and that a second request carrying auth_header
    was sent.  It then repeats the open with no password available and
    checks that only one, unauthenticated, request is made.
    """
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    self.assertEqual(realm, password_manager.realm)
    self.assertEqual(request_url, password_manager.url)
    self.assertEqual(user, password_manager.user)
    self.assertEqual(password, password_manager.password)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    unauthenticated, authenticated = http_handler.requests
    self.assertFalse(unauthenticated.has_header(auth_header))
    credentials = base64.encodestring('%s:%s' % (user, password)).strip()
    self.assertEqual(authenticated.get_header(auth_header),
                     'Basic ' + credentials)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1137
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001138
class MiscTests(unittest.TestCase):
    """Tests for urllib2.build_opener()."""

    def test_build_opener(self):
        class MyHTTPHandler(urllib2.HTTPHandler): pass
        class FooHandler(urllib2.BaseHandler):
            def foo_open(self): pass
        class BarHandler(urllib2.BaseHandler):
            def bar_open(self): pass

        build_opener = urllib2.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler)
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler())
        self.opener_has_handler(o, urllib2.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Fail unless opener holds a handler of exactly handler_class.

        The comparison is on the handler's own class, not isinstance(), so
        an instance of a subclass does not count as its base class.
        """
        for h in opener.handlers:
            if h.__class__ == handler_class:
                return
        # Name the missing class so the failure can be diagnosed from the
        # test report alone.
        self.fail("opener has no handler of class %r" % (handler_class,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001184
class RequestTests(unittest.TestCase):
    """Checks of urllib2.Request accessors on a GET and a POST request."""

    def setUp(self):
        # Both fixtures target the same URL; only the POST one carries
        # data and an extra header.
        url = "http://www.python.org/~jeremy/"
        self.get = urllib2.Request(url)
        self.post = urllib2.Request(url, "data",
                                    headers={"X-Test": "test"})

    def test_method(self):
        post_method = self.post.get_method()
        get_method = self.get.get_method()
        self.assertEqual("POST", post_method)
        self.assertEqual("GET", get_method)

    def test_add_data(self):
        # Adding data to a GET request turns it into a POST.
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        full_url = self.get.get_full_url()
        self.assertEqual("http://www.python.org/~jeremy/", full_url)

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root.get_selector())

    def test_get_type(self):
        scheme = self.get.get_type()
        self.assertEqual("http", scheme)

    def test_get_host(self):
        host = self.get.get_host()
        self.assertEqual("www.python.org", host)

    def test_get_host_unquote(self):
        # A percent-escape in the host part is decoded by get_host().
        escaped = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        # After set_proxy() the host is the proxy, while the origin
        # request host still names the original server.
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
1229
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001230
def test_main(verbose=None):
    """Run this module's doctests, urllib2's doctests, then the unit tests.

    verbose is passed straight through to test_support.run_doctest().
    """
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(TrivialTests,
                              OpenerDirectorTests,
                              HandlerTests,
                              MiscTests,
                              RequestTests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001241
if __name__ == "__main__":
    # Executing the file directly runs the whole suite in verbose mode.
    test_main(True)