blob: c0366dd18fcc55c2ff4e8e3915e617a0e1fcff3d [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Georg Brandlfa42bd72006-04-30 07:06:11 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016class TrivialTests(unittest.TestCase):
17 def test_trivial(self):
18 # A couple trivial tests
Guido van Rossume2ae77b2001-10-24 20:42:55 +000019
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000020 self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')
Tim Peters861adac2001-07-16 20:49:49 +000021
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000022 # XXX Name hacking to get this to work on Windows.
23 fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
Senthil Kumaran2e3da142010-01-10 17:35:05 +000024
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000025 # And more hacking to get it to work on MacOS. This assumes
26 # urllib.pathname2url works, unfortunately...
27 if os.name == 'mac':
28 fname = '/' + fname.replace(':', '/')
29 elif os.name == 'riscos':
30 import string
31 fname = os.expand(fname)
32 fname = fname.translate(string.maketrans("/.", "./"))
33
Senthil Kumaran2e3da142010-01-10 17:35:05 +000034 if os.name == 'nt':
35 file_url = "file:///%s" % fname
36 else:
37 file_url = "file://%s" % fname
38
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000039 f = urllib2.urlopen(file_url)
40
41 buf = f.read()
42 f.close()
Tim Petersf5f32b42005-07-17 23:16:17 +000043
Georg Brandle1b13d22005-08-24 22:20:32 +000044 def test_parse_http_list(self):
45 tests = [('a,b,c', ['a', 'b', 'c']),
46 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
47 ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
48 ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
49 for string, list in tests:
50 self.assertEquals(urllib2.parse_http_list(string), list)
51
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000052
# Doctest-only function: it has no body besides the docstring, so the
# examples below are checked only when a doctest runner collects this module
# (NOTE(review): the runner is not visible in this part of the file).
def test_request_headers_dict():
    """
    The Request.headers dictionary is not a documented interface. It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers. It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
82
# Doctest-only function: the docstring examples exercise Request.has_header(),
# .header_items(), .add_header() and .get_header(); there is no other body.
def test_request_headers_methods():
    """
    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility. (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
114
115
# Doctest-only function: the docstring examples exercise
# urllib2.HTTPPasswordMgr.add_password() / .find_user_password().
# The ``self`` parameter is never used; the only statement is ``pass``.
def test_password_manager(self):
    """
    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
179
180
# Doctest-only function: the docstring examples show how
# urllib2.HTTPPasswordMgr treats URIs with and without an explicit port or
# scheme. The ``self`` parameter is never used.
def test_password_manager_default_port(self):
    """
    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme. This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
227
class MockOpener:
    # Stand-in opener that only records the arguments of the most recent
    # open() and error() calls as attributes on the instance.
    addheaders = []

    def open(self, req, data=None,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Remember what we were asked to open; nothing is returned.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Remember the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
234
class MockFile:
    # File-like stub: every method accepts the usual arguments, does
    # nothing, and returns None.

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
239
class MockHeaders(dict):
    # dict-backed header container for tests.

    def getheaders(self, name):
        # ``name`` is ignored: all stored values are returned, whatever
        # header was asked for.
        return dict.values(self)
243
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000244class MockResponse(StringIO.StringIO):
245 def __init__(self, code, msg, headers, data, url=None):
246 StringIO.StringIO.__init__(self, data)
247 self.code, self.msg, self.headers, self.url = code, msg, headers, url
248 def info(self):
249 return self.headers
250 def geturl(self):
251 return self.url
252
class MockCookieJar:
    # Cookie-jar stub that just records the objects it is called with.

    def add_cookie_header(self, request):
        # Keep the request for later inspection by the test.
        self.ach_req = request

    def extract_cookies(self, response, request):
        # Keep both arguments for later inspection by the test.
        self.ec_req = request
        self.ec_r = response
258
class FakeMethod:
    # Callable that forwards every call to ``handle``, prefixing the
    # positional arguments with the method name and action it was built with.

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
266
class MockHTTPResponse:
    # Response stub: stores its four constructor arguments as attributes
    # and always reads as an empty string.

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        return ''
275
class MockHTTPClass:
    # Connection stub.  Calling the instance returns the instance itself, so
    # one object plays both the connection class and the connection, and
    # everything passed to it is recorded on attributes.

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the current dict in place.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Sorted so tests can compare against a fixed list.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
311
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        # A spec of length 2 is unpacked as (name, action); anything else is
        # taken as a bare method name with no action.
        for spec in methods:
            name, action = spec if len(spec) == 2 else (spec, None)
            # The fake method is attached to the *class*, not the instance.
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Every call is logged on the parent before the action is performed.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The text after the last space is the error code; it is used
            # as an int when it parses as one, otherwise as the raw string.
            code = action.rsplit(" ", 1)[-1]
            try:
                code = int(code)
            except ValueError:
                pass
            response = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], response, code, "", {})
        if action == "raise":
            raise urllib2.URLError("blah")
        # Any other action string is a mistake in the test itself.
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        # Note: this resets the parent's call log.
        parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
355
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers. eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another. These methods just record their arguments and
    return None. Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    The created handlers are returned in the order of meth_spec.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler attaches its fake
        # methods to the class, so handlers must not share one.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        # Later specs get a larger handler_order than earlier ones.
        handler.handler_order = handler.handler_order + position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
386
Georg Brandlfa42bd72006-04-30 07:06:11 +0000387def build_test_opener(*handler_instances):
388 opener = OpenerDirector()
389 for h in handler_instances:
390 opener.add_handler(h)
391 return opener
392
393class MockHTTPHandler(urllib2.BaseHandler):
394 # useful for testing redirections and auth
395 # sends supplied headers and code as first response
396 # sends 200 OK as second response
397 def __init__(self, code, headers):
398 self.code = code
399 self.headers = headers
400 self.reset()
401 def reset(self):
402 self._count = 0
403 self.requests = []
404 def http_open(self, req):
405 import mimetools, httplib, copy
406 from StringIO import StringIO
407 self.requests.append(copy.deepcopy(req))
408 if self._count == 0:
409 self._count = self._count + 1
410 name = httplib.responses[self.code]
411 msg = mimetools.Message(StringIO(self.headers))
412 return self.parent.error(
413 "http", req, MockFile(), self.code, name, msg)
414 else:
415 self.req = req
416 msg = mimetools.Message(StringIO("\r\n\r\n"))
417 return MockResponse(200, "OK", msg, "", req.get_full_url())
418
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpcon

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        # Kept on the handler so tests can inspect it after a request.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        # Pass the stored mock to do_open() as the connection class.
        connection = self.httpconn
        return self.do_open(connection, req)
429
class MockPasswordManager:
    # Password-manager stub: remembers the last credentials added and the
    # last lookup made, and answers every lookup with those credentials.

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for so the test can check it afterwards.
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
440
441
class OpenerDirectorTests(unittest.TestCase):
    # Tests of how OpenerDirector dispatches to handler methods, using
    # MockHandler instances that record each call on ``opener.calls``.

    def test_add_non_handler(self):
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib2 import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the length first;
        # otherwise missing calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            # A bare name: this handler's http_error_302 just returns None.
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; used as a wildcard for the
            # response object, whose identity the test cannot predict.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # A real unittest assertion, so the check survives ``python -O``.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this, the loop below would pass if calls were missing.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
593
594
Tim Peters58eb11c2004-01-18 20:29:55 +0000595def sanepathname2url(path):
596 import urllib
597 urlpath = urllib.pathname2url(path)
598 if os.name == "nt" and urlpath.startswith("///"):
599 urlpath = urlpath[2:]
600 # XXX don't ask me about the mac...
601 return urlpath
602
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603class HandlerTests(unittest.TestCase):
604
605 def test_ftp(self):
606 class MockFTPWrapper:
607 def __init__(self, data): self.data = data
608 def retrfile(self, filename, filetype):
609 self.filename, self.filetype = filename, filetype
610 return StringIO.StringIO(self.data), len(self.data)
611
612 class NullFTPHandler(urllib2.FTPHandler):
613 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000614 def connect_ftp(self, user, passwd, host, port, dirs,
615 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 self.user, self.passwd = user, passwd
617 self.host, self.port = host, port
618 self.dirs = dirs
619 self.ftpwrapper = MockFTPWrapper(self.data)
620 return self.ftpwrapper
621
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000622 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623 data = "rheum rhaponicum"
624 h = NullFTPHandler(data)
625 o = h.parent = MockOpener()
626
627 for url, host, port, type_, dirs, filename, mimetype in [
628 ("ftp://localhost/foo/bar/baz.html",
629 "localhost", ftplib.FTP_PORT, "I",
630 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000631 ("ftp://localhost:80/foo/bar/",
632 "localhost", 80, "D",
633 ["foo", "bar"], "", None),
634 ("ftp://localhost/baz.gif;type=a",
635 "localhost", ftplib.FTP_PORT, "A",
636 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000637 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000638 req = Request(url)
639 req.timeout = None
640 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 # ftp authentication not yet implemented by FTPHandler
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000642 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000643 self.assertEqual(h.host, socket.gethostbyname(host))
644 self.assertEqual(h.port, port)
645 self.assertEqual(h.dirs, dirs)
646 self.assertEqual(h.ftpwrapper.filename, filename)
647 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000649 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000650 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651
652 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000653 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000654 h = urllib2.FileHandler()
655 o = h.parent = MockOpener()
656
Tim Peters58eb11c2004-01-18 20:29:55 +0000657 TESTFN = test_support.TESTFN
658 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000659 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000660 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000661 "file://localhost%s" % urlpath,
662 "file://%s" % urlpath,
663 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000664 ]
665 try:
Tim Peters480725d2006-04-03 02:46:44 +0000666 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000667 except socket.gaierror:
668 localaddr = ''
669 if localaddr:
670 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000671
Georg Brandldd2245f2006-03-31 17:18:06 +0000672 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000673 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000674 try:
675 try:
676 f.write(towrite)
677 finally:
678 f.close()
679
680 r = h.file_open(Request(url))
681 try:
682 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000683 headers = r.info()
684 newurl = r.geturl()
685 finally:
686 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000687 stats = os.stat(TESTFN)
688 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000689 finally:
690 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000691 self.assertEqual(data, towrite)
692 self.assertEqual(headers["Content-type"], "text/plain")
693 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000694 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000695
696 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000697 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000698 "file:///file_does_not_exist.txt",
699 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
700 os.getcwd(), TESTFN),
701 "file://somerandomhost.ontheinternet.com%s/%s" %
702 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000703 ]:
704 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000705 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000706 try:
707 f.write(towrite)
708 finally:
709 f.close()
710
711 self.assertRaises(urllib2.URLError,
712 h.file_open, Request(url))
713 finally:
714 os.remove(TESTFN)
715
716 h = urllib2.FileHandler()
717 o = h.parent = MockOpener()
718 # XXXX why does // mean ftp (and /// mean not ftp!), and where
719 # is file: scheme specified? I think this is really a bug, and
720 # what was intended was to distinguish between URLs like:
721 # file:/blah.txt (a file)
722 # file://localhost/blah.txt (a file)
723 # file:///blah.txt (a file)
724 # file://ftp.example.com/blah.txt (an ftp URL)
725 for url, ftp in [
726 ("file://ftp.example.com//foo.txt", True),
727 ("file://ftp.example.com///foo.txt", False),
728# XXXX bug: fails with OSError, should be URLError
729 ("file://ftp.example.com/foo.txt", False),
730 ]:
731 req = Request(url)
732 try:
733 h.file_open(req)
734 # XXXX remove OSError when bug fixed
735 except (urllib2.URLError, OSError):
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000736 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000737 else:
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000738 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000739 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000740
741 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000742
743 h = urllib2.AbstractHTTPHandler()
744 o = h.parent = MockOpener()
745
746 url = "http://example.com/"
747 for method, data in [("GET", None), ("POST", "blah")]:
748 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000749 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000750 req.add_unredirected_header("Spam", "eggs")
751 http = MockHTTPClass()
752 r = h.do_open(http, req)
753
754 # result attributes
755 r.read; r.readline # wrapped MockFile methods
756 r.info; r.geturl # addinfourl methods
757 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
758 hdrs = r.info()
759 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000760 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000761
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000762 self.assertEqual(http.host, "example.com")
763 self.assertEqual(http.level, 0)
764 self.assertEqual(http.method, method)
765 self.assertEqual(http.selector, "/")
766 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000767 [("Connection", "close"),
768 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000769 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000770
771 # check socket.error converted to URLError
772 http.raise_on_endheaders = True
773 self.assertRaises(urllib2.URLError, h.do_open, http, req)
774
775 # check adding of standard headers
776 o.addheaders = [("Spam", "eggs")]
777 for data in "", None: # POST, GET
778 req = Request("http://example.com/", data)
779 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000780 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000781 if data is None: # GET
Ezio Melottiaa980582010-01-23 23:04:36 +0000782 self.assertNotIn("Content-length", req.unredirected_hdrs)
783 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000784 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000785 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
786 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000787 "application/x-www-form-urlencoded")
788 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000789 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
790 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000791
792 # don't clobber existing headers
793 req.add_unredirected_header("Content-length", "foo")
794 req.add_unredirected_header("Content-type", "bar")
795 req.add_unredirected_header("Host", "baz")
796 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000797 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000798 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
799 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000800 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
801 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000802
def test_http_doubleslash(self):
    # A redundant double slash in a URL must not break anything.
    # Previously, a double slash directly after the host could cause the
    # URL to be parsed incorrectly.
    handler = urllib2.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    for url in ("http://example.com/foo/bar/baz.html",
                "http://example.com//foo/bar/baz.html",
                "http://example.com/foo//bar/baz.html",
                "http://example.com/foo/bar//baz.html"):
        request = Request(url, body)

        # Without a proxy the Host header must name the origin server.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # With a proxy configured the Host header must still name it.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
828
def test_errors(self):
    processor = urllib2.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # all 2xx are passed through: the very same response object comes
    # back and o.error is never called
    for code, msg in ((200, "OK"),
                      (202, "Accepted"),
                      (206, "Partial content")):
        response = MockResponse(code, msg, {}, "", url)
        returned = processor.http_response(req, response)
        self.assertTrue(response is returned)
        self.assertTrue(not hasattr(opener, "proto"))  # o.error not called

    # anything else calls o.error (and MockOpener returns None, here)
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(processor.http_response(req, response) is None)
    self.assertEqual(opener.proto, "http")  # o.error called
    self.assertEqual(opener.args, (req, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000853
def test_cookies(self):
    # The mock cookie jar must be shown the exact request and response
    # objects, and the processor must return those same objects.
    cj = MockCookieJar()
    h = urllib2.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")

    # Request side.  Each identity is asserted separately so that a
    # failure says which object was wrong.
    newreq = h.http_request(req)
    self.assertIs(cj.ach_req, req)
    self.assertIs(newreq, req)
    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())

    # Response side.
    newr = h.http_response(req, r)
    self.assertIs(cj.ec_req, req)
    self.assertIs(cj.ec_r, r)
    self.assertIs(newr, r)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000868
def test_redirect(self):
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib2.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.add_header("Nonsense", "viking=withhold")
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib2.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                self.assertTrue(not o.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertNotIn("content-length", headers)
            self.assertNotIn("content-type", headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", o.req.headers)
            self.assertNotIn("Spam", o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    def redirects_before_error(url_for, limit):
        # Redirect one request over and over until HTTPError is raised
        # and return how many redirects succeeded before that.  The loop
        # is bounded by `limit` so that a regression in the handler's
        # loop detection fails the test instead of hanging the run.
        req = Request(from_url, origin_req_host="example.com")
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        for count in range(limit + 1):
            try:
                redirect(h, req, url_for(count))
            except urllib2.HTTPError:
                return count
        self.fail("no HTTPError after %d redirects" % (limit + 1))

    # detect infinite loop redirect of a URL to itself; don't stop until
    # max_repeats, because cookies may introduce state
    max_repeats = urllib2.HTTPRedirectHandler.max_repeats
    self.assertEqual(
        redirects_before_error(lambda count: "http://example.com/",
                               max_repeats),
        max_repeats)

    # detect endless non-repeating chain of redirects
    max_redirections = urllib2.HTTPRedirectHandler.max_redirections
    self.assertEqual(
        redirects_before_error(
            lambda count: "http://example.com/%d" % count,
            max_redirections),
        max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000940
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock HTTP handler answers with a 302 pointing at another host.
    http_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        http_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar))
    opener.open("http://www.example.com/")

    self.assertFalse(http_handler.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000956
def test_proxy(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    req = Request("http://acme.example.com/")
    self.assertEqual(req.get_host(), "acme.example.com")
    opener.open(req)
    # After opening, the request's host is the proxy, not the origin.
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    # The only recorded call is http_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
973
def test_proxy_no_proxy(self):
    # With no_proxy set to 'python.org', a request to www.perl.org is
    # repointed at the proxy while a request to www.python.org is not.
    #
    # The environment variable is process-global, so it is restored in a
    # finally block: a failing assertion must not leak it into later
    # tests, and a value the user already had must not be lost.
    saved = os.environ.get('no_proxy')
    os.environ['no_proxy'] = 'python.org'
    try:
        o = OpenerDirector()
        ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
        o.add_handler(ph)

        req = Request("http://www.perl.org/")
        self.assertEqual(req.get_host(), "www.perl.org")
        o.open(req)
        self.assertEqual(req.get_host(), "proxy.example.com")

        req = Request("http://www.python.org")
        self.assertEqual(req.get_host(), "www.python.org")
        o.open(req)
        self.assertEqual(req.get_host(), "www.python.org")
    finally:
        if saved is None:
            del os.environ['no_proxy']
        else:
            os.environ['no_proxy'] = saved
988
989
def test_proxy_https(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    req = Request("https://www.example.com/")
    self.assertEqual(req.get_host(), "www.example.com")
    opener.open(req)
    # After opening, the request's host is the proxy, not the origin.
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    # The only recorded call is https_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1004
def test_proxy_https_proxy_authorization(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    req = Request("https://www.example.com/")
    for name, value in (("Proxy-Authorization", "FooBar"),
                        ("User-Agent", "Grail")):
        req.add_header(name, value)
    self.assertEqual(req.get_host(), "www.example.com")
    self.assertIsNone(req._tunnel_host)

    opener.open(req)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertNotIn(("Proxy-Authorization", "FooBar"), sent_headers)
    self.assertIn(("User-Agent", "Grail"), sent_headers)
    self.assertIsNotNone(req._tunnel_host)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")
    self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
1027
def test_basic_auth(self, quote_char='"'):
    # quote_char is the character wrapped around the realm in the 401
    # challenge; the single-quoted variant reuses this test.
    realm = "ACME Widget Store"
    challenge = 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % (
        quote_char, realm, quote_char)

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    # The request URL and the protected URL are the same here.
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
1043
def test_basic_auth_with_single_quoted_realm(self):
    # Same scenario as test_basic_auth, but the realm in the challenge
    # is wrapped in apostrophes instead of double quotes.
    apostrophe = "'"
    self.test_basic_auth(quote_char=apostrophe)
1046
def test_proxy_basic_auth(self):
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    # The password manager is expected to be asked about the proxy's
    # host:port, not about the URL that was requested.
    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1063
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpener(OpenerDirector):
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class RecordingDigestHandler(urllib2.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            urllib2.HTTPDigestAuthHandler.http_error_401(
                self, *args, **kwds)

    class RecordingBasicHandler(urllib2.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            urllib2.HTTPBasicAuthHandler.http_error_401(
                self, *args, **kwds)

    realm = "ACME Networks"
    challenge = 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = RecordingOpener()
    password_manager = MockPasswordManager()
    digest_handler = RecordingDigestHandler(password_manager)
    basic_handler = RecordingBasicHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    # Basic is registered before digest on purpose: digest must still be
    # the first one consulted.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001110
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared scenario for the basic-auth tests.  Opens request_url twice:
    # once with credentials registered, once after they are cleared.
    import base64
    user = "wile"
    password = "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    for expected, attr in ((realm, "realm"),
                           (request_url, "url"),
                           (user, "user"),
                           (password, "password")):
        self.assertEqual(expected, getattr(password_manager, attr))

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    unauthenticated = http_handler.requests[0]
    authenticated = http_handler.requests[1]
    self.assertFalse(unauthenticated.has_header(auth_header))
    credentials = '%s:%s' % (user, password)
    expected_value = 'Basic ' + base64.encodestring(credentials).strip()
    self.assertEqual(authenticated.get_header(auth_header),
                     expected_value)
    self.assertEqual(authenticated.unredirected_hdrs[auth_header],
                     expected_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1146
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001147
class MiscTests(unittest.TestCase):

    def test_build_opener(self):
        class MyHTTPHandler(urllib2.HTTPHandler):
            pass

        class FooHandler(urllib2.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib2.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib2.build_opener
        check = self.opener_has_handler

        opener = build_opener(FooHandler, BarHandler)
        check(opener, FooHandler)
        check(opener, BarHandler)

        # can take a mix of classes and instances
        opener = build_opener(FooHandler, BarHandler())
        check(opener, FooHandler)
        check(opener, BarHandler)

        # subclasses of default handlers override default handlers
        opener = build_opener(MyHTTPHandler)
        check(opener, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        check(build_opener(), urllib2.HTTPHandler)
        check(build_opener(urllib2.HTTPHandler), urllib2.HTTPHandler)
        check(build_opener(urllib2.HTTPHandler()), urllib2.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib2.HTTPHandler):
            pass

        opener = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        check(opener, MyHTTPHandler)
        check(opener, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        # Pass only if some handler's class is exactly handler_class;
        # a subclass instance does not count.
        self.assertTrue(any(handler.__class__ == handler_class
                            for handler in opener.handlers))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001193
class RequestTests(unittest.TestCase):

    def setUp(self):
        # One request without data (GET) and one with data (POST), both
        # for the same URL.
        url = "http://www.python.org/~jeremy/"
        self.get = urllib2.Request(url)
        self.post = urllib2.Request(url, "data", headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        req = self.get
        self.assertFalse(req.has_data())
        self.assertEqual("GET", req.get_method())
        # Attaching data turns the request into a POST.
        req.add_data("spam")
        self.assertTrue(req.has_data())
        self.assertEqual("POST", req.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        # "%70" is a percent-encoded "p" and must be decoded in the host.
        quoted = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", quoted.get_host())

    def test_proxy(self):
        req = self.get
        self.assertFalse(req.has_proxy())
        req.set_proxy("www.perl.org", "http")
        self.assertTrue(req.has_proxy())
        # The origin host is remembered; get_host() now names the proxy.
        self.assertEqual("www.python.org", req.get_origin_req_host())
        self.assertEqual("www.perl.org", req.get_host())
1238
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001239
def test_main(verbose=None):
    """Run the doctests of this module and of urllib2, then the unit tests.

    `verbose` is passed straight through to test_support.run_doctest.
    """
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(TrivialTests,
                              OpenerDirectorTests,
                              HandlerTests,
                              MiscTests,
                              RequestTests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001250
# Running this file directly executes the whole suite in verbose mode.
if __name__ == "__main__":
    test_main(verbose=True)