blob: 3f781988f5a4f33cacf2166d19018f3b3d00a4ac [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Georg Brandlfa42bd72006-04-30 07:06:11 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urllib2.urlopen() and urllib2.parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a malformed URL and can read a local file: URL."""
        # A couple trivial tests

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')

        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')
        elif os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib2.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is irrelevant.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas that are outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        # Avoid naming the loop variables "string"/"list": that would shadow
        # a module name and a builtin.
        for header_value, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header_value), expected)
51
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000052
def test_request_headers_dict():
    """
    Doctest container for the Request.headers dictionary (the function body
    is intentionally empty; only the examples below are meaningful).

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
82
def test_request_headers_methods():
    """
    Doctest container for the header accessor methods of Request.

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
114
115
def test_password_manager(self):
    """
    Doctest container for HTTPPasswordMgr lookups by realm and URI.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined at the moment, so the example below
    is disabled:
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
179
180
def test_password_manager_default_port(self):
    """
    Doctest container for HTTPPasswordMgr handling of default ports.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
227
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Store the request, payload and timeout of the latest call."""
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        """Store the protocol and the remaining positional arguments."""
        self.proto = proto
        self.args = args
234
class MockFile:
    """File-like dummy: every method accepts its arguments and returns None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
239
class MockHeaders(dict):
    """Dict-based header container with a getheaders() method."""

    def getheaders(self, name):
        """Return every stored value; ``name`` is not consulted."""
        return dict.values(self)
243
class MockResponse(StringIO.StringIO):
    """In-memory response: a StringIO body plus code, msg, headers and url."""

    def __init__(self, code, msg, headers, data, url=None):
        StringIO.StringIO.__init__(self, data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor (None by default)."""
        return self.url
252
class MockCookieJar:
    """Cookie-jar stand-in that remembers the objects passed to it."""

    def add_cookie_header(self, request):
        """Record the request under ``ach_req``."""
        self.ach_req = request

    def extract_cookies(self, response, request):
        """Record the request under ``ec_req`` and the response under ``ec_r``."""
        self.ec_req = request
        self.ec_r = response
258
class FakeMethod:
    """Callable that forwards to ``handle`` with a method name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.handle = handle
        self.action = action

    def __call__(self, *args):
        # Prepend the stored name and action to the caller's arguments.
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
266
class MockHTTPResponse:
    """Minimal response object holding fp, msg, status and reason."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        """Always return an empty body."""
        return ''
275
class MockHTTPClass:
    """Fake HTTP connection class/instance that records how it is used.

    The instance is callable and returns itself, so it can be handed over
    wherever a connection *class* is expected.
    """

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Pretend to construct a connection: remember host and timeout."""
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def _set_tunnel(self, host, port=None, headers=None):
        """Remember tunnel host/port; falsy headers empty the stored dict."""
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        """Record the request; raise socket.error if raise_on_endheaders is set."""
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Keep the recorded headers sorted so tests can compare them directly.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            import socket
            raise socket.error()

    def getresponse(self):
        """Return a canned 200 OK response with empty headers."""
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
311
class MockHandler:
    """Configurable handler whose generated methods log calls on the parent.

    Useful for testing handler machinery; see the docstring of
    add_ordered_mock_handlers().
    """
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per spec on this instance's class.

        A spec of length two is unpacked as (name, action); anything else is
        taken as a bare method name with no action.
        """
        for spec in methods:
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            # Set on the class, not the instance, so the methods show up as
            # class attributes.
            setattr(self.__class__, name, FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        """Log the call on the parent, then carry out ``action``."""
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The text after the last space is the error code, if numeric.
            code = action.rsplit(" ", 1)[-1]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib2.URLError("blah")
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        """Attach the parent and reset its list of recorded calls."""
        self.parent = parent
        parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
355
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    The handlers are returned in the order of meth_spec; each one gets a
    handler_order one higher than the previous one.
    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # generated methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        handler.handler_order = handler.handler_order + position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
386
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    register = director.add_handler
    for instance in handler_instances:
        register(instance)
    return director
392
class MockHTTPHandler(urllib2.BaseHandler):
    """Handler for testing redirections and auth.

    Sends the supplied headers and code as the first response, and
    200 OK as every later response, until reset() is called.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget all recorded requests and start with the first response again."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import mimetools
        import httplib
        import copy
        from StringIO import StringIO

        # Keep a snapshot of the request as it looked when it arrived.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())

        self._count += 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error("http", req, MockFile(), self.code, name, msg)
418
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpconn

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        # One shared mock connection, so a test can inspect it afterwards.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        # Hand the mock connection to do_open() in place of a real one.
        return self.do_open(self.httpconn, req)
429
class MockPasswordManager:
    """Password manager that stores one credential and logs each lookup."""

    def add_password(self, realm, uri, user, password):
        """Remember the single set of credentials (``uri`` is kept as ``url``)."""
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        """Record what was asked for and return the stored (user, password)."""
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
440
441
class OpenerDirectorTests(unittest.TestCase):
    """Tests for how OpenerDirector dispatches to its handlers."""

    def test_add_non_handler(self):
        """add_handler() raises TypeError for an object that is not a handler."""
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(urllib2.URLError,
                              o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the length explicitly;
        # otherwise missing calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        """Handlers are called in handler_order, not in order of addition."""
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            # A plain method name (the original's parentheses did not make
            # this a tuple either).
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: placeholder for the response
            # object, whose identity the test does not care about.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # Use a unittest assertion rather than a bare assert, which is
        # stripped under -O and reports no values on failure.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # Expected value goes first so that Unknown.__eq__ is the one used.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this check, too few recorded calls would pass silently.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assert_(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assert_(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assert_(args[1] is None or
                             isinstance(args[1], MockResponse))
593
594
def sanepathname2url(path):
    """Convert ``path`` with urllib.pathname2url(), trimming a "///" prefix on nt.

    When os.name is "nt" and the converted path starts with three slashes,
    the first two are removed; otherwise the converted path is returned
    unchanged.
    """
    import urllib
    converted = urllib.pathname2url(path)
    has_extra_slashes = os.name == "nt" and converted.startswith("///")
    # XXX don't ask me about the mac...
    if has_extra_slashes:
        return converted[2:]
    return converted
602
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603class HandlerTests(unittest.TestCase):
604
605 def test_ftp(self):
606 class MockFTPWrapper:
607 def __init__(self, data): self.data = data
608 def retrfile(self, filename, filetype):
609 self.filename, self.filetype = filename, filetype
610 return StringIO.StringIO(self.data), len(self.data)
611
612 class NullFTPHandler(urllib2.FTPHandler):
613 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000614 def connect_ftp(self, user, passwd, host, port, dirs,
615 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 self.user, self.passwd = user, passwd
617 self.host, self.port = host, port
618 self.dirs = dirs
619 self.ftpwrapper = MockFTPWrapper(self.data)
620 return self.ftpwrapper
621
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000622 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623 data = "rheum rhaponicum"
624 h = NullFTPHandler(data)
625 o = h.parent = MockOpener()
626
627 for url, host, port, type_, dirs, filename, mimetype in [
628 ("ftp://localhost/foo/bar/baz.html",
629 "localhost", ftplib.FTP_PORT, "I",
630 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000631 ("ftp://localhost:80/foo/bar/",
632 "localhost", 80, "D",
633 ["foo", "bar"], "", None),
634 ("ftp://localhost/baz.gif;type=a",
635 "localhost", ftplib.FTP_PORT, "A",
636 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000637 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000638 req = Request(url)
639 req.timeout = None
640 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 # ftp authentication not yet implemented by FTPHandler
642 self.assert_(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000643 self.assertEqual(h.host, socket.gethostbyname(host))
644 self.assertEqual(h.port, port)
645 self.assertEqual(h.dirs, dirs)
646 self.assertEqual(h.ftpwrapper.filename, filename)
647 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000649 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000650 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651
    def test_file(self):
        """Exercise FileHandler.file_open() with good and bad file: URLs.

        Three phases: URLs that must open the scratch file and report the
        right headers, URLs that must raise URLError, and URLs showing
        whether the request is passed on as an ftp request.
        """
        import rfc822, socket
        h = urllib2.FileHandler()
        o = h.parent = MockOpener()

        TESTFN = test_support.TESTFN
        urlpath = sanepathname2url(os.path.abspath(TESTFN))
        towrite = "hello, world\n"
        urls = [
            "file://localhost%s" % urlpath,
            "file://%s" % urlpath,
            "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
            ]
        # Also try this machine's own address, when it can be resolved.
        try:
            localaddr = socket.gethostbyname(socket.gethostname())
        except socket.gaierror:
            localaddr = ''
        if localaddr:
            urls.append("file://%s%s" % (localaddr, urlpath))

        for url in urls:
            # The scratch file is re-created for every URL and always removed
            # again, even when opening or reading fails.
            f = open(TESTFN, "wb")
            try:
                try:
                    f.write(towrite)
                finally:
                    f.close()

                r = h.file_open(Request(url))
                try:
                    data = r.read()
                    headers = r.info()
                    respurl = r.geturl()
                finally:
                    r.close()
                # Compute the expected Last-modified value while the file
                # still exists.
                stats = os.stat(TESTFN)
                modified = rfc822.formatdate(stats.st_mtime)
            finally:
                os.remove(TESTFN)
            self.assertEqual(data, towrite)
            self.assertEqual(headers["Content-type"], "text/plain")
            self.assertEqual(headers["Content-length"], "13")
            self.assertEqual(headers["Last-modified"], modified)
            self.assertEqual(respurl, url)

        # Each of these URLs must be rejected with URLError.
        for url in [
            "file://localhost:80%s" % urlpath,
            "file:///file_does_not_exist.txt",
            "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                                   os.getcwd(), TESTFN),
            "file://somerandomhost.ontheinternet.com%s/%s" %
            (os.getcwd(), TESTFN),
            ]:
            try:
                f = open(TESTFN, "wb")
                try:
                    f.write(towrite)
                finally:
                    f.close()

                self.assertRaises(urllib2.URLError,
                                  h.file_open, Request(url))
            finally:
                os.remove(TESTFN)

        h = urllib2.FileHandler()
        o = h.parent = MockOpener()
        # XXXX why does // mean ftp (and /// mean not ftp!), and where
        #  is file: scheme specified?  I think this is really a bug, and
        #  what was intended was to distinguish between URLs like:
        # file:/blah.txt (a file)
        # file://localhost/blah.txt (a file)
        # file:///blah.txt (a file)
        # file://ftp.example.com/blah.txt (an ftp URL)
        for url, ftp in [
            ("file://ftp.example.com//foo.txt", True),
            ("file://ftp.example.com///foo.txt", False),
            # XXXX bug: fails with OSError, should be URLError
            ("file://ftp.example.com/foo.txt", False),
            ]:
            req = Request(url)
            try:
                h.file_open(req)
            # XXXX remove OSError when bug fixed
            except (urllib2.URLError, OSError):
                self.assert_(not ftp)
            else:
                # No exception: the request must have reached the mock opener
                # with its type switched to ftp.
                self.assert_(o.req is req)
                self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000741
742 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000743
744 h = urllib2.AbstractHTTPHandler()
745 o = h.parent = MockOpener()
746
747 url = "http://example.com/"
748 for method, data in [("GET", None), ("POST", "blah")]:
749 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000750 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000751 req.add_unredirected_header("Spam", "eggs")
752 http = MockHTTPClass()
753 r = h.do_open(http, req)
754
755 # result attributes
756 r.read; r.readline # wrapped MockFile methods
757 r.info; r.geturl # addinfourl methods
758 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
759 hdrs = r.info()
760 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000761 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000762
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000763 self.assertEqual(http.host, "example.com")
764 self.assertEqual(http.level, 0)
765 self.assertEqual(http.method, method)
766 self.assertEqual(http.selector, "/")
767 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000768 [("Connection", "close"),
769 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000770 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000771
772 # check socket.error converted to URLError
773 http.raise_on_endheaders = True
774 self.assertRaises(urllib2.URLError, h.do_open, http, req)
775
776 # check adding of standard headers
777 o.addheaders = [("Spam", "eggs")]
778 for data in "", None: # POST, GET
779 req = Request("http://example.com/", data)
780 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000781 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000782 if data is None: # GET
Georg Brandl8c036cc2006-08-20 13:15:39 +0000783 self.assert_("Content-length" not in req.unredirected_hdrs)
784 self.assert_("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000785 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000786 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
787 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788 "application/x-www-form-urlencoded")
789 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000790 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
791 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000792
793 # don't clobber existing headers
794 req.add_unredirected_header("Content-length", "foo")
795 req.add_unredirected_header("Content-type", "bar")
796 req.add_unredirected_header("Host", "baz")
797 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000798 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000799 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
800 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000801 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
802 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000803
def test_http_doubleslash(self):
    # A redundant "//" anywhere in the path must not confuse host
    # detection.  Historically a double slash right after the host
    # could make the URL parse incorrectly.
    handler = urllib2.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    paths = ("/foo/bar/baz.html",
             "//foo/bar/baz.html",
             "/foo//bar/baz.html",
             "/foo/bar//baz.html")

    for path in paths:
        request = Request("http://example.com" + path, body)

        # Direct connection: Host comes straight from the URL.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # Through a proxy: Host must still name the origin server.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
829
def test_errors(self):
    processor = urllib2.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # Every 2xx response is handed back untouched and o.error() is
    # never invoked (MockOpener only grows a "proto" attribute when
    # error() is called).
    successes = ((200, "OK"),
                 (202, "Accepted"),
                 (206, "Partial content"))
    for code, msg in successes:
        response = MockResponse(code, msg, {}, "", url)
        result = processor.http_response(req, response)
        self.assertTrue(response is result)
        self.assertFalse(hasattr(opener, "proto"))

    # Anything else is routed to o.error(); MockOpener returns None.
    failure = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(processor.http_response(req, failure) is None)
    self.assertEqual(opener.proto, "http")  # o.error called
    self.assertEqual(opener.args, (req, failure, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000854
def test_cookies(self):
    # HTTPCookieProcessor must pass the very same request/response
    # objects through to the cookie jar and back to the caller.
    jar = MockCookieJar()
    processor = urllib2.HTTPCookieProcessor(jar)
    processor.parent = MockOpener()

    request = Request("http://example.com/")
    response = MockResponse(200, "OK", {}, "")

    returned_request = processor.http_request(request)
    self.assertTrue(jar.ach_req is request is returned_request)
    self.assertEqual(request.get_origin_req_host(), "example.com")
    self.assertFalse(request.is_unverifiable())

    returned_response = processor.http_response(request, response)
    self.assertTrue(jar.ec_req is request)
    self.assertTrue(jar.ec_r is response is returned_response)
869
def test_redirect(self):
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    handler = urllib2.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()

    # ordinary redirect behaviour
    for code in (301, 302, 303, 307):
        for data in (None, "blah\nblah\n"):
            error_method = getattr(handler, "http_error_%s" % code)
            req = Request(from_url, data)
            req.add_header("Nonsense", "viking=withhold")
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                error_method(req, MockFile(), code, "Blah",
                             MockHeaders({"location": to_url}))
            except urllib2.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(opener.req.get_full_url(), to_url)
            try:
                self.assertEqual(opener.req.get_method(), "GET")
            except AttributeError:
                self.assertFalse(opener.req.has_data())

            # The redirected request is a GET, so no content headers
            # (possibly left over from the POST) may survive.
            lowered = [name.lower() for name in opener.req.headers]
            self.assertFalse("content-length" in lowered)
            self.assertFalse("content-type" in lowered)

            self.assertEqual(opener.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertFalse("Spam" in opener.req.headers)
            self.assertFalse("Spam" in opener.req.unredirected_hdrs)

    # loop detection
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    req = Request(from_url, origin_req_host="example.com")
    hops = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/")
            hops += 1
    except urllib2.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(hops, urllib2.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    req = Request(from_url, origin_req_host="example.com")
    hops = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/%d" % hops)
            hops += 1
    except urllib2.HTTPError:
        self.assertEqual(hops,
                         urllib2.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000941
def test_cookie_redirect(self):
    # A cookie set for one site must not be sent along when the
    # request is redirected to a different site.
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    http_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    default_error_handler = urllib2.HTTPDefaultErrorHandler()
    redirect_handler = urllib2.HTTPRedirectHandler()
    cookie_processor = urllib2.HTTPCookieProcessor(jar)

    opener = build_test_opener(http_handler, default_error_handler,
                               redirect_handler, cookie_processor)
    opener.open("http://www.example.com/")
    self.assertFalse(http_handler.req.has_header("Cookie"))
957
def test_proxy(self):
    # Opening through a ProxyHandler rewrites the request's host to
    # the proxy and still dispatches to the http_open handler.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
974
def test_proxy_no_proxy(self):
    # Hosts matching the no_proxy environment variable bypass the
    # configured proxy; every other host is routed through it.
    #
    # The variable is process-global state.  Remember any value that
    # was already set and restore it in a finally clause so that a
    # failing assertion cannot leak 'no_proxy' into later tests.
    saved_no_proxy = os.environ.get('no_proxy')
    os.environ['no_proxy'] = 'python.org'
    try:
        o = OpenerDirector()
        ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
        o.add_handler(ph)

        # Not covered by no_proxy: the request is redirected to the proxy.
        req = Request("http://www.perl.org/")
        self.assertEqual(req.get_host(), "www.perl.org")
        o.open(req)
        self.assertEqual(req.get_host(), "proxy.example.com")

        # Covered by no_proxy: the request keeps its original host.
        req = Request("http://www.python.org")
        self.assertEqual(req.get_host(), "www.python.org")
        o.open(req)
        self.assertEqual(req.get_host(), "www.python.org")
    finally:
        if saved_no_proxy is None:
            os.environ.pop('no_proxy', None)
        else:
            os.environ['no_proxy'] = saved_no_proxy
989
990
def test_proxy_https(self):
    # Same as the plain-http proxy test, but for the https scheme.
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1005
def test_proxy_https_proxy_authorization(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    request = Request("https://www.example.com/")
    request.add_header("Proxy-Authorization", "FooBar")
    request.add_header("User-Agent", "Grail")
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertTrue(request._tunnel_host is None)

    opener.open(request)

    # Proxy-Authorization belongs to the tunnel set-up, so it must not
    # be among the headers sent over the connection, while ordinary
    # headers such as User-Agent are.  The request object itself
    # still carries the header.
    sent_headers = https_handler.httpconn.req_headers
    self.assertFalse(("Proxy-Authorization", "FooBar") in sent_headers)
    self.assertTrue(("User-Agent", "Grail") in sent_headers)
    self.assertFalse(request._tunnel_host is None)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
1028
def test_basic_auth(self, quote_char='"'):
    # quote_char is the character wrapped around the realm in the
    # WWW-Authenticate challenge, so the same scenario can be re-run
    # with a differently quoted realm.
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected,
                          protected,
                          )
1044
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic-auth scenario with the realm in single quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1047
def test_proxy_basic_auth(self):
    # Basic authentication against the proxy (407) rather than the
    # origin server.
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128",
                          )
1064
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so the
    # opener below keeps a log of which handler was asked, in order.
    class _RecordingOpener(OpenerDirector):
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class _RecordingDigestHandler(urllib2.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            urllib2.HTTPDigestAuthHandler.http_error_401(self,
                                                         *args, **kwds)

    class _RecordingBasicHandler(urllib2.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            urllib2.HTTPBasicAuthHandler.http_error_401(self,
                                                        *args, **kwds)

    realm = "ACME Networks"
    opener = _RecordingOpener()
    password_manager = MockPasswordManager()
    digest_handler = _RecordingDigestHandler(password_manager)
    basic_handler = _RecordingBasicHandler(password_manager)
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    # Basic is registered first on purpose; digest must still be
    # consulted before it.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected,
                          protected,
                          )
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001111
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared scenario for the basic-auth tests: register credentials,
    # open request_url, and check that the retry carries auth_header;
    # then check that no retry happens once the password manager has
    # no credentials to offer.
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    stored = (("realm", realm),
              ("url", request_url),
              ("user", user),
              ("password", password))
    for attribute, expected in stored:
        self.assertEqual(expected, getattr(password_manager, attribute))

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = '%s:%s' % (user, password)
    expected_value = 'Basic ' + base64.encodestring(credentials).strip()
    self.assertEqual(http_handler.requests[1].get_header(auth_header),
                     expected_value)
    self.assertEqual(
        http_handler.requests[1].unredirected_hdrs[auth_header],
        expected_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1147
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001148
1149class MiscTests(unittest.TestCase):
1150
1151 def test_build_opener(self):
1152 class MyHTTPHandler(urllib2.HTTPHandler): pass
1153 class FooHandler(urllib2.BaseHandler):
1154 def foo_open(self): pass
1155 class BarHandler(urllib2.BaseHandler):
1156 def bar_open(self): pass
1157
1158 build_opener = urllib2.build_opener
1159
1160 o = build_opener(FooHandler, BarHandler)
1161 self.opener_has_handler(o, FooHandler)
1162 self.opener_has_handler(o, BarHandler)
1163
1164 # can take a mix of classes and instances
1165 o = build_opener(FooHandler, BarHandler())
1166 self.opener_has_handler(o, FooHandler)
1167 self.opener_has_handler(o, BarHandler)
1168
1169 # subclasses of default handlers override default handlers
1170 o = build_opener(MyHTTPHandler)
1171 self.opener_has_handler(o, MyHTTPHandler)
1172
1173 # a particular case of overriding: default handlers can be passed
1174 # in explicitly
1175 o = build_opener()
1176 self.opener_has_handler(o, urllib2.HTTPHandler)
1177 o = build_opener(urllib2.HTTPHandler)
1178 self.opener_has_handler(o, urllib2.HTTPHandler)
1179 o = build_opener(urllib2.HTTPHandler())
1180 self.opener_has_handler(o, urllib2.HTTPHandler)
1181
Amaury Forgeot d'Arc96865852008-04-22 21:14:41 +00001182 # Issue2670: multiple handlers sharing the same base class
1183 class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
1184 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1185 self.opener_has_handler(o, MyHTTPHandler)
1186 self.opener_has_handler(o, MyOtherHTTPHandler)
1187
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001188 def opener_has_handler(self, opener, handler_class):
1189 for h in opener.handlers:
1190 if h.__class__ == handler_class:
1191 break
1192 else:
1193 self.assert_(False)
1194
class RequestTests(unittest.TestCase):
    # Accessor tests for urllib2.Request, run against one GET request
    # and one POST request that share the same URL.

    URL = "http://www.python.org/~jeremy/"

    def setUp(self):
        self.get = urllib2.Request(self.URL)
        self.post = urllib2.Request(self.URL,
                                    "data",
                                    headers={"X-Test": "test"})

    def test_method(self):
        # The method follows from whether the request carries data.
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Attaching data turns a GET into a POST.
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        # Percent-escapes in the host part are decoded.
        escaped = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        # After set_proxy() the host is the proxy, while the origin
        # request host still names the original server.
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
1239
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001240
def test_main(verbose=None):
    """Run the doctests of this module and of urllib2, then the unit tests."""
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(TrivialTests,
                              OpenerDirectorTests,
                              HandlerTests,
                              MiscTests,
                              RequestTests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001251
# Run the whole suite verbosely when executed as a script.
if "__main__" == __name__:
    test_main(verbose=True)