blob: 65ad8e307a2e3b1b698ee18b879412b4f414d9c9 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Georg Brandlfa42bd72006-04-30 07:06:11 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a bogus URL and can read a local file: URL."""
        # A couple trivial tests

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')

        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')
        elif os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib2.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is irrelevant.
            f.read()
        finally:
            # Close the response even if read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example',
             ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        # Names chosen so the builtin ``list`` and the ``string`` module are
        # not shadowed.
        for header_value, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header_value), expected)
51
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000052
def test_request_headers_dict():
    """Doctest: Request.headers is keyed by .capitalize()-normalized names.

    The Request.headers dictionary is not a documented interface. It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers. It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
82
def test_request_headers_methods():
    """Doctest: has_header(), get_header(), header_items() and add_header().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility. (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
114
115
def test_password_manager(self):
    """Doctest: HTTPPasswordMgr.add_password() / find_user_password() lookups.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
179
180
def test_password_manager_default_port(self):
    """Doctest: how HTTPPasswordMgr treats explicit and default ports.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme. This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
227
class MockOpener:
    """Opener stand-in that only records the arguments it was called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Remember the call so a test can inspect it afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Remember the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
234
class MockFile:
    """File-like stub whose read(), readline() and close() all do nothing."""

    def read(self, count=None):
        pass

    def readline(self, count=None):
        pass

    def close(self):
        pass
239
class MockHeaders(dict):
    """dict whose getheaders() ignores the name and returns every value."""

    def getheaders(self, name):
        # ``name`` is deliberately unused: all stored values are returned.
        all_values = self.values()
        return all_values
243
class MockResponse(StringIO.StringIO):
    """StringIO-backed response carrying code, msg, headers and url."""

    def __init__(self, code, msg, headers, data, url=None):
        StringIO.StringIO.__init__(self, data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given at construction."""
        return self.headers

    def geturl(self):
        """Return the url given at construction (None by default)."""
        return self.url
252
class MockCookieJar:
    """Cookie-jar stub that records the objects handed to it."""

    def add_cookie_header(self, request):
        # "ach" abbreviates add_cookie_header.
        self.ach_req = request

    def extract_cookies(self, response, request):
        # "ec" abbreviates extract_cookies.
        self.ec_req = request
        self.ec_r = response
258
class FakeMethod:
    """Callable that forwards to ``handle`` with its name and action prepended."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # handle(meth_name, action, *args)
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
266
class MockHTTPResponse:
    """Response stub exposing fp, msg, status and reason; read() is empty."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        # There is never any body.
        return ''
275
class MockHTTPClass:
    """Connection stand-in that records how it is used.

    Calling an instance stores host and timeout and returns the same
    instance, so one object serves as both the "class" and the connection.
    """

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def _set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the current mapping in place.
            self._tunnel_headers.clear()
            return
        self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Sorted so tests can compare against a fixed list.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
311
class MockHandler:
    """Handler whose methods are generated from a spec and log their calls.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        for spec in methods:
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            # Installed on the class, not on the instance.
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Every call is logged on the parent before the action is carried out.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The text after the last space is the code, e.g. "error 302".
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib2.URLError("blah")
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        # No handler_order, leave in original order. Yuck.
        return (not hasattr(other, "handler_order")
                or self.handler_order < other.handler_order)
355
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers. eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another. These methods just record their arguments and
    return None. Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in meth_spec order.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # generated methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        # Later entries get a higher handler_order.
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
386
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with each given handler added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
392
class MockHTTPHandler(urllib2.BaseHandler):
    """Handler that fails the first request and answers 200 OK afterwards.

    Useful for testing redirections and auth: the first response is routed
    through parent.error() with the supplied code and headers; every later
    request gets a 200 OK MockResponse.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget recorded requests and make the next request "first" again."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import httplib
        import mimetools
        from StringIO import StringIO

        # Store a deep copy of each request.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())

        self._count += 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
418
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    """HTTPS handler that sends every request through one MockHTTPClass.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of httpconn.
    """

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
429
class MockPasswordManager:
    """Password manager stub holding only the most recent credentials."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for, then return the stored pair regardless.
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
440
441
class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to its handlers."""

    def test_add_non_handler(self):
        """add_handler() raises TypeError for a non-handler object."""
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(urllib2.URLError,
                              o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None. Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        """Handlers are called in handler_order, not insertion order."""
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: stands in for the response object,
            # whose identity the test does not care about.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # A real assertion, so the check is not stripped under ``python -O``.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # ``args`` must stay on the left so Unknown.__eq__ is consulted.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
593
594
def sanepathname2url(path):
    """Return urllib.pathname2url(path), trimming a leading "///" on Windows.

    On 'nt', a result starting with "///" loses its first two characters.
    """
    import urllib
    converted = urllib.pathname2url(path)
    needs_trim = os.name == "nt" and converted.startswith("///")
    # XXX don't ask me about the mac...
    return converted[2:] if needs_trim else converted
602
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603class HandlerTests(unittest.TestCase):
604
605 def test_ftp(self):
606 class MockFTPWrapper:
607 def __init__(self, data): self.data = data
608 def retrfile(self, filename, filetype):
609 self.filename, self.filetype = filename, filetype
610 return StringIO.StringIO(self.data), len(self.data)
611
612 class NullFTPHandler(urllib2.FTPHandler):
613 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000614 def connect_ftp(self, user, passwd, host, port, dirs,
615 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 self.user, self.passwd = user, passwd
617 self.host, self.port = host, port
618 self.dirs = dirs
619 self.ftpwrapper = MockFTPWrapper(self.data)
620 return self.ftpwrapper
621
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000622 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623 data = "rheum rhaponicum"
624 h = NullFTPHandler(data)
625 o = h.parent = MockOpener()
626
627 for url, host, port, type_, dirs, filename, mimetype in [
628 ("ftp://localhost/foo/bar/baz.html",
629 "localhost", ftplib.FTP_PORT, "I",
630 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000631 ("ftp://localhost:80/foo/bar/",
632 "localhost", 80, "D",
633 ["foo", "bar"], "", None),
634 ("ftp://localhost/baz.gif;type=a",
635 "localhost", ftplib.FTP_PORT, "A",
636 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000637 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000638 req = Request(url)
639 req.timeout = None
640 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 # ftp authentication not yet implemented by FTPHandler
642 self.assert_(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000643 self.assertEqual(h.host, socket.gethostbyname(host))
644 self.assertEqual(h.port, port)
645 self.assertEqual(h.dirs, dirs)
646 self.assertEqual(h.ftpwrapper.filename, filename)
647 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000649 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000650 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651
652 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000653 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000654 h = urllib2.FileHandler()
655 o = h.parent = MockOpener()
656
Tim Peters58eb11c2004-01-18 20:29:55 +0000657 TESTFN = test_support.TESTFN
658 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000659 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000660 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000661 "file://localhost%s" % urlpath,
662 "file://%s" % urlpath,
663 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000664 ]
665 try:
Tim Peters480725d2006-04-03 02:46:44 +0000666 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000667 except socket.gaierror:
668 localaddr = ''
669 if localaddr:
670 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000671
Georg Brandldd2245f2006-03-31 17:18:06 +0000672 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000673 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000674 try:
675 try:
676 f.write(towrite)
677 finally:
678 f.close()
679
680 r = h.file_open(Request(url))
681 try:
682 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000683 headers = r.info()
Senthil Kumaran2add1842010-05-08 03:14:33 +0000684 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000685 finally:
686 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000687 stats = os.stat(TESTFN)
688 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000689 finally:
690 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000691 self.assertEqual(data, towrite)
692 self.assertEqual(headers["Content-type"], "text/plain")
693 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000694 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran2add1842010-05-08 03:14:33 +0000695 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696
697 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000698 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000699 "file:///file_does_not_exist.txt",
700 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
701 os.getcwd(), TESTFN),
702 "file://somerandomhost.ontheinternet.com%s/%s" %
703 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000704 ]:
705 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000706 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000707 try:
708 f.write(towrite)
709 finally:
710 f.close()
711
712 self.assertRaises(urllib2.URLError,
713 h.file_open, Request(url))
714 finally:
715 os.remove(TESTFN)
716
717 h = urllib2.FileHandler()
718 o = h.parent = MockOpener()
719 # XXXX why does // mean ftp (and /// mean not ftp!), and where
720 # is file: scheme specified? I think this is really a bug, and
721 # what was intended was to distinguish between URLs like:
722 # file:/blah.txt (a file)
723 # file://localhost/blah.txt (a file)
724 # file:///blah.txt (a file)
725 # file://ftp.example.com/blah.txt (an ftp URL)
726 for url, ftp in [
727 ("file://ftp.example.com//foo.txt", True),
728 ("file://ftp.example.com///foo.txt", False),
729# XXXX bug: fails with OSError, should be URLError
730 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran9d3c5c82010-07-11 03:33:38 +0000731 ("file://somehost//foo/something.txt", True),
732 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000733 ]:
734 req = Request(url)
735 try:
736 h.file_open(req)
737 # XXXX remove OSError when bug fixed
738 except (urllib2.URLError, OSError):
739 self.assert_(not ftp)
740 else:
741 self.assert_(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000742 self.assertEqual(req.type, "ftp")
Senthil Kumaran9d3c5c82010-07-11 03:33:38 +0000743 self.assertEqual(req.type is "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000744
745 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000746
747 h = urllib2.AbstractHTTPHandler()
748 o = h.parent = MockOpener()
749
750 url = "http://example.com/"
751 for method, data in [("GET", None), ("POST", "blah")]:
752 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000753 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000754 req.add_unredirected_header("Spam", "eggs")
755 http = MockHTTPClass()
756 r = h.do_open(http, req)
757
758 # result attributes
759 r.read; r.readline # wrapped MockFile methods
760 r.info; r.geturl # addinfourl methods
761 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
762 hdrs = r.info()
763 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000764 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000765
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000766 self.assertEqual(http.host, "example.com")
767 self.assertEqual(http.level, 0)
768 self.assertEqual(http.method, method)
769 self.assertEqual(http.selector, "/")
770 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000771 [("Connection", "close"),
772 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000773 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000774
775 # check socket.error converted to URLError
776 http.raise_on_endheaders = True
777 self.assertRaises(urllib2.URLError, h.do_open, http, req)
778
779 # check adding of standard headers
780 o.addheaders = [("Spam", "eggs")]
781 for data in "", None: # POST, GET
782 req = Request("http://example.com/", data)
783 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000784 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000785 if data is None: # GET
Georg Brandl8c036cc2006-08-20 13:15:39 +0000786 self.assert_("Content-length" not in req.unredirected_hdrs)
787 self.assert_("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000789 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
790 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000791 "application/x-www-form-urlencoded")
792 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000793 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
794 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000795
796 # don't clobber existing headers
797 req.add_unredirected_header("Content-length", "foo")
798 req.add_unredirected_header("Content-type", "bar")
799 req.add_unredirected_header("Host", "baz")
800 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000801 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000802 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
803 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000804 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
805 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000806
Facundo Batistaeb90b782008-08-16 14:44:07 +0000807 def test_http_doubleslash(self):
808 # Checks that the presence of an unnecessary double slash in a url doesn't break anything
809 # Previously, a double slash directly after the host could cause incorrect parsing of the url
810 h = urllib2.AbstractHTTPHandler()
811 o = h.parent = MockOpener()
812
813 data = ""
814 ds_urls = [
815 "http://example.com/foo/bar/baz.html",
816 "http://example.com//foo/bar/baz.html",
817 "http://example.com/foo//bar/baz.html",
818 "http://example.com/foo/bar//baz.html",
819 ]
820
821 for ds_url in ds_urls:
822 ds_req = Request(ds_url, data)
823
824 # Check whether host is determined correctly if there is no proxy
825 np_ds_req = h.do_request_(ds_req)
826 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
827
828 # Check whether host is determined correctly if there is a proxy
829 ds_req.set_proxy("someproxy:3128",None)
830 p_ds_req = h.do_request_(ds_req)
831 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
832
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000833 def test_errors(self):
834 h = urllib2.HTTPErrorProcessor()
835 o = h.parent = MockOpener()
836
837 url = "http://example.com/"
838 req = Request(url)
Facundo Batista9fab9f12007-04-23 17:08:31 +0000839 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000840 r = MockResponse(200, "OK", {}, "", url)
841 newr = h.http_response(req, r)
842 self.assert_(r is newr)
843 self.assert_(not hasattr(o, "proto")) # o.error not called
Facundo Batista9fab9f12007-04-23 17:08:31 +0000844 r = MockResponse(202, "Accepted", {}, "", url)
845 newr = h.http_response(req, r)
846 self.assert_(r is newr)
847 self.assert_(not hasattr(o, "proto")) # o.error not called
848 r = MockResponse(206, "Partial content", {}, "", url)
849 newr = h.http_response(req, r)
850 self.assert_(r is newr)
851 self.assert_(not hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000852 # anything else calls o.error (and MockOpener returns None, here)
Facundo Batista9fab9f12007-04-23 17:08:31 +0000853 r = MockResponse(502, "Bad gateway", {}, "", url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000854 self.assert_(h.http_response(req, r) is None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000855 self.assertEqual(o.proto, "http") # o.error called
Facundo Batista9fab9f12007-04-23 17:08:31 +0000856 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000857
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000858 def test_cookies(self):
859 cj = MockCookieJar()
860 h = urllib2.HTTPCookieProcessor(cj)
861 o = h.parent = MockOpener()
862
863 req = Request("http://example.com/")
864 r = MockResponse(200, "OK", {}, "")
865 newreq = h.http_request(req)
866 self.assert_(cj.ach_req is req is newreq)
867 self.assertEquals(req.get_origin_req_host(), "example.com")
868 self.assert_(not req.is_unverifiable())
869 newr = h.http_response(req, r)
870 self.assert_(cj.ec_req is req)
871 self.assert_(cj.ec_r is r is newr)
872
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000873 def test_redirect(self):
874 from_url = "http://example.com/a.html"
875 to_url = "http://example.com/b.html"
876 h = urllib2.HTTPRedirectHandler()
877 o = h.parent = MockOpener()
878
879 # ordinary redirect behaviour
880 for code in 301, 302, 303, 307:
881 for data in None, "blah\nblah\n":
882 method = getattr(h, "http_error_%s" % code)
883 req = Request(from_url, data)
884 req.add_header("Nonsense", "viking=withhold")
Senthil Kumaran428e9d12009-07-19 04:20:46 +0000885 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Facundo Batista86371d62008-02-07 19:06:52 +0000886 if data is not None:
887 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000888 req.add_unredirected_header("Spam", "spam")
889 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000890 method(req, MockFile(), code, "Blah",
891 MockHeaders({"location": to_url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000892 except urllib2.HTTPError:
893 # 307 in response to POST requires user OK
894 self.assert_(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000895 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000896 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000897 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000898 except AttributeError:
899 self.assert_(not o.req.has_data())
Facundo Batista86371d62008-02-07 19:06:52 +0000900
901 # now it's a GET, there should not be headers regarding content
902 # (possibly dragged from before being a POST)
903 headers = [x.lower() for x in o.req.headers]
904 self.assertTrue("content-length" not in headers)
905 self.assertTrue("content-type" not in headers)
906
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000907 self.assertEqual(o.req.headers["Nonsense"],
908 "viking=withhold")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000909 self.assert_("Spam" not in o.req.headers)
910 self.assert_("Spam" not in o.req.unredirected_hdrs)
911
912 # loop detection
913 req = Request(from_url)
Senthil Kumaran428e9d12009-07-19 04:20:46 +0000914 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000915 def redirect(h, req, url=to_url):
916 h.http_error_302(req, MockFile(), 302, "Blah",
917 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000918 # Note that the *original* request shares the same record of
919 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000920
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000921 # detect infinite loop redirect of a URL to itself
922 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000923 count = 0
Senthil Kumaran428e9d12009-07-19 04:20:46 +0000924 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000925 try:
926 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000927 redirect(h, req, "http://example.com/")
928 count = count + 1
929 except urllib2.HTTPError:
930 # don't stop until max_repeats, because cookies may introduce state
931 self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
932
933 # detect endless non-repeating chain of redirects
934 req = Request(from_url, origin_req_host="example.com")
935 count = 0
Senthil Kumaran428e9d12009-07-19 04:20:46 +0000936 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000937 try:
938 while 1:
939 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000940 count = count + 1
941 except urllib2.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000942 self.assertEqual(count,
943 urllib2.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000944
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000945 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000946 # cookies shouldn't leak into redirected requests
947 from cookielib import CookieJar
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000948
Neal Norwitzb902f4e2006-04-03 04:45:34 +0000949 from test.test_cookielib import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000950
951 cj = CookieJar()
952 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Georg Brandlfa42bd72006-04-30 07:06:11 +0000953 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
954 hdeh = urllib2.HTTPDefaultErrorHandler()
955 hrh = urllib2.HTTPRedirectHandler()
956 cp = urllib2.HTTPCookieProcessor(cj)
957 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000958 o.open("http://www.example.com/")
959 self.assert_(not hh.req.has_header("Cookie"))
960
Georg Brandl720096a2006-04-02 20:45:34 +0000961 def test_proxy(self):
962 o = OpenerDirector()
963 ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
964 o.add_handler(ph)
965 meth_spec = [
966 [("http_open", "return response")]
967 ]
968 handlers = add_ordered_mock_handlers(o, meth_spec)
969
970 req = Request("http://acme.example.com/")
971 self.assertEqual(req.get_host(), "acme.example.com")
972 r = o.open(req)
973 self.assertEqual(req.get_host(), "proxy.example.com:3128")
974
975 self.assertEqual([(handlers[0], "http_open")],
976 [tup[0:2] for tup in o.calls])
977
Senthil Kumarandff20282009-10-11 05:35:44 +0000978 def test_proxy_no_proxy(self):
979 os.environ['no_proxy'] = 'python.org'
980 o = OpenerDirector()
981 ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
982 o.add_handler(ph)
983 req = Request("http://www.perl.org/")
984 self.assertEqual(req.get_host(), "www.perl.org")
985 r = o.open(req)
986 self.assertEqual(req.get_host(), "proxy.example.com")
987 req = Request("http://www.python.org")
988 self.assertEqual(req.get_host(), "www.python.org")
989 r = o.open(req)
990 self.assertEqual(req.get_host(), "www.python.org")
991 del os.environ['no_proxy']
992
993
Senthil Kumaran308681c2009-07-26 12:36:08 +0000994 def test_proxy_https(self):
995 o = OpenerDirector()
996 ph = urllib2.ProxyHandler(dict(https='proxy.example.com:3128'))
997 o.add_handler(ph)
998 meth_spec = [
999 [("https_open","return response")]
1000 ]
1001 handlers = add_ordered_mock_handlers(o, meth_spec)
1002 req = Request("https://www.example.com/")
1003 self.assertEqual(req.get_host(), "www.example.com")
1004 r = o.open(req)
1005 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1006 self.assertEqual([(handlers[0], "https_open")],
1007 [tup[0:2] for tup in o.calls])
1008
Senthil Kumaran81163642009-12-20 06:32:46 +00001009 def test_proxy_https_proxy_authorization(self):
1010 o = OpenerDirector()
1011 ph = urllib2.ProxyHandler(dict(https='proxy.example.com:3128'))
1012 o.add_handler(ph)
1013 https_handler = MockHTTPSHandler()
1014 o.add_handler(https_handler)
1015 req = Request("https://www.example.com/")
1016 req.add_header("Proxy-Authorization","FooBar")
1017 req.add_header("User-Agent","Grail")
1018 self.assertEqual(req.get_host(), "www.example.com")
1019 self.assertTrue(req._tunnel_host is None)
1020 r = o.open(req)
1021 # Verify Proxy-Authorization gets tunneled to request.
1022 # httpsconn req_headers do not have the Proxy-Authorization header but
1023 # the req will have.
1024 self.assertFalse(("Proxy-Authorization","FooBar") in
1025 https_handler.httpconn.req_headers)
1026 self.assertTrue(("User-Agent","Grail") in
1027 https_handler.httpconn.req_headers)
1028 self.assertFalse(req._tunnel_host is None)
1029 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1030 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
1031
Georg Brandl33124322008-03-21 19:54:00 +00001032 def test_basic_auth(self, quote_char='"'):
Georg Brandlfa42bd72006-04-30 07:06:11 +00001033 opener = OpenerDirector()
1034 password_manager = MockPasswordManager()
1035 auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
1036 realm = "ACME Widget Store"
1037 http_handler = MockHTTPHandler(
Georg Brandl33124322008-03-21 19:54:00 +00001038 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1039 (quote_char, realm, quote_char) )
Georg Brandl261e2512006-05-29 20:52:54 +00001040 opener.add_handler(auth_handler)
1041 opener.add_handler(http_handler)
Georg Brandlfa42bd72006-04-30 07:06:11 +00001042 self._test_basic_auth(opener, auth_handler, "Authorization",
1043 realm, http_handler, password_manager,
1044 "http://acme.example.com/protected",
1045 "http://acme.example.com/protected",
1046 )
1047
Georg Brandl33124322008-03-21 19:54:00 +00001048 def test_basic_auth_with_single_quoted_realm(self):
1049 self.test_basic_auth(quote_char="'")
1050
Georg Brandlfa42bd72006-04-30 07:06:11 +00001051 def test_proxy_basic_auth(self):
1052 opener = OpenerDirector()
1053 ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
1054 opener.add_handler(ph)
1055 password_manager = MockPasswordManager()
1056 auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
1057 realm = "ACME Networks"
1058 http_handler = MockHTTPHandler(
1059 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Georg Brandl261e2512006-05-29 20:52:54 +00001060 opener.add_handler(auth_handler)
1061 opener.add_handler(http_handler)
Georg Brandl8c036cc2006-08-20 13:15:39 +00001062 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Georg Brandlfa42bd72006-04-30 07:06:11 +00001063 realm, http_handler, password_manager,
1064 "http://acme.example.com:3128/protected",
1065 "proxy.example.com:3128",
1066 )
1067
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001068 def test_basic_and_digest_auth_handlers(self):
1069 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1070 # response (http://python.org/sf/1479302), where it should instead
1071 # return None to allow another handler (especially
1072 # HTTPBasicAuthHandler) to handle the response.
Georg Brandl261e2512006-05-29 20:52:54 +00001073
1074 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1075 # try digest first (since it's the strongest auth scheme), so we record
1076 # order of calls here to check digest comes first:
1077 class RecordingOpenerDirector(OpenerDirector):
1078 def __init__(self):
1079 OpenerDirector.__init__(self)
1080 self.recorded = []
1081 def record(self, info):
1082 self.recorded.append(info)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001083 class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
Georg Brandl261e2512006-05-29 20:52:54 +00001084 def http_error_401(self, *args, **kwds):
1085 self.parent.record("digest")
1086 urllib2.HTTPDigestAuthHandler.http_error_401(self,
1087 *args, **kwds)
1088 class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
1089 def http_error_401(self, *args, **kwds):
1090 self.parent.record("basic")
1091 urllib2.HTTPBasicAuthHandler.http_error_401(self,
1092 *args, **kwds)
1093
1094 opener = RecordingOpenerDirector()
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001095 password_manager = MockPasswordManager()
1096 digest_handler = TestDigestAuthHandler(password_manager)
Georg Brandl261e2512006-05-29 20:52:54 +00001097 basic_handler = TestBasicAuthHandler(password_manager)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001098 realm = "ACME Networks"
1099 http_handler = MockHTTPHandler(
1100 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Georg Brandl261e2512006-05-29 20:52:54 +00001101 opener.add_handler(basic_handler)
1102 opener.add_handler(digest_handler)
1103 opener.add_handler(http_handler)
1104
1105 # check basic auth isn't blocked by digest handler failing
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001106 self._test_basic_auth(opener, basic_handler, "Authorization",
1107 realm, http_handler, password_manager,
1108 "http://acme.example.com/protected",
1109 "http://acme.example.com/protected",
1110 )
Georg Brandl261e2512006-05-29 20:52:54 +00001111 # check digest was tried before basic (twice, because
1112 # _test_basic_auth called .open() twice)
1113 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001114
Georg Brandlfa42bd72006-04-30 07:06:11 +00001115 def _test_basic_auth(self, opener, auth_handler, auth_header,
1116 realm, http_handler, password_manager,
1117 request_url, protected_url):
Christian Heimesc5f05e42008-02-23 17:40:11 +00001118 import base64
Georg Brandlfa42bd72006-04-30 07:06:11 +00001119 user, password = "wile", "coyote"
Georg Brandlfa42bd72006-04-30 07:06:11 +00001120
1121 # .add_password() fed through to password manager
1122 auth_handler.add_password(realm, request_url, user, password)
1123 self.assertEqual(realm, password_manager.realm)
1124 self.assertEqual(request_url, password_manager.url)
1125 self.assertEqual(user, password_manager.user)
1126 self.assertEqual(password, password_manager.password)
1127
1128 r = opener.open(request_url)
1129
1130 # should have asked the password manager for the username/password
1131 self.assertEqual(password_manager.target_realm, realm)
1132 self.assertEqual(password_manager.target_url, protected_url)
1133
1134 # expect one request without authorization, then one with
1135 self.assertEqual(len(http_handler.requests), 2)
1136 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1137 userpass = '%s:%s' % (user, password)
1138 auth_hdr_value = 'Basic '+base64.encodestring(userpass).strip()
1139 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1140 auth_hdr_value)
Senthil Kumarane3c651a2010-02-24 16:49:45 +00001141 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1142 auth_hdr_value)
Georg Brandlfa42bd72006-04-30 07:06:11 +00001143 # if the password manager can't find a password, the handler won't
1144 # handle the HTTP auth error
1145 password_manager.user = password_manager.password = None
1146 http_handler.reset()
1147 r = opener.open(request_url)
1148 self.assertEqual(len(http_handler.requests), 1)
1149 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1150
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001151class MiscTests(unittest.TestCase):
1152
1153 def test_build_opener(self):
1154 class MyHTTPHandler(urllib2.HTTPHandler): pass
1155 class FooHandler(urllib2.BaseHandler):
1156 def foo_open(self): pass
1157 class BarHandler(urllib2.BaseHandler):
1158 def bar_open(self): pass
1159
1160 build_opener = urllib2.build_opener
1161
1162 o = build_opener(FooHandler, BarHandler)
1163 self.opener_has_handler(o, FooHandler)
1164 self.opener_has_handler(o, BarHandler)
1165
1166 # can take a mix of classes and instances
1167 o = build_opener(FooHandler, BarHandler())
1168 self.opener_has_handler(o, FooHandler)
1169 self.opener_has_handler(o, BarHandler)
1170
1171 # subclasses of default handlers override default handlers
1172 o = build_opener(MyHTTPHandler)
1173 self.opener_has_handler(o, MyHTTPHandler)
1174
1175 # a particular case of overriding: default handlers can be passed
1176 # in explicitly
1177 o = build_opener()
1178 self.opener_has_handler(o, urllib2.HTTPHandler)
1179 o = build_opener(urllib2.HTTPHandler)
1180 self.opener_has_handler(o, urllib2.HTTPHandler)
1181 o = build_opener(urllib2.HTTPHandler())
1182 self.opener_has_handler(o, urllib2.HTTPHandler)
1183
Amaury Forgeot d'Arc96865852008-04-22 21:14:41 +00001184 # Issue2670: multiple handlers sharing the same base class
1185 class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
1186 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1187 self.opener_has_handler(o, MyHTTPHandler)
1188 self.opener_has_handler(o, MyOtherHTTPHandler)
1189
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001190 def opener_has_handler(self, opener, handler_class):
1191 for h in opener.handlers:
1192 if h.__class__ == handler_class:
1193 break
1194 else:
1195 self.assert_(False)
1196
Benjamin Petersonc6e80eb2008-12-21 17:01:26 +00001197class RequestTests(unittest.TestCase):
1198
1199 def setUp(self):
1200 self.get = urllib2.Request("http://www.python.org/~jeremy/")
1201 self.post = urllib2.Request("http://www.python.org/~jeremy/",
1202 "data",
1203 headers={"X-Test": "test"})
1204
1205 def test_method(self):
1206 self.assertEqual("POST", self.post.get_method())
1207 self.assertEqual("GET", self.get.get_method())
1208
1209 def test_add_data(self):
1210 self.assert_(not self.get.has_data())
1211 self.assertEqual("GET", self.get.get_method())
1212 self.get.add_data("spam")
1213 self.assert_(self.get.has_data())
1214 self.assertEqual("POST", self.get.get_method())
1215
1216 def test_get_full_url(self):
1217 self.assertEqual("http://www.python.org/~jeremy/",
1218 self.get.get_full_url())
1219
1220 def test_selector(self):
1221 self.assertEqual("/~jeremy/", self.get.get_selector())
1222 req = urllib2.Request("http://www.python.org/")
1223 self.assertEqual("/", req.get_selector())
1224
1225 def test_get_type(self):
1226 self.assertEqual("http", self.get.get_type())
1227
1228 def test_get_host(self):
1229 self.assertEqual("www.python.org", self.get.get_host())
1230
1231 def test_get_host_unquote(self):
1232 req = urllib2.Request("http://www.%70ython.org/")
1233 self.assertEqual("www.python.org", req.get_host())
1234
1235 def test_proxy(self):
1236 self.assert_(not self.get.has_proxy())
1237 self.get.set_proxy("www.perl.org", "http")
1238 self.assert_(self.get.has_proxy())
1239 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1240 self.assertEqual("www.perl.org", self.get.get_host())
1241
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001242
def test_main(verbose=None):
    # Run the doctests embedded in this test module and in urllib2 itself,
    # then every unittest class defined in this file.
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(TrivialTests,
                              OpenerDirectorTests,
                              HandlerTests,
                              MiscTests,
                              RequestTests)

if __name__ == "__main__":
    test_main(verbose=True)