blob: 1ecb56e9d73c271d7339d01db0839110e83a9572 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
# XXX
# Request
# CacheFTPHandler (hard to write)
# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    # Smoke tests for urllib2.urlopen() and urllib2.parse_http_list().

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]
        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')
        elif os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        file_url = "file://%s" % fname
        f = urllib2.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is not
            # inspected.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        # Each entry pairs a header value with the list of elements that
        # parse_http_list() must produce for it.
        tests = [('a,b,c', ['a', 'b', 'c']),
                 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
                 ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
                 ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        for header_value, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header_value), expected)
48
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000049
def test_request_headers_dict():
    """
    Request.headers is an undocumented dictionary, and it should remain
    undocumented: the complete set of headers can only be reached through the
    .get_header(), .has_header() and .header_items() interface.  The
    dictionary is older than those methods, however, so real code will be
    using it.

    Introducing those methods in 2.4 was a mistake for the same reason: code
    that used to see every (urllib2 user)-provided header in .headers now
    sees only a subset (and the function interface is ugly and incomplete).
    Replacing the .headers dict with a dict subclass (or a
    UserDict.DictMixin instance?) that kept the .headers interface while also
    exposing the "unredirected" headers would have been a better change.
    It's probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
79
def test_request_headers_methods():
    """
    Header names are normalized to .capitalize()-case here, and that should
    be preserved for backwards-compatibility.  (In the HTTP case, urllib2
    normalizes to .title()-case before sending headers to httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
111
112
Georg Brandlfa42bd72006-04-30 07:06:11 +0000113def test_password_manager(self):
114 """
115 >>> mgr = urllib2.HTTPPasswordMgr()
116 >>> add = mgr.add_password
117 >>> add("Some Realm", "http://example.com/", "joe", "password")
118 >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
119 >>> add("c", "http://example.com/foo", "foo", "ni")
120 >>> add("c", "http://example.com/bar", "bar", "nini")
121 >>> add("b", "http://example.com/", "first", "blah")
122 >>> add("b", "http://example.com/", "second", "spam")
123 >>> add("a", "http://example.com", "1", "a")
124 >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
125 >>> add("Some Realm", "d.example.com", "4", "d")
126 >>> add("Some Realm", "e.example.com:3128", "5", "e")
127
128 >>> mgr.find_user_password("Some Realm", "example.com")
129 ('joe', 'password')
130 >>> mgr.find_user_password("Some Realm", "http://example.com")
131 ('joe', 'password')
132 >>> mgr.find_user_password("Some Realm", "http://example.com/")
133 ('joe', 'password')
134 >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
135 ('joe', 'password')
136 >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
137 ('joe', 'password')
138 >>> mgr.find_user_password("c", "http://example.com/foo")
139 ('foo', 'ni')
140 >>> mgr.find_user_password("c", "http://example.com/bar")
141 ('bar', 'nini')
142
Georg Brandl2b330372006-05-28 20:23:12 +0000143 Actually, this is really undefined ATM
144## Currently, we use the highest-level path where more than one match:
Georg Brandlfa42bd72006-04-30 07:06:11 +0000145
Georg Brandl2b330372006-05-28 20:23:12 +0000146## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
147## ('joe', 'password')
Georg Brandlfa42bd72006-04-30 07:06:11 +0000148
149 Use latest add_password() in case of conflict:
150
151 >>> mgr.find_user_password("b", "http://example.com/")
152 ('second', 'spam')
153
154 No special relationship between a.example.com and example.com:
155
156 >>> mgr.find_user_password("a", "http://example.com/")
157 ('1', 'a')
158 >>> mgr.find_user_password("a", "http://a.example.com/")
159 (None, None)
160
161 Ports:
162
163 >>> mgr.find_user_password("Some Realm", "c.example.com")
164 (None, None)
165 >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
166 ('3', 'c')
167 >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
168 ('3', 'c')
169 >>> mgr.find_user_password("Some Realm", "d.example.com")
170 ('4', 'd')
171 >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
172 ('5', 'e')
173
174 """
175 pass
176
177
Georg Brandl2b330372006-05-28 20:23:12 +0000178def test_password_manager_default_port(self):
179 """
180 >>> mgr = urllib2.HTTPPasswordMgr()
181 >>> add = mgr.add_password
182
183 The point to note here is that we can't guess the default port if there's
184 no scheme. This applies to both add_password and find_user_password.
185
186 >>> add("f", "http://g.example.com:80", "10", "j")
187 >>> add("g", "http://h.example.com", "11", "k")
188 >>> add("h", "i.example.com:80", "12", "l")
189 >>> add("i", "j.example.com", "13", "m")
190 >>> mgr.find_user_password("f", "g.example.com:100")
191 (None, None)
192 >>> mgr.find_user_password("f", "g.example.com:80")
193 ('10', 'j')
194 >>> mgr.find_user_password("f", "g.example.com")
195 (None, None)
196 >>> mgr.find_user_password("f", "http://g.example.com:100")
197 (None, None)
198 >>> mgr.find_user_password("f", "http://g.example.com:80")
199 ('10', 'j')
200 >>> mgr.find_user_password("f", "http://g.example.com")
201 ('10', 'j')
202 >>> mgr.find_user_password("g", "h.example.com")
203 ('11', 'k')
204 >>> mgr.find_user_password("g", "h.example.com:80")
205 ('11', 'k')
206 >>> mgr.find_user_password("g", "http://h.example.com:80")
207 ('11', 'k')
208 >>> mgr.find_user_password("h", "i.example.com")
209 (None, None)
210 >>> mgr.find_user_password("h", "i.example.com:80")
211 ('12', 'l')
212 >>> mgr.find_user_password("h", "http://i.example.com:80")
213 ('12', 'l')
214 >>> mgr.find_user_password("i", "j.example.com")
215 ('13', 'm')
216 >>> mgr.find_user_password("i", "j.example.com:80")
217 (None, None)
218 >>> mgr.find_user_password("i", "http://j.example.com")
219 ('13', 'm')
220 >>> mgr.find_user_password("i", "http://j.example.com:80")
221 (None, None)
222
223 """
224
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the call arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Same idea: remember the protocol and the remaining arguments.
        self.proto = proto
        self.args = args
231
class MockFile:
    """File-like stub: read(), readline() and close() all do nothing."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
236
class MockHeaders(dict):
    """Dict of headers whose getheaders() ignores the requested name."""

    def getheaders(self, name):
        # Every stored value is returned, whatever name was asked for.
        return dict.values(self)
240
class MockResponse(StringIO.StringIO):
    """In-memory response: a StringIO body plus code, msg, headers and url."""

    def __init__(self, code, msg, headers, data, url=None):
        StringIO.StringIO.__init__(self, data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        # Hand back the headers object exactly as it was supplied.
        return self.headers

    def geturl(self):
        return self.url
249
class MockCookieJar:
    """Cookie-jar stub that remembers the objects it was asked to process."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
255
class FakeMethod:
    """Callable that forwards to `handle`, prefixing its name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # The handler receives (meth_name, action) followed by the positional
        # arguments of this call.
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
263
class MockHTTPResponse:
    """Response stub holding fp, msg, status and reason; its body is empty."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        # There is never any body data.
        return ''
272
class MockHTTPClass:
    """HTTP connection stub that records how it is used.

    Calling an instance stores the host and timeout and returns the instance
    itself -- presumably so one instance can be handed over where a
    connection class is expected and then inspected by the test.
    """

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host = host
        self.timeout = timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def _set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host = host
        self._tunnel_port = port
        if not headers:
            # No (or empty) headers: empty the current mapping in place.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Kept sorted so tests can compare against a fixed list.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            import socket
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
308
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    #
    # Every call made through a defined method is appended to
    # self.parent.calls as (handler, method_name, args, kwds).
    handler_order = 500

    def __init__(self, methods):
        # methods: sequence of method names, or (name, action) pairs.
        self._define_methods(methods)

    def _define_methods(self, methods):
        for spec in methods:
            # Only a real two-item tuple/list is a (name, action) pair.  A
            # bare string is always a method name, even if it happens to be
            # two characters long.
            if isinstance(spec, (tuple, list)) and len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            meth = FakeMethod(name, action, self.handle)
            # The fake method is installed on the class, not the instance.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        # Record the call, then carry out the requested action:
        #   None              -> return None
        #   "return self"     -> return this handler
        #   "return response" -> return an empty 200 MockResponse
        #   "return request"  -> return a new Request
        #   "error ..."       -> call parent.error() with the trailing code
        #   "raise"           -> raise urllib2.URLError
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # The error code is whatever follows the last space; it stays a
            # string when it is not an integer.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib2.URLError("blah")
        # Raised explicitly so an unknown action is still reported when
        # assert statements are disabled (python -O).
        raise AssertionError("unknown mock action: %r" % (action,))

    def close(self): pass

    def add_parent(self, parent):
        self.parent = parent
        # Note: this resets the parent's call log.
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
352
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    The handlers are returned in meth_spec order; each one's handler_order is
    raised by its position in that list.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler installs its fake methods
        # on the class, so handlers must not share one.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
383
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    register = director.add_handler
    for instance in handler_instances:
        register(instance)
    return director
389
class MockHTTPHandler(urllib2.BaseHandler):
    # useful for testing redirections and auth
    # sends supplied headers and code as first response
    # sends 200 OK as second response
    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        # Forget every request seen, so the next one counts as the first.
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import httplib
        import mimetools
        from StringIO import StringIO

        # Keep a snapshot of each request as it looked when it arrived.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Any request after the first: plain 200 with empty headers.
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())

        # First request: report the configured code through the parent's
        # error machinery, together with the supplied headers.
        self._count = self._count + 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
415
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpcon

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        # One shared mock connection; tests inspect it after a request.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
426
class MockPasswordManager:
    """Password-manager stub holding a single set of credentials.

    find_user_password() records what was asked for and returns the user and
    password from the most recent add_password() call, whatever the realm or
    URI.
    """

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
437
438
class OpenerDirectorTests(unittest.TestCase):
    # Exercises how OpenerDirector dispatches to handler methods, using
    # MockHandler instances that log each call in the opener's .calls list.

    def test_add_non_handler(self):
        # add_handler() must raise TypeError for an object that is not a
        # handler.
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib2 import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter list, so check the count explicitly.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands in for the response object,
            # whose identity the test does not care about.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # A unittest assertion rather than a bare assert, so the check still
        # runs under python -O.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # args stays on the left so that Unknown.__eq__ is consulted.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this, the loop below would pass vacuously if fewer calls
        # than expected had been recorded.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
590
591
Tim Peters58eb11c2004-01-18 20:29:55 +0000592def sanepathname2url(path):
593 import urllib
594 urlpath = urllib.pathname2url(path)
595 if os.name == "nt" and urlpath.startswith("///"):
596 urlpath = urlpath[2:]
597 # XXX don't ask me about the mac...
598 return urlpath
599
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000600class HandlerTests(unittest.TestCase):
601
602 def test_ftp(self):
603 class MockFTPWrapper:
604 def __init__(self, data): self.data = data
605 def retrfile(self, filename, filetype):
606 self.filename, self.filetype = filename, filetype
607 return StringIO.StringIO(self.data), len(self.data)
608
609 class NullFTPHandler(urllib2.FTPHandler):
610 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000611 def connect_ftp(self, user, passwd, host, port, dirs,
612 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000613 self.user, self.passwd = user, passwd
614 self.host, self.port = host, port
615 self.dirs = dirs
616 self.ftpwrapper = MockFTPWrapper(self.data)
617 return self.ftpwrapper
618
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000619 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000620 data = "rheum rhaponicum"
621 h = NullFTPHandler(data)
622 o = h.parent = MockOpener()
623
624 for url, host, port, type_, dirs, filename, mimetype in [
625 ("ftp://localhost/foo/bar/baz.html",
626 "localhost", ftplib.FTP_PORT, "I",
627 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000628 ("ftp://localhost:80/foo/bar/",
629 "localhost", 80, "D",
630 ["foo", "bar"], "", None),
631 ("ftp://localhost/baz.gif;type=a",
632 "localhost", ftplib.FTP_PORT, "A",
633 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000634 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000635 req = Request(url)
636 req.timeout = None
637 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000638 # ftp authentication not yet implemented by FTPHandler
639 self.assert_(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000640 self.assertEqual(h.host, socket.gethostbyname(host))
641 self.assertEqual(h.port, port)
642 self.assertEqual(h.dirs, dirs)
643 self.assertEqual(h.ftpwrapper.filename, filename)
644 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000645 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000646 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000647 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648
649 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000650 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651 h = urllib2.FileHandler()
652 o = h.parent = MockOpener()
653
Tim Peters58eb11c2004-01-18 20:29:55 +0000654 TESTFN = test_support.TESTFN
655 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000656 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000657 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000658 "file://localhost%s" % urlpath,
659 "file://%s" % urlpath,
660 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000661 ]
662 try:
Tim Peters480725d2006-04-03 02:46:44 +0000663 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000664 except socket.gaierror:
665 localaddr = ''
666 if localaddr:
667 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000668
Georg Brandldd2245f2006-03-31 17:18:06 +0000669 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000670 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000671 try:
672 try:
673 f.write(towrite)
674 finally:
675 f.close()
676
677 r = h.file_open(Request(url))
678 try:
679 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000680 headers = r.info()
681 newurl = r.geturl()
682 finally:
683 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000684 stats = os.stat(TESTFN)
685 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000686 finally:
687 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000688 self.assertEqual(data, towrite)
689 self.assertEqual(headers["Content-type"], "text/plain")
690 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000691 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000692
693 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000694 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000695 "file:///file_does_not_exist.txt",
696 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
697 os.getcwd(), TESTFN),
698 "file://somerandomhost.ontheinternet.com%s/%s" %
699 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000700 ]:
701 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000702 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000703 try:
704 f.write(towrite)
705 finally:
706 f.close()
707
708 self.assertRaises(urllib2.URLError,
709 h.file_open, Request(url))
710 finally:
711 os.remove(TESTFN)
712
713 h = urllib2.FileHandler()
714 o = h.parent = MockOpener()
715 # XXXX why does // mean ftp (and /// mean not ftp!), and where
716 # is file: scheme specified? I think this is really a bug, and
717 # what was intended was to distinguish between URLs like:
718 # file:/blah.txt (a file)
719 # file://localhost/blah.txt (a file)
720 # file:///blah.txt (a file)
721 # file://ftp.example.com/blah.txt (an ftp URL)
722 for url, ftp in [
723 ("file://ftp.example.com//foo.txt", True),
724 ("file://ftp.example.com///foo.txt", False),
725# XXXX bug: fails with OSError, should be URLError
726 ("file://ftp.example.com/foo.txt", False),
727 ]:
728 req = Request(url)
729 try:
730 h.file_open(req)
731 # XXXX remove OSError when bug fixed
732 except (urllib2.URLError, OSError):
733 self.assert_(not ftp)
734 else:
735 self.assert_(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000736 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000737
738 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000739
740 h = urllib2.AbstractHTTPHandler()
741 o = h.parent = MockOpener()
742
743 url = "http://example.com/"
744 for method, data in [("GET", None), ("POST", "blah")]:
745 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000746 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000747 req.add_unredirected_header("Spam", "eggs")
748 http = MockHTTPClass()
749 r = h.do_open(http, req)
750
751 # result attributes
752 r.read; r.readline # wrapped MockFile methods
753 r.info; r.geturl # addinfourl methods
754 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
755 hdrs = r.info()
756 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000757 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000758
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000759 self.assertEqual(http.host, "example.com")
760 self.assertEqual(http.level, 0)
761 self.assertEqual(http.method, method)
762 self.assertEqual(http.selector, "/")
763 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000764 [("Connection", "close"),
765 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000766 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000767
768 # check socket.error converted to URLError
769 http.raise_on_endheaders = True
770 self.assertRaises(urllib2.URLError, h.do_open, http, req)
771
772 # check adding of standard headers
773 o.addheaders = [("Spam", "eggs")]
774 for data in "", None: # POST, GET
775 req = Request("http://example.com/", data)
776 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000777 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000778 if data is None: # GET
Georg Brandl8c036cc2006-08-20 13:15:39 +0000779 self.assert_("Content-length" not in req.unredirected_hdrs)
780 self.assert_("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000781 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000782 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
783 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000784 "application/x-www-form-urlencoded")
785 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000786 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
787 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788
789 # don't clobber existing headers
790 req.add_unredirected_header("Content-length", "foo")
791 req.add_unredirected_header("Content-type", "bar")
792 req.add_unredirected_header("Host", "baz")
793 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000794 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000795 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
796 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000797 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
798 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000799
def test_http_doubleslash(self):
    # A redundant double slash anywhere in the path must not break
    # anything.  Previously a "//" directly after the host could make the
    # URL parse incorrectly.
    handler = urllib2.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    urls = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for url in urls:
        request = Request(url, body)

        # No proxy: the Host header must be the host from the URL.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # Proxy set on the request: the Host header must be unchanged.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
825
def test_errors(self):
    processor = urllib2.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    request = Request(url)

    # All 2xx responses are passed through untouched, and opener.error()
    # is not called (the opener never gains a "proto" attribute).
    for status, reason in ((200, "OK"),
                           (202, "Accepted"),
                           (206, "Partial content")):
        response = MockResponse(status, reason, {}, "", url)
        returned = processor.http_response(request, response)
        self.assertTrue(response is returned)
        self.assertFalse(hasattr(opener, "proto"))

    # Anything else calls opener.error() (and MockOpener returns None,
    # here), which records the protocol and the arguments it was given.
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(processor.http_response(request, response) is None)
    self.assertEqual(opener.proto, "http")
    self.assertEqual(opener.args,
                     (request, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000850
def test_cookies(self):
    jar = MockCookieJar()
    processor = urllib2.HTTPCookieProcessor(jar)
    processor.parent = MockOpener()

    request = Request("http://example.com/")
    response = MockResponse(200, "OK", {}, "")

    # Request side: the jar is handed the very request object that
    # http_request() returns.
    returned_request = processor.http_request(request)
    self.assertTrue(jar.ach_req is request is returned_request)
    self.assertEqual(request.get_origin_req_host(), "example.com")
    self.assertFalse(request.is_unverifiable())

    # Response side: the jar is handed the same request and the very
    # response object that http_response() returns.
    returned_response = processor.http_response(request, response)
    self.assertTrue(jar.ec_req is request)
    self.assertTrue(jar.ec_r is response is returned_response)
865
def test_redirect(self):
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib2.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.add_header("Nonsense", "viking=withhold")
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib2.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
                # The handler raised instead of redirecting, so o.req was
                # not refreshed; the checks below would only re-inspect
                # the request left over from the previous iteration.
                continue
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                self.assertTrue(not o.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertTrue("content-length" not in headers)
            self.assertTrue("content-type" not in headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertTrue("Spam" not in o.req.headers)
            self.assertTrue("Spam" not in o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))

    def redirects_until_error(make_url, expected):
        # Feed 302 responses for one fresh request into the handler until
        # it raises HTTPError, and return how many were accepted before
        # that.  make_url(count) supplies the Location for each redirect.
        # The loop is bounded by `expected` so that a handler which never
        # gives up fails the test instead of hanging it.
        req = Request(from_url, origin_req_host="example.com")
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        count = 0
        try:
            while count <= expected:
                redirect(h, req, make_url(count))
                count = count + 1
        except urllib2.HTTPError:
            return count
        self.fail("no HTTPError raised after %d redirects" % count)

    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    # (don't stop until max_repeats, because cookies may introduce state)
    max_repeats = urllib2.HTTPRedirectHandler.max_repeats
    self.assertEqual(
        redirects_until_error(lambda count: "http://example.com/",
                              max_repeats),
        max_repeats)

    # detect endless non-repeating chain of redirects
    max_redirections = urllib2.HTTPRedirectHandler.max_redirections
    self.assertEqual(
        redirects_until_error(lambda count: "http://example.com/%d" % count,
                              max_redirections),
        max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000937
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock HTTP handler answers with a 302 pointing at another host.
    redirecting_http = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting_http,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # The request recorded by the mock handler must carry no Cookie header.
    self.assertFalse(redirecting_http.req.has_header("Cookie"))
953
def test_proxy(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    mock_handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    opener.open(request)
    # After opening, the request's host is the proxy.
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # The only recorded call is http_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(mock_handlers[0], "http_open")], recorded)
970
def test_proxy_no_proxy(self):
    # With no_proxy=python.org in the environment, a request for another
    # host has its host replaced by the proxy, while a request for
    # www.python.org keeps its own host.
    #
    # The variable is restored in a finally clause so that a failing
    # assertion cannot leak it into later tests, and so that a value the
    # caller had already set is not lost.
    missing = object()
    saved = os.environ.get('no_proxy', missing)
    os.environ['no_proxy'] = 'python.org'
    try:
        o = OpenerDirector()
        ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
        o.add_handler(ph)

        req = Request("http://www.perl.org/")
        self.assertEqual(req.get_host(), "www.perl.org")
        o.open(req)
        self.assertEqual(req.get_host(), "proxy.example.com")

        req = Request("http://www.python.org")
        self.assertEqual(req.get_host(), "www.python.org")
        o.open(req)
        self.assertEqual(req.get_host(), "www.python.org")
    finally:
        if saved is missing:
            del os.environ['no_proxy']
        else:
            os.environ['no_proxy'] = saved
985
986
def test_proxy_https(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    mock_handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    opener.open(request)
    # After opening, the request's host is the proxy.
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # The only recorded call is https_open on the mock handler.
    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(mock_handlers[0], "https_open")], recorded)
1001
def test_proxy_https_proxy_authorization(self):
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    request = Request("https://www.example.com/")
    request.add_header("Proxy-Authorization", "FooBar")
    request.add_header("User-Agent", "Grail")
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertTrue(request._tunnel_host is None)

    opener.open(request)

    # Verify Proxy-Authorization gets tunneled to request.
    # The headers sent on the https connection do not include the
    # Proxy-Authorization header, but the Request object still has it.
    sent_headers = https_handler.httpconn.req_headers
    self.assertFalse(("Proxy-Authorization", "FooBar") in sent_headers)
    self.assertTrue(("User-Agent", "Grail") in sent_headers)
    self.assertFalse(request._tunnel_host is None)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
1024
def test_basic_auth(self, quote_char='"'):
    # Run the shared basic-auth scenario against a mock server that
    # answers 401 with a Basic challenge; quote_char is the character
    # placed on either side of the realm in that challenge.
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    # The requested URL and the protected URL are the same here.
    target = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          target, target)
1040
def test_basic_auth_with_single_quoted_realm(self):
    # Same scenario as test_basic_auth, but with the realm wrapped in
    # single quotes instead of the default double quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1043
def test_proxy_basic_auth(self):
    # Run the shared basic-auth scenario through a proxy: the mock server
    # answers 407 with a Proxy-Authenticate challenge, and the credentials
    # are expected in the Proxy-authorization header.
    realm = "ACME Networks"

    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1060
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.
    #
    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so the
    # opener below records the order in which the handlers are called.

    class RecordingOpenerDirector(OpenerDirector):
        # An opener that keeps a list of whatever its handlers report.
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
        # Reports "digest" to the opener, then runs the stock handler.
        def http_error_401(self, *args, **kwargs):
            self.parent.record("digest")
            urllib2.HTTPDigestAuthHandler.http_error_401(
                self, *args, **kwargs)

    class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
        # Reports "basic" to the opener, then runs the stock handler.
        def http_error_401(self, *args, **kwargs):
            self.parent.record("basic")
            urllib2.HTTPBasicAuthHandler.http_error_401(
                self, *args, **kwargs)

    recording_opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)

    # Basic is registered before digest; the order check at the end
    # therefore does not simply mirror registration order.
    for handler in (basic_handler, digest_handler, http_handler):
        recording_opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    target = "http://acme.example.com/protected"
    self._test_basic_auth(recording_opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          target, target)

    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(recording_opener.recorded, ["digest", "basic"] * 2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001107
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared scenario for the basic-auth tests: register a password, open
    # request_url and check the retry carries credentials in auth_header;
    # then blank the stored credentials and check no retry is made.
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    for expected, attribute in ((realm, "realm"),
                                (request_url, "url"),
                                (user, "user"),
                                (password, "password")):
        self.assertEqual(expected, getattr(password_manager, attribute))

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = '%s:%s' % (user, password)
    expected_value = 'Basic ' + base64.encodestring(credentials).strip()
    self.assertEqual(http_handler.requests[1].get_header(auth_header),
                     expected_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1142
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001143
class MiscTests(unittest.TestCase):
    # Checks which handlers urllib2.build_opener() installs.

    def test_build_opener(self):
        class MyHTTPHandler(urllib2.HTTPHandler):
            pass

        class FooHandler(urllib2.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib2.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib2.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler)
        self.opener_has_handler(o, urllib2.HTTPHandler)
        o = build_opener(urllib2.HTTPHandler())
        self.opener_has_handler(o, urllib2.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib2.HTTPHandler):
            pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        # Fail unless one of opener.handlers is an instance of exactly
        # handler_class (a subclass instance does not count).  The failure
        # message names the missing class so the report is actionable.
        for h in opener.handlers:
            if h.__class__ == handler_class:
                return
        self.fail("opener has no handler of class %r" % (handler_class,))
1189
class RequestTests(unittest.TestCase):
    # Exercises the accessor methods of urllib2.Request on one request
    # without data (self.get) and one with data (self.post).

    def setUp(self):
        url = "http://www.python.org/~jeremy/"
        self.get = urllib2.Request(url)
        self.post = urllib2.Request(url, "data", headers={"X-Test": "test"})

    def test_method(self):
        # A request carrying data reports POST; one without reports GET.
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Adding data to a data-less request turns it from GET into POST.
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        full_url = self.get.get_full_url()
        self.assertEqual("http://www.python.org/~jeremy/", full_url)

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root_request = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root_request.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        # A percent-escape in the host part is decoded by get_host().
        escaped = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        # Setting a proxy changes get_host() but not the origin host.
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
1234
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001235
def test_main(verbose=None):
    # Run the doctests of this test module and of urllib2 itself, then all
    # the unittest test cases.  `verbose` is passed on to run_doctest.
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(TrivialTests,
                              OpenerDirectorTests,
                              HandlerTests,
                              MiscTests,
                              RequestTests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001246
# Allow running this test module directly as a script, with verbose output.
if __name__ == "__main__":
    test_main(verbose=True)