blob: e889bc3d10ba0cceb30dfb23b8ed26abb64ea3fe [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
2from test import test_support
3
Christian Heimesc5f05e42008-02-23 17:40:11 +00004import os
Facundo Batista4f1b1ed2008-05-29 16:39:26 +00005import socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00006import StringIO
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00008import urllib2
9from urllib2 import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
# XXX (presumably a list of things that still need tests -- confirm)
# Request
# CacheFTPHandler (hard to write)
# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urllib2.urlopen() and urllib2.parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a scheme-less URL and can read a file: URL."""
        # A couple trivial tests

        self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib2.__file__).replace('\\', '/')

        # And more hacking for RISC OS: expand the name and swap the
        # '/' and '.' characters in it.
        if os.name == 'riscos':
            import string
            fname = os.expand(fname)
            fname = fname.translate(string.maketrans("/.", "./"))

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib2.urlopen(file_url)
        try:
            # Only checking that reading works; the contents are irrelevant.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside of quoted strings."""
        tests = [('a,b,c', ['a', 'b', 'c']),
                 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
                 ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
                 ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        # Names chosen so the builtins 'list' and the 'string' module are
        # not shadowed.
        for header, expected in tests:
            self.assertEqual(urllib2.parse_http_list(header), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000049
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000050
def test_request_headers_dict():
    """
    Doctest holder: header names stored in Request.headers are normalized
    with .capitalize().

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
80
def test_request_headers_methods():
    """
    Doctest holder: behaviour of Request.has_header(), .get_header(),
    .add_header() and .header_items().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    httplib).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = r.header_items()
    >>> items.sort()
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print r.get_header("Not-there")
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
112
113
def test_password_manager(self):
    """
    Doctest holder: HTTPPasswordMgr lookups by realm and URI.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    # The function exists only to carry the doctest above; the "##" lines
    # inside the docstring are disabled examples (doctest does not run them).
    pass
177
178
def test_password_manager_default_port(self):
    """
    Doctest holder: how HTTPPasswordMgr treats explicit and default ports.

    >>> mgr = urllib2.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
225
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Remember everything for later inspection; nothing is returned.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Remember the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
232
class MockFile:
    """Inert file-like object: each method accepts its arguments and returns None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
237
class MockHeaders(dict):
    """Dict of headers whose getheaders() ignores the requested name."""

    def getheaders(self, name):
        # Whatever name is asked for, every stored value is returned.
        all_values = self.values()
        return all_values
241
class MockResponse(StringIO.StringIO):
    """StringIO-backed response carrying a status code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        StringIO.StringIO.__init__(self, data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        # The headers object exactly as it was passed to the constructor.
        return self.headers

    def geturl(self):
        # The URL given at construction (None when it was omitted).
        return self.url
250
class MockCookieJar:
    """Cookie jar stand-in that records the objects passed to it."""

    def add_cookie_header(self, request):
        # "ach" = add_cookie_header
        self.ach_req = request

    def extract_cookies(self, response, request):
        # "ec" = extract_cookies
        self.ec_req = request
        self.ec_r = response
256
class FakeMethod:
    """Callable that forwards to a handler callback along with a name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # Prefix the call's arguments with the stored method name and action.
        callback = self.handle
        return callback(self.meth_name, self.action, *args)
264
class MockHTTPResponse:
    """Minimal HTTP response object with an empty body."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason

    def read(self):
        # The body is always empty.
        return ''
273
class MockHTTPClass:
    """Fake HTTP connection that records how it is used.

    Calling the object stores the host and timeout and returns the object
    itself, so one instance serves as both the "class" and the "connection".
    """

    def __init__(self):
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the current mapping in place.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Kept sorted so tests can compare against a fixed list.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")

    def close(self):
        pass
314
class MockHandler:
    """Handler whose methods are generated from a spec and log every call.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        for spec in methods:
            # A 2-item spec is (name, action); anything else is a bare name.
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            # Note: the fake method is installed on the class, not the
            # instance.
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Log the call on the parent before acting on it.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The error code is whatever follows the last space; it stays a
            # string when it is not a valid integer.
            code = action[action.rfind(" ") + 1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib2.URLError("blah")
        # Unknown action string.
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        # Attaching a handler resets the parent's call log.
        self.parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
358
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in meth_spec order.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # Each handler gets its own subclass because MockHandler installs
        # its fake methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        # Later entries get a larger handler_order.
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
389
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
395
class MockHTTPHandler(urllib2.BaseHandler):
    """HTTP handler for testing redirections and auth.

    The first request is answered with the supplied code and headers, routed
    through the parent's error() machinery; every later request gets a plain
    200 OK.
    """

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        self.requests = []
        self._count = 0

    def http_open(self, req):
        import mimetools, httplib, copy
        from StringIO import StringIO
        # Store a copy, so later changes to req do not alter the record.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Second and later responses: 200 OK with empty headers.
            self.req = req
            msg = mimetools.Message(StringIO("\r\n\r\n"))
            return MockResponse(200, "OK", msg, "", req.get_full_url())
        # First response: the configured status code and headers.
        self._count += 1
        name = httplib.responses[self.code]
        msg = mimetools.Message(StringIO(self.headers))
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
421
class MockHTTPSHandler(urllib2.AbstractHTTPHandler):
    """HTTPS handler that opens requests through a MockHTTPClass.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of httpconn.
    """

    def __init__(self):
        urllib2.AbstractHTTPHandler.__init__(self)
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection_factory = self.httpconn
        return self.do_open(connection_factory, req)
432
class MockPasswordManager:
    """Password manager that holds one credential and records each lookup."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for; the answer is always the stored
        # credential, whatever realm or URI is requested.
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
443
444
class OpenerDirectorTests(unittest.TestCase):
    """Tests of OpenerDirector's handler dispatch, using recording mock handlers."""

    def test_add_non_handler(self):
        # An object with no handler methods at all must be rejected.
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib2 import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        # Only the side effect (handlers attached to o) is needed here.
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        # The return value is irrelevant; only the recorded calls matter.
        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            # A bare method name (parentheses around a lone string do not
            # make a tuple).
            ["http_error_302"],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        # Placeholder that compares equal to anything, used for the response
        # argument whose identity the test cannot know in advance.
        class Unknown:
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # A unittest assertion rather than a bare assert, so the check still
        # runs under "python -O" and reports both values on failure.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # args must stay the first operand so Unknown.__eq__ is used.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
596
597
def sanepathname2url(path):
    """Return urllib.pathname2url(path), minus two leading slashes on Windows.

    On 'nt', a result that starts with '///' has its first two characters
    removed; on every other platform the result is returned unchanged.
    """
    import urllib
    converted = urllib.pathname2url(path)
    on_windows = os.name == "nt"
    if on_windows and converted.startswith("///"):
        return converted[2:]
    # XXX don't ask me about the mac...
    return converted
605
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000606class HandlerTests(unittest.TestCase):
607
608 def test_ftp(self):
609 class MockFTPWrapper:
610 def __init__(self, data): self.data = data
611 def retrfile(self, filename, filetype):
612 self.filename, self.filetype = filename, filetype
613 return StringIO.StringIO(self.data), len(self.data)
614
615 class NullFTPHandler(urllib2.FTPHandler):
616 def __init__(self, data): self.data = data
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000617 def connect_ftp(self, user, passwd, host, port, dirs,
618 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000619 self.user, self.passwd = user, passwd
620 self.host, self.port = host, port
621 self.dirs = dirs
622 self.ftpwrapper = MockFTPWrapper(self.data)
623 return self.ftpwrapper
624
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000625 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000626 data = "rheum rhaponicum"
627 h = NullFTPHandler(data)
628 o = h.parent = MockOpener()
629
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000630 for url, host, port, user, passwd, type_, dirs, filename, mimetype in [
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000631 ("ftp://localhost/foo/bar/baz.html",
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000632 "localhost", ftplib.FTP_PORT, "", "", "I",
633 ["foo", "bar"], "baz.html", "text/html"),
634 ("ftp://parrot@localhost/foo/bar/baz.html",
635 "localhost", ftplib.FTP_PORT, "parrot", "", "I",
636 ["foo", "bar"], "baz.html", "text/html"),
637 ("ftp://%25parrot@localhost/foo/bar/baz.html",
638 "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
639 ["foo", "bar"], "baz.html", "text/html"),
640 ("ftp://%2542parrot@localhost/foo/bar/baz.html",
641 "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000642 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000643 ("ftp://localhost:80/foo/bar/",
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000644 "localhost", 80, "", "", "D",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000645 ["foo", "bar"], "", None),
646 ("ftp://localhost/baz.gif;type=a",
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000647 "localhost", ftplib.FTP_PORT, "", "", "A",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000648 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000649 ]:
Facundo Batista10951d52007-06-06 17:15:23 +0000650 req = Request(url)
651 req.timeout = None
652 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000653 # ftp authentication not yet implemented by FTPHandler
Senthil Kumaran9fce5512010-11-20 11:24:08 +0000654 self.assertEqual(h.user, user)
655 self.assertEqual(h.passwd, passwd)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000656 self.assertEqual(h.host, socket.gethostbyname(host))
657 self.assertEqual(h.port, port)
658 self.assertEqual(h.dirs, dirs)
659 self.assertEqual(h.ftpwrapper.filename, filename)
660 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000661 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000662 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000663 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000664
665 def test_file(self):
Christian Heimesc5f05e42008-02-23 17:40:11 +0000666 import rfc822, socket
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000667 h = urllib2.FileHandler()
668 o = h.parent = MockOpener()
669
Tim Peters58eb11c2004-01-18 20:29:55 +0000670 TESTFN = test_support.TESTFN
671 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000672 towrite = "hello, world\n"
Georg Brandldd2245f2006-03-31 17:18:06 +0000673 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000674 "file://localhost%s" % urlpath,
675 "file://%s" % urlpath,
676 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Georg Brandldd2245f2006-03-31 17:18:06 +0000677 ]
678 try:
Tim Peters480725d2006-04-03 02:46:44 +0000679 localaddr = socket.gethostbyname(socket.gethostname())
Georg Brandldd2245f2006-03-31 17:18:06 +0000680 except socket.gaierror:
681 localaddr = ''
682 if localaddr:
683 urls.append("file://%s%s" % (localaddr, urlpath))
Tim Peters480725d2006-04-03 02:46:44 +0000684
Georg Brandldd2245f2006-03-31 17:18:06 +0000685 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000686 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000687 try:
688 try:
689 f.write(towrite)
690 finally:
691 f.close()
692
693 r = h.file_open(Request(url))
694 try:
695 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696 headers = r.info()
Senthil Kumaran6057ba12010-05-08 03:11:50 +0000697 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000698 finally:
699 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000700 stats = os.stat(TESTFN)
701 modified = rfc822.formatdate(stats.st_mtime)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000702 finally:
703 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000704 self.assertEqual(data, towrite)
705 self.assertEqual(headers["Content-type"], "text/plain")
706 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000707 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran6057ba12010-05-08 03:11:50 +0000708 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000709
710 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000711 "file://localhost:80%s" % urlpath,
Georg Brandlceede5c2007-03-13 08:14:27 +0000712 "file:///file_does_not_exist.txt",
713 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
714 os.getcwd(), TESTFN),
715 "file://somerandomhost.ontheinternet.com%s/%s" %
716 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000717 ]:
718 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000719 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000720 try:
721 f.write(towrite)
722 finally:
723 f.close()
724
725 self.assertRaises(urllib2.URLError,
726 h.file_open, Request(url))
727 finally:
728 os.remove(TESTFN)
729
730 h = urllib2.FileHandler()
731 o = h.parent = MockOpener()
732 # XXXX why does // mean ftp (and /// mean not ftp!), and where
733 # is file: scheme specified? I think this is really a bug, and
734 # what was intended was to distinguish between URLs like:
735 # file:/blah.txt (a file)
736 # file://localhost/blah.txt (a file)
737 # file:///blah.txt (a file)
738 # file://ftp.example.com/blah.txt (an ftp URL)
739 for url, ftp in [
740 ("file://ftp.example.com//foo.txt", True),
741 ("file://ftp.example.com///foo.txt", False),
742# XXXX bug: fails with OSError, should be URLError
743 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran87ed31a2010-07-11 03:18:51 +0000744 ("file://somehost//foo/something.txt", True),
745 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000746 ]:
747 req = Request(url)
748 try:
749 h.file_open(req)
750 # XXXX remove OSError when bug fixed
751 except (urllib2.URLError, OSError):
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000752 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000753 else:
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000754 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000755 self.assertEqual(req.type, "ftp")
Benjamin Peterson98104272011-01-12 19:27:17 +0000756 self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000757
758 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000759
760 h = urllib2.AbstractHTTPHandler()
761 o = h.parent = MockOpener()
762
763 url = "http://example.com/"
764 for method, data in [("GET", None), ("POST", "blah")]:
765 req = Request(url, data, {"Foo": "bar"})
Facundo Batista10951d52007-06-06 17:15:23 +0000766 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000767 req.add_unredirected_header("Spam", "eggs")
768 http = MockHTTPClass()
769 r = h.do_open(http, req)
770
771 # result attributes
772 r.read; r.readline # wrapped MockFile methods
773 r.info; r.geturl # addinfourl methods
774 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
775 hdrs = r.info()
776 hdrs.get; hdrs.has_key # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000777 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000778
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000779 self.assertEqual(http.host, "example.com")
780 self.assertEqual(http.level, 0)
781 self.assertEqual(http.method, method)
782 self.assertEqual(http.selector, "/")
783 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000784 [("Connection", "close"),
785 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000786 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000787
788 # check socket.error converted to URLError
789 http.raise_on_endheaders = True
790 self.assertRaises(urllib2.URLError, h.do_open, http, req)
791
792 # check adding of standard headers
793 o.addheaders = [("Spam", "eggs")]
794 for data in "", None: # POST, GET
795 req = Request("http://example.com/", data)
796 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000797 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000798 if data is None: # GET
Ezio Melottiaa980582010-01-23 23:04:36 +0000799 self.assertNotIn("Content-length", req.unredirected_hdrs)
800 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000801 else: # POST
Georg Brandl8c036cc2006-08-20 13:15:39 +0000802 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
803 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000804 "application/x-www-form-urlencoded")
805 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000806 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
807 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000808
809 # don't clobber existing headers
810 req.add_unredirected_header("Content-length", "foo")
811 req.add_unredirected_header("Content-type", "bar")
812 req.add_unredirected_header("Host", "baz")
813 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000814 newreq = h.do_request_(req)
Georg Brandl8c036cc2006-08-20 13:15:39 +0000815 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
816 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000817 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
818 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000819
Facundo Batistaeb90b782008-08-16 14:44:07 +0000820 def test_http_doubleslash(self):
821 # Checks that the presence of an unnecessary double slash in a url doesn't break anything
822 # Previously, a double slash directly after the host could cause incorrect parsing of the url
823 h = urllib2.AbstractHTTPHandler()
824 o = h.parent = MockOpener()
825
826 data = ""
827 ds_urls = [
828 "http://example.com/foo/bar/baz.html",
829 "http://example.com//foo/bar/baz.html",
830 "http://example.com/foo//bar/baz.html",
831 "http://example.com/foo/bar//baz.html",
832 ]
833
834 for ds_url in ds_urls:
835 ds_req = Request(ds_url, data)
836
837 # Check whether host is determined correctly if there is no proxy
838 np_ds_req = h.do_request_(ds_req)
839 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
840
841 # Check whether host is determined correctly if there is a proxy
842 ds_req.set_proxy("someproxy:3128",None)
843 p_ds_req = h.do_request_(ds_req)
844 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
845
Senthil Kumaran0b7cac12010-11-22 05:04:33 +0000846 def test_fixpath_in_weirdurls(self):
847 # Issue4493: urllib2 to supply '/' when to urls where path does not
848 # start with'/'
849
850 h = urllib2.AbstractHTTPHandler()
851 o = h.parent = MockOpener()
852
853 weird_url = 'http://www.python.org?getspam'
854 req = Request(weird_url)
855 newreq = h.do_request_(req)
856 self.assertEqual(newreq.get_host(),'www.python.org')
857 self.assertEqual(newreq.get_selector(),'/?getspam')
858
859 url_without_path = 'http://www.python.org'
860 req = Request(url_without_path)
861 newreq = h.do_request_(req)
862 self.assertEqual(newreq.get_host(),'www.python.org')
863 self.assertEqual(newreq.get_selector(),'')
864
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000865 def test_errors(self):
866 h = urllib2.HTTPErrorProcessor()
867 o = h.parent = MockOpener()
868
869 url = "http://example.com/"
870 req = Request(url)
Facundo Batista9fab9f12007-04-23 17:08:31 +0000871 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000872 r = MockResponse(200, "OK", {}, "", url)
873 newr = h.http_response(req, r)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000874 self.assertTrue(r is newr)
875 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Facundo Batista9fab9f12007-04-23 17:08:31 +0000876 r = MockResponse(202, "Accepted", {}, "", url)
877 newr = h.http_response(req, r)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000878 self.assertTrue(r is newr)
879 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Facundo Batista9fab9f12007-04-23 17:08:31 +0000880 r = MockResponse(206, "Partial content", {}, "", url)
881 newr = h.http_response(req, r)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000882 self.assertTrue(r is newr)
883 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000884 # anything else calls o.error (and MockOpener returns None, here)
Facundo Batista9fab9f12007-04-23 17:08:31 +0000885 r = MockResponse(502, "Bad gateway", {}, "", url)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000886 self.assertTrue(h.http_response(req, r) is None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000887 self.assertEqual(o.proto, "http") # o.error called
Facundo Batista9fab9f12007-04-23 17:08:31 +0000888 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000889
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000890 def test_cookies(self):
891 cj = MockCookieJar()
892 h = urllib2.HTTPCookieProcessor(cj)
893 o = h.parent = MockOpener()
894
895 req = Request("http://example.com/")
896 r = MockResponse(200, "OK", {}, "")
897 newreq = h.http_request(req)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000898 self.assertTrue(cj.ach_req is req is newreq)
Ezio Melotti2623a372010-11-21 13:34:58 +0000899 self.assertEqual(req.get_origin_req_host(), "example.com")
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000900 self.assertTrue(not req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000901 newr = h.http_response(req, r)
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000902 self.assertTrue(cj.ec_req is req)
903 self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000904
    def test_redirect(self):
        # Exercise HTTPRedirectHandler: ordinary 301/302/303/307 redirects
        # for GET and POST requests, then the two loop-detection limits
        # (max_repeats and max_redirections).
        from_url = "http://example.com/a.html"
        to_url = "http://example.com/b.html"
        h = urllib2.HTTPRedirectHandler()
        o = h.parent = MockOpener()

        # ordinary redirect behaviour
        for code in 301, 302, 303, 307:
            for data in None, "blah\nblah\n":
                # pick the handler method for this status, e.g. http_error_301
                method = getattr(h, "http_error_%s" % code)
                req = Request(from_url, data)
                req.add_header("Nonsense", "viking=withhold")
                req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
                if data is not None:
                    req.add_header("Content-Length", str(len(data)))
                req.add_unredirected_header("Spam", "spam")
                try:
                    method(req, MockFile(), code, "Blah",
                           MockHeaders({"location": to_url}))
                except urllib2.HTTPError:
                    # 307 in response to POST requires user OK
                    self.assertTrue(code == 307 and data is not None)
                # o.req is the request the handler passed to the mock opener
                self.assertEqual(o.req.get_full_url(), to_url)
                try:
                    self.assertEqual(o.req.get_method(), "GET")
                except AttributeError:
                    # fallback check for request objects without get_method()
                    self.assertTrue(not o.req.has_data())

                # now it's a GET, there should not be headers regarding content
                # (possibly dragged from before being a POST)
                headers = [x.lower() for x in o.req.headers]
                self.assertNotIn("content-length", headers)
                self.assertNotIn("content-type", headers)

                # ordinary headers survive the redirect, unredirected ones
                # must not
                self.assertEqual(o.req.headers["Nonsense"],
                                 "viking=withhold")
                self.assertNotIn("Spam", o.req.headers)
                self.assertNotIn("Spam", o.req.unredirected_hdrs)

        # loop detection
        req = Request(from_url)
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        def redirect(h, req, url=to_url):
            # simulate one 302 response pointing at `url`
            h.http_error_302(req, MockFile(), 302, "Blah",
                             MockHeaders({"location": url}))
        # Note that the *original* request shares the same record of
        # redirections with the sub-requests caused by the redirections.

        # detect infinite loop redirect of a URL to itself
        req = Request(from_url, origin_req_host="example.com")
        count = 0
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        try:
            while 1:
                redirect(h, req, "http://example.com/")
                count = count + 1
        except urllib2.HTTPError:
            # don't stop until max_repeats, because cookies may introduce state
            self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)

        # detect endless non-repeating chain of redirects
        req = Request(from_url, origin_req_host="example.com")
        count = 0
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        try:
            while 1:
                # a different target each time, so only the total count
                # can end the chain
                redirect(h, req, "http://example.com/%d" % count)
                count = count + 1
        except urllib2.HTTPError:
            self.assertEqual(count,
                             urllib2.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000976
guido@google.comf1509302011-03-28 13:47:01 -0700977 def test_invalid_redirect(self):
978 from_url = "http://example.com/a.html"
979 valid_schemes = ['http', 'https', 'ftp']
980 invalid_schemes = ['file', 'imap', 'ldap']
981 schemeless_url = "example.com/b.html"
982 h = urllib2.HTTPRedirectHandler()
983 o = h.parent = MockOpener()
984 req = Request(from_url)
guido@google.com9a9fdfa2011-03-29 10:48:23 -0700985 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
guido@google.comf1509302011-03-28 13:47:01 -0700986
987 for scheme in invalid_schemes:
988 invalid_url = scheme + '://' + schemeless_url
989 self.assertRaises(urllib2.HTTPError, h.http_error_302,
990 req, MockFile(), 302, "Security Loophole",
991 MockHeaders({"location": invalid_url}))
992
993 for scheme in valid_schemes:
994 valid_url = scheme + '://' + schemeless_url
995 h.http_error_302(req, MockFile(), 302, "That's fine",
996 MockHeaders({"location": valid_url}))
997 self.assertEqual(o.req.get_full_url(), valid_url)
998
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000999 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001000 # cookies shouldn't leak into redirected requests
1001 from cookielib import CookieJar
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001002
Neal Norwitzb902f4e2006-04-03 04:45:34 +00001003 from test.test_cookielib import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001004
1005 cj = CookieJar()
1006 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Georg Brandlfa42bd72006-04-30 07:06:11 +00001007 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
1008 hdeh = urllib2.HTTPDefaultErrorHandler()
1009 hrh = urllib2.HTTPRedirectHandler()
1010 cp = urllib2.HTTPCookieProcessor(cj)
1011 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001012 o.open("http://www.example.com/")
Benjamin Peterson5c8da862009-06-30 22:57:08 +00001013 self.assertTrue(not hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001014
Senthil Kumaran49c44082011-04-13 07:31:45 +08001015 def test_redirect_fragment(self):
1016 redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
1017 hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
1018 hdeh = urllib2.HTTPDefaultErrorHandler()
1019 hrh = urllib2.HTTPRedirectHandler()
1020 o = build_test_opener(hh, hdeh, hrh)
1021 fp = o.open('http://www.example.com')
1022 self.assertEqual(fp.geturl(), redirected_url.strip())
1023
Georg Brandl720096a2006-04-02 20:45:34 +00001024 def test_proxy(self):
1025 o = OpenerDirector()
1026 ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
1027 o.add_handler(ph)
1028 meth_spec = [
1029 [("http_open", "return response")]
1030 ]
1031 handlers = add_ordered_mock_handlers(o, meth_spec)
1032
1033 req = Request("http://acme.example.com/")
1034 self.assertEqual(req.get_host(), "acme.example.com")
1035 r = o.open(req)
1036 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1037
1038 self.assertEqual([(handlers[0], "http_open")],
1039 [tup[0:2] for tup in o.calls])
1040
Senthil Kumaran27468662009-10-11 02:00:07 +00001041 def test_proxy_no_proxy(self):
1042 os.environ['no_proxy'] = 'python.org'
1043 o = OpenerDirector()
1044 ph = urllib2.ProxyHandler(dict(http="proxy.example.com"))
1045 o.add_handler(ph)
1046 req = Request("http://www.perl.org/")
1047 self.assertEqual(req.get_host(), "www.perl.org")
1048 r = o.open(req)
1049 self.assertEqual(req.get_host(), "proxy.example.com")
1050 req = Request("http://www.python.org")
1051 self.assertEqual(req.get_host(), "www.python.org")
1052 r = o.open(req)
1053 self.assertEqual(req.get_host(), "www.python.org")
1054 del os.environ['no_proxy']
1055
1056
Senthil Kumarane266f252009-05-24 09:14:50 +00001057 def test_proxy_https(self):
1058 o = OpenerDirector()
1059 ph = urllib2.ProxyHandler(dict(https='proxy.example.com:3128'))
1060 o.add_handler(ph)
1061 meth_spec = [
1062 [("https_open","return response")]
1063 ]
1064 handlers = add_ordered_mock_handlers(o, meth_spec)
1065 req = Request("https://www.example.com/")
1066 self.assertEqual(req.get_host(), "www.example.com")
1067 r = o.open(req)
1068 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1069 self.assertEqual([(handlers[0], "https_open")],
1070 [tup[0:2] for tup in o.calls])
1071
Senthil Kumaran7713acf2009-12-20 06:05:13 +00001072 def test_proxy_https_proxy_authorization(self):
1073 o = OpenerDirector()
1074 ph = urllib2.ProxyHandler(dict(https='proxy.example.com:3128'))
1075 o.add_handler(ph)
1076 https_handler = MockHTTPSHandler()
1077 o.add_handler(https_handler)
1078 req = Request("https://www.example.com/")
1079 req.add_header("Proxy-Authorization","FooBar")
1080 req.add_header("User-Agent","Grail")
1081 self.assertEqual(req.get_host(), "www.example.com")
1082 self.assertIsNone(req._tunnel_host)
1083 r = o.open(req)
1084 # Verify Proxy-Authorization gets tunneled to request.
1085 # httpsconn req_headers do not have the Proxy-Authorization header but
1086 # the req will have.
Ezio Melottiaa980582010-01-23 23:04:36 +00001087 self.assertNotIn(("Proxy-Authorization","FooBar"),
Senthil Kumaran7713acf2009-12-20 06:05:13 +00001088 https_handler.httpconn.req_headers)
Ezio Melottiaa980582010-01-23 23:04:36 +00001089 self.assertIn(("User-Agent","Grail"),
1090 https_handler.httpconn.req_headers)
Senthil Kumaran7713acf2009-12-20 06:05:13 +00001091 self.assertIsNotNone(req._tunnel_host)
1092 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1093 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
1094
Georg Brandl33124322008-03-21 19:54:00 +00001095 def test_basic_auth(self, quote_char='"'):
Georg Brandlfa42bd72006-04-30 07:06:11 +00001096 opener = OpenerDirector()
1097 password_manager = MockPasswordManager()
1098 auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
1099 realm = "ACME Widget Store"
1100 http_handler = MockHTTPHandler(
Georg Brandl33124322008-03-21 19:54:00 +00001101 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1102 (quote_char, realm, quote_char) )
Georg Brandl261e2512006-05-29 20:52:54 +00001103 opener.add_handler(auth_handler)
1104 opener.add_handler(http_handler)
Georg Brandlfa42bd72006-04-30 07:06:11 +00001105 self._test_basic_auth(opener, auth_handler, "Authorization",
1106 realm, http_handler, password_manager,
1107 "http://acme.example.com/protected",
1108 "http://acme.example.com/protected",
1109 )
1110
Georg Brandl33124322008-03-21 19:54:00 +00001111 def test_basic_auth_with_single_quoted_realm(self):
1112 self.test_basic_auth(quote_char="'")
1113
Georg Brandlfa42bd72006-04-30 07:06:11 +00001114 def test_proxy_basic_auth(self):
1115 opener = OpenerDirector()
1116 ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
1117 opener.add_handler(ph)
1118 password_manager = MockPasswordManager()
1119 auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
1120 realm = "ACME Networks"
1121 http_handler = MockHTTPHandler(
1122 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Georg Brandl261e2512006-05-29 20:52:54 +00001123 opener.add_handler(auth_handler)
1124 opener.add_handler(http_handler)
Georg Brandl8c036cc2006-08-20 13:15:39 +00001125 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Georg Brandlfa42bd72006-04-30 07:06:11 +00001126 realm, http_handler, password_manager,
1127 "http://acme.example.com:3128/protected",
1128 "proxy.example.com:3128",
1129 )
1130
    def test_basic_and_digest_auth_handlers(self):
        # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
        # response (http://python.org/sf/1479302), where it should instead
        # return None to allow another handler (especially
        # HTTPBasicAuthHandler) to handle the response.

        # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
        # try digest first (since it's the strongest auth scheme), so we record
        # order of calls here to check digest comes first:
        class RecordingOpenerDirector(OpenerDirector):
            # OpenerDirector that keeps a log of which auth handler ran
            def __init__(self):
                OpenerDirector.__init__(self)
                self.recorded = []
            def record(self, info):
                self.recorded.append(info)
        class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
            # logs "digest" with the parent opener, then delegates
            # NOTE(review): the base-class return value is discarded, not
            # returned -- presumably intentional for this test; confirm
            # before changing.
            def http_error_401(self, *args, **kwds):
                self.parent.record("digest")
                urllib2.HTTPDigestAuthHandler.http_error_401(self,
                                                             *args, **kwds)
        class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            # logs "basic" with the parent opener, then delegates
            # (return value discarded here as well)
            def http_error_401(self, *args, **kwds):
                self.parent.record("basic")
                urllib2.HTTPBasicAuthHandler.http_error_401(self,
                                                            *args, **kwds)

        opener = RecordingOpenerDirector()
        password_manager = MockPasswordManager()
        digest_handler = TestDigestAuthHandler(password_manager)
        basic_handler = TestBasicAuthHandler(password_manager)
        realm = "ACME Networks"
        http_handler = MockHTTPHandler(
            401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
        # basic is registered before digest on purpose: the recorded call
        # order below must still show digest first
        opener.add_handler(basic_handler)
        opener.add_handler(digest_handler)
        opener.add_handler(http_handler)

        # check basic auth isn't blocked by digest handler failing
        self._test_basic_auth(opener, basic_handler, "Authorization",
                              realm, http_handler, password_manager,
                              "http://acme.example.com/protected",
                              "http://acme.example.com/protected",
                              )
        # check digest was tried before basic (twice, because
        # _test_basic_auth called .open() twice)
        self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Georg Brandlb5f2e5c2006-05-08 17:36:08 +00001177
Georg Brandlfa42bd72006-04-30 07:06:11 +00001178 def _test_basic_auth(self, opener, auth_handler, auth_header,
1179 realm, http_handler, password_manager,
1180 request_url, protected_url):
Christian Heimesc5f05e42008-02-23 17:40:11 +00001181 import base64
Georg Brandlfa42bd72006-04-30 07:06:11 +00001182 user, password = "wile", "coyote"
Georg Brandlfa42bd72006-04-30 07:06:11 +00001183
1184 # .add_password() fed through to password manager
1185 auth_handler.add_password(realm, request_url, user, password)
1186 self.assertEqual(realm, password_manager.realm)
1187 self.assertEqual(request_url, password_manager.url)
1188 self.assertEqual(user, password_manager.user)
1189 self.assertEqual(password, password_manager.password)
1190
1191 r = opener.open(request_url)
1192
1193 # should have asked the password manager for the username/password
1194 self.assertEqual(password_manager.target_realm, realm)
1195 self.assertEqual(password_manager.target_url, protected_url)
1196
1197 # expect one request without authorization, then one with
1198 self.assertEqual(len(http_handler.requests), 2)
1199 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1200 userpass = '%s:%s' % (user, password)
1201 auth_hdr_value = 'Basic '+base64.encodestring(userpass).strip()
1202 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1203 auth_hdr_value)
Senthil Kumaran8526adf2010-02-24 16:45:46 +00001204 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1205 auth_hdr_value)
Georg Brandlfa42bd72006-04-30 07:06:11 +00001206 # if the password manager can't find a password, the handler won't
1207 # handle the HTTP auth error
1208 password_manager.user = password_manager.password = None
1209 http_handler.reset()
1210 r = opener.open(request_url)
1211 self.assertEqual(len(http_handler.requests), 1)
1212 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1213
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001214class MiscTests(unittest.TestCase):
1215
1216 def test_build_opener(self):
1217 class MyHTTPHandler(urllib2.HTTPHandler): pass
1218 class FooHandler(urllib2.BaseHandler):
1219 def foo_open(self): pass
1220 class BarHandler(urllib2.BaseHandler):
1221 def bar_open(self): pass
1222
1223 build_opener = urllib2.build_opener
1224
1225 o = build_opener(FooHandler, BarHandler)
1226 self.opener_has_handler(o, FooHandler)
1227 self.opener_has_handler(o, BarHandler)
1228
1229 # can take a mix of classes and instances
1230 o = build_opener(FooHandler, BarHandler())
1231 self.opener_has_handler(o, FooHandler)
1232 self.opener_has_handler(o, BarHandler)
1233
1234 # subclasses of default handlers override default handlers
1235 o = build_opener(MyHTTPHandler)
1236 self.opener_has_handler(o, MyHTTPHandler)
1237
1238 # a particular case of overriding: default handlers can be passed
1239 # in explicitly
1240 o = build_opener()
1241 self.opener_has_handler(o, urllib2.HTTPHandler)
1242 o = build_opener(urllib2.HTTPHandler)
1243 self.opener_has_handler(o, urllib2.HTTPHandler)
1244 o = build_opener(urllib2.HTTPHandler())
1245 self.opener_has_handler(o, urllib2.HTTPHandler)
1246
Amaury Forgeot d'Arc96865852008-04-22 21:14:41 +00001247 # Issue2670: multiple handlers sharing the same base class
1248 class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
1249 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1250 self.opener_has_handler(o, MyHTTPHandler)
1251 self.opener_has_handler(o, MyOtherHTTPHandler)
1252
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001253 def opener_has_handler(self, opener, handler_class):
1254 for h in opener.handlers:
1255 if h.__class__ == handler_class:
1256 break
1257 else:
Benjamin Peterson5c8da862009-06-30 22:57:08 +00001258 self.assertTrue(False)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001259
class RequestTests(unittest.TestCase):
    # Accessor-level tests for urllib2.Request, run against one request
    # without data and one request with data.

    def setUp(self):
        base_url = "http://www.python.org/~jeremy/"
        self.get = urllib2.Request(base_url)
        self.post = urllib2.Request(base_url,
                                    "data",
                                    headers={"X-Test": "test"})

    def test_method(self):
        # the method follows from whether the request carries data
        for expected, request in (("POST", self.post), ("GET", self.get)):
            self.assertEqual(expected, request.get_method())

    def test_add_data(self):
        # adding data turns a GET request into a POST request
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        full_url = self.get.get_full_url()
        self.assertEqual("http://www.python.org/~jeremy/", full_url)

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root_request = urllib2.Request("http://www.python.org/")
        self.assertEqual("/", root_request.get_selector())

    def test_get_type(self):
        url_type = self.get.get_type()
        self.assertEqual("http", url_type)

    def test_get_host(self):
        host = self.get.get_host()
        self.assertEqual("www.python.org", host)

    def test_get_host_unquote(self):
        # a %-escape in the host ("%70" is "p") is unquoted
        quoted_request = urllib2.Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", quoted_request.get_host())

    def test_proxy(self):
        # set_proxy() changes the host but leaves the origin host alone
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())

    def test_wrapped_url(self):
        # the "<URL:...>" wrapper is stripped before parsing
        wrapped_request = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", wrapped_request.get_host())

    def test_url_fragment(self):
        # the fragment is never part of the selector ...
        cases = (
            ("http://www.python.org/?qs=query#fragment=true", "/?qs=query"),
            ("http://www.python.org/#fun=true", "/"),
        )
        for url, expected_selector in cases:
            self.assertEqual(expected_selector, Request(url).get_selector())

        # ... but get_full_url() keeps it.
        # Issue 11703: geturl() omits fragment in the original URL.
        url = 'http://docs.python.org/library/urllib2.html#OK'
        fragment_request = Request(url)
        self.assertEqual(fragment_request.get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001319
def test_main(verbose=None):
    # Entry point used by the regression test runner: run the doctests of
    # this test module and of urllib2 itself, then the unittest cases.
    from test import test_urllib2
    for module in (test_urllib2, urllib2):
        test_support.run_doctest(module, verbose)
    test_support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001330
# Allow running this file directly as a script; test_main() is called with
# verbose=True, which it passes on to the doctest runs.
if __name__ == "__main__":
    test_main(verbose=True)