Fix Issue4493 - urllib2 adds '/' to the path component of a URL when it does not
start with one. This behavior is exhibited by browsers and other clients.
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index e5e3c39..9cc9697 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -848,6 +848,25 @@
p_ds_req = h.do_request_(ds_req)
self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
+ def test_fixpath_in_weirdurls(self):
+ # Issue4493: urllib2 should supply a leading '/' for URLs whose
+ # non-empty path does not start with '/'
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ weird_url = 'http://www.python.org?getspam'
+ req = Request(weird_url)
+ newreq = h.do_request_(req)
+ self.assertEqual(newreq.host,'www.python.org')
+ self.assertEqual(newreq.selector,'/?getspam')
+
+ url_without_path = 'http://www.python.org'
+ req = Request(url_without_path)
+ newreq = h.do_request_(req)
+ self.assertEqual(newreq.host,'www.python.org')
+ self.assertEqual(newreq.selector,'')
+
def test_errors(self):
h = urllib.request.HTTPErrorProcessor()
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 2ddd281..78f3084 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -699,7 +699,12 @@
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
- if match: return match.group(1, 2)
+ if match:
+ host_port = match.group(1)
+ path = match.group(2)
+ if path and not path.startswith('/'):
+ path = '/' + path
+ return host_port, path
return None, url
_userprog = None
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index f3fb7be..fe66a67 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -105,7 +105,7 @@
# check for SSL
try:
import ssl
-except:
+except ImportError:
_have_ssl = False
else:
_have_ssl = True