urllib: Fix splithost to end the host at a '#' fragment delimiter. (#1849)
The current regex-based splitting produces a wrong result when the host is followed directly by a fragment. For example::
http://abc#@def
Web browsers parse that URL as ``http://abc/#@def``, that is, the host
is ``abc``, the path is ``/``, and the fragment is ``@def``. The old
pattern did not stop at ``#`` and so treated ``abc#@def`` as the host.
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 1af2906..01eb549 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -947,7 +947,7 @@
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- _hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)
+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match: