Sjoerd Mullender:
File names with "funny" characters get translated incorrectly by
pathname2url (any variety). E.g. the (Unix) file "/ufs/sjoerd/#tmp"
gets translated into "/ufs/sjoerd/#tmp" which, when interpreted as a
URL, is file "/ufs/sjoerd/" with fragment ID "tmp".
Here's an easy fix. (An alternative fix would be to change the
various implementations of pathname2url and url2pathname to include
calls to quote and unquote.)
[The main problem is with the normal use of URLs:
url = pathname2url(file)
transmit url
url, tag = splittag(url)
urlopen(url)
]
In addition, this patch fixes some uses of unquote:
- the host part of URLs should be unquoted
- the file path in the FTP URL should be unquoted before it is split
into components.
- because of the latter, I removed all unquoting from ftpwrapper
and moved it to the caller, but that is not essential.
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 62e5c7f..d175eef 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -42,6 +42,13 @@
def pathname2url(pathname):
return pathname
+_url2pathname = url2pathname
+def url2pathname(url):
+ return _url2pathname(unquote(url))
+_pathname2url = pathname2url
+def pathname2url(p):
+ return quote(_pathname2url(p))
+
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
@@ -228,6 +235,7 @@
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
+ host = unquote(host)
realhost = host
else:
host, selector = url
@@ -298,6 +306,7 @@
import gopherlib
host, selector = splithost(url)
if not host: raise IOError, ('gopher error', 'no host given')
+ host = unquote(host)
type, selector = splitgophertype(selector)
selector, query = splitquery(selector)
selector = unquote(selector)
@@ -329,7 +338,6 @@
host, port = splitport(host)
if not port and socket.gethostbyname(host) in (
localhost(), thishost()):
- file = unquote(file)
return addinfourl(
open(url2pathname(file), 'rb'),
headers, 'file:'+file)
@@ -343,6 +351,9 @@
user, host = splituser(host)
if user: user, passwd = splitpasswd(user)
else: passwd = None
+ host = unquote(host)
+ user = unquote(user or '')
+ passwd = unquote(passwd or '')
host = socket.gethostbyname(host)
if not port:
import ftplib
@@ -350,6 +361,7 @@
else:
port = int(port)
path, attrs = splitattr(path)
+ path = unquote(path)
dirs = string.splitfields(path, '/')
dirs, file = dirs[:-1], dirs[-1]
if dirs and not dirs[0]: dirs = dirs[1:]
@@ -548,13 +560,11 @@
# Class used by open_ftp() for cache of open FTP connections
class ftpwrapper:
def __init__(self, user, passwd, host, port, dirs):
- self.user = unquote(user or '')
- self.passwd = unquote(passwd or '')
+ self.user = user
+ self.passwd = passwd
self.host = host
self.port = port
- self.dirs = []
- for dir in dirs:
- self.dirs.append(unquote(dir))
+ self.dirs = dirs
self.init()
def init(self):
import ftplib