Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn Linderman. Refactor code and tests
diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py
index 61f34f8..47a994c 100644
--- a/Lib/CGIHTTPServer.py
+++ b/Lib/CGIHTTPServer.py
@@ -84,10 +84,9 @@
path begins with one of the strings in self.cgi_directories
(and the next character is a '/' or the end of the string).
"""
- splitpath = _url_collapse_path_split(self.path)
- joined_path = '/'.join(splitpath)
- dir_sep = joined_path.find('/', 1)
- head, tail = joined_path[:dir_sep], joined_path[dir_sep+1:]
+ collapsed_path = _url_collapse_path(self.path)
+ dir_sep = collapsed_path.find('/', 1)
+ head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
if head in self.cgi_directories:
self.cgi_info = head, tail
return True
@@ -301,44 +300,46 @@
self.log_message("CGI script exited OK")
-# TODO(gregory.p.smith): Move this into an appropriate library.
-def _url_collapse_path_split(path):
+def _url_collapse_path(path):
"""
Given a URL path, remove extra '/'s and '.' path elements and collapse
- any '..' references.
+ any '..' references and returns a colllapsed path.
Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+ The utility of this function is limited to is_cgi method and helps
+ preventing some security attacks.
Returns: A tuple of (head, tail) where tail is everything after the final /
and head is everything before it. Head will always start with a '/' and,
if it contains anything else, never have a trailing '/'.
Raises: IndexError if too many '..' occur within the path.
+
"""
# Similar to os.path.split(os.path.normpath(path)) but specific to URL
# path semantics rather than local operating system semantics.
- path_parts = []
- for part in path.split('/'):
- if part == '.':
- path_parts.append('')
- else:
- path_parts.append(part)
- # Filter out blank non trailing parts before consuming the '..'.
- path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
+ path_parts = path.split('/')
+ head_parts = []
+ for part in path_parts[:-1]:
+ if part == '..':
+ head_parts.pop() # IndexError if more '..' than prior parts
+ elif part and part != '.':
+ head_parts.append( part )
if path_parts:
tail_part = path_parts.pop()
+ if tail_part:
+ if tail_part == '..':
+ head_parts.pop()
+ tail_part = ''
+ elif tail_part == '.':
+ tail_part = ''
else:
tail_part = ''
- head_parts = []
- for part in path_parts:
- if part == '..':
- head_parts.pop()
- else:
- head_parts.append(part)
- if tail_part and tail_part == '..':
- head_parts.pop()
- tail_part = ''
- return ('/' + '/'.join(head_parts), tail_part)
+
+ splitpath = ('/' + '/'.join(head_parts), tail_part)
+ collapsed_path = "/".join(splitpath)
+
+ return collapsed_path
nobody = None
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index d1b924d..5dcedc0 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -4,11 +4,6 @@
Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest.
"""
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
-from SimpleHTTPServer import SimpleHTTPRequestHandler
-from CGIHTTPServer import CGIHTTPRequestHandler
-import CGIHTTPServer
-
import os
import sys
import re
@@ -17,12 +12,17 @@
import urllib
import httplib
import tempfile
-
import unittest
+import CGIHTTPServer
+
+from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+from CGIHTTPServer import CGIHTTPRequestHandler
from StringIO import StringIO
-
from test import test_support
+
+
threading = test_support.import_module('threading')
@@ -43,7 +43,7 @@
self.end_headers()
self.wfile.write(b'<html><body>Data</body></html>\r\n')
- def log_message(self, format, *args):
+ def log_message(self, fmt, *args):
pass
@@ -97,9 +97,9 @@
self.handler = SocketlessRequestHandler()
def send_typical_request(self, message):
- input = StringIO(message)
+ input_msg = StringIO(message)
output = StringIO()
- self.handler.rfile = input
+ self.handler.rfile = input_msg
self.handler.wfile = output
self.handler.handle_one_request()
output.seek(0)
@@ -296,7 +296,7 @@
os.chdir(self.cwd)
try:
shutil.rmtree(self.tempdir)
- except:
+ except OSError:
pass
finally:
BaseTestCase.tearDown(self)
@@ -418,42 +418,44 @@
finally:
BaseTestCase.tearDown(self)
- def test_url_collapse_path_split(self):
+ def test_url_collapse_path(self):
# verify tail is the last portion and head is the rest on proper urls
test_vectors = {
- '': ('/', ''),
+ '': '//',
'..': IndexError,
'/.//..': IndexError,
- '/': ('/', ''),
- '//': ('/', ''),
- '/\\': ('/', '\\'),
- '/.//': ('/', ''),
- 'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
- '/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
- 'a': ('/', 'a'),
- '/a': ('/', 'a'),
- '//a': ('/', 'a'),
- './a': ('/', 'a'),
- './C:/': ('/C:', ''),
- '/a/b': ('/a', 'b'),
- '/a/b/': ('/a/b', ''),
- '/a/b/c/..': ('/a/b', ''),
- '/a/b/c/../d': ('/a/b', 'd'),
- '/a/b/c/../d/e/../f': ('/a/b/d', 'f'),
- '/a/b/c/../d/e/../../f': ('/a/b', 'f'),
- '/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'),
+ '/': '//',
+ '//': '//',
+ '/\\': '//\\',
+ '/.//': '//',
+ 'cgi-bin/file1.py': '/cgi-bin/file1.py',
+ '/cgi-bin/file1.py': '/cgi-bin/file1.py',
+ 'a': '//a',
+ '/a': '//a',
+ '//a': '//a',
+ './a': '//a',
+ './C:/': '/C:/',
+ '/a/b': '/a/b',
+ '/a/b/': '/a/b/',
+ '/a/b/.': '/a/b/',
+ '/a/b/c/..': '/a/b/',
+ '/a/b/c/../d': '/a/b/d',
+ '/a/b/c/../d/e/../f': '/a/b/d/f',
+ '/a/b/c/../d/e/../../f': '/a/b/f',
+ '/a/b/c/../d/e/.././././..//f': '/a/b/f',
'../a/b/c/../d/e/.././././..//f': IndexError,
- '/a/b/c/../d/e/../../../f': ('/a', 'f'),
- '/a/b/c/../d/e/../../../../f': ('/', 'f'),
+ '/a/b/c/../d/e/../../../f': '/a/f',
+ '/a/b/c/../d/e/../../../../f': '//f',
'/a/b/c/../d/e/../../../../../f': IndexError,
- '/a/b/c/../d/e/../../../../f/..': ('/', ''),
+ '/a/b/c/../d/e/../../../../f/..': '//',
+ '/a/b/c/../d/e/../../../../f/../.': '//',
}
for path, expected in test_vectors.iteritems():
if isinstance(expected, type) and issubclass(expected, Exception):
self.assertRaises(expected,
- CGIHTTPServer._url_collapse_path_split, path)
+ CGIHTTPServer._url_collapse_path, path)
else:
- actual = CGIHTTPServer._url_collapse_path_split(path)
+ actual = CGIHTTPServer._url_collapse_path(path)
self.assertEqual(expected, actual,
msg='path = %r\nGot: %r\nWanted: %r' %
(path, actual, expected))