blob: 6b29759da44d01bc2abad8d73242b7d7bb1f660d [file] [log] [blame]
import cgi
import os
import sys
import tempfile
import unittest
from collections import namedtuple
from io import StringIO, BytesIO
from test import support
class HackedSysModule:
# The regression test will have real values in sys.argv, which
# will completely confuse the test of the cgi module
argv = []
stdin = sys.stdin
cgi.sys = HackedSysModule()
class ComparableException:
def __init__(self, err):
self.err = err
def __str__(self):
return str(self.err)
def __eq__(self, anExc):
if not isinstance(anExc, Exception):
return NotImplemented
return (self.err.__class__ == anExc.__class__ and
self.err.args == anExc.args)
def __getattr__(self, attr):
return getattr(self.err, attr)
def do_test(buf, method):
env = {}
if method == "GET":
fp = None
env['REQUEST_METHOD'] = 'GET'
env['QUERY_STRING'] = buf
elif method == "POST":
fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes
env['REQUEST_METHOD'] = 'POST'
env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
env['CONTENT_LENGTH'] = str(len(buf))
else:
raise ValueError("unknown method: %s" % method)
try:
return cgi.parse(fp, env, strict_parsing=1)
except Exception as err:
return ComparableException(err)
parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
(";", ValueError("bad query field: ''")),
(";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
("=a&", ValueError("bad query field: ''")),
("=&a", ValueError("bad query field: 'a'")),
("b=a", {'b': ['a']}),
("b+=a", {'b ': ['a']}),
("a=b=a", {'a': ['b=a']}),
("a=+b=a", {'a': [' b=a']}),
("&b=a", ValueError("bad query field: ''")),
("b&=a", ValueError("bad query field: 'b'")),
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
'expire': ['964546263'],
'kid': ['130003.300038'],
'lobale': ['en-US'],
'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
'ss': ['env'],
'view': ['bustomer'],
}),
("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
{'SUBMIT': ['Browse'],
'_assigned_to': ['31392'],
'_category': ['100'],
'_status': ['1'],
'group_id': ['5470'],
'set': ['custom'],
})
]
def norm(seq):
return sorted(seq, key=repr)
def first_elts(list):
return [p[0] for p in list]
def first_second_elts(list):
return [(p[0], p[1][0]) for p in list]
def gen_result(data, environ):
encoding = 'latin-1'
fake_stdin = BytesIO(data.encode(encoding))
fake_stdin.seek(0)
form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)
result = {}
for k, v in dict(form).items():
result[k] = isinstance(v, list) and form.getlist(k) or v.value
return result
class CgiTests(unittest.TestCase):
def test_parse_multipart(self):
fp = BytesIO(POSTDATA.encode('latin1'))
env = {'boundary': BOUNDARY.encode('latin1'),
'CONTENT-LENGTH': '558'}
result = cgi.parse_multipart(fp, env)
expected = {'submit': [' Add '], 'id': ['1234'],
'file': [b'Testing 123.\n'], 'title': ['']}
self.assertEqual(result, expected)
def test_parse_multipart_without_content_length(self):
POSTDATA = '''--JfISa01
Content-Disposition: form-data; name="submit-name"
just a string
--JfISa01--
'''
fp = BytesIO(POSTDATA.encode('latin1'))
env = {'boundary': 'JfISa01'.encode('latin1')}
result = cgi.parse_multipart(fp, env)
expected = {'submit-name': ['just a string\n']}
self.assertEqual(result, expected)
def test_parse_multipart_invalid_encoding(self):
BOUNDARY = "JfISa01"
POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 3
\u2603
--JfISa01"""
fp = BytesIO(POSTDATA.encode('utf8'))
env = {'boundary': BOUNDARY.encode('latin1'),
'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))}
result = cgi.parse_multipart(fp, env, encoding="ascii",
errors="surrogateescape")
expected = {'submit-name': ["\udce2\udc98\udc83"]}
self.assertEqual(result, expected)
self.assertEqual("\u2603".encode('utf8'),
result["submit-name"][0].encode('utf8', 'surrogateescape'))
def test_fieldstorage_properties(self):
fs = cgi.FieldStorage()
self.assertFalse(fs)
self.assertIn("FieldStorage", repr(fs))
self.assertEqual(list(fs), list(fs.keys()))
fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
self.assertTrue(fs)
def test_fieldstorage_invalid(self):
self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
environ={"REQUEST_METHOD":"PUT"})
self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
self.assertRaises(TypeError, bool, fs)
def test_strict(self):
for orig, expect in parse_strict_test_cases:
# Test basic parsing
d = do_test(orig, "GET")
self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
d = do_test(orig, "POST")
self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))
env = {'QUERY_STRING': orig}
fs = cgi.FieldStorage(environ=env)
if isinstance(expect, dict):
# test dict interface
self.assertEqual(len(expect), len(fs))
self.assertCountEqual(expect.keys(), fs.keys())
##self.assertEqual(norm(expect.values()), norm(fs.values()))
##self.assertEqual(norm(expect.items()), norm(fs.items()))
self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
# test individual fields
for key in expect.keys():
expect_val = expect[key]
self.assertIn(key, fs)
if len(expect_val) > 1:
self.assertEqual(fs.getvalue(key), expect_val)
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
def test_log(self):
cgi.log("Testing")
cgi.logfp = StringIO()
cgi.initlog("%s", "Testing initlog 1")
cgi.log("%s", "Testing log 2")
self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
if os.path.exists(os.devnull):
cgi.logfp = None
cgi.logfile = os.devnull
cgi.initlog("%s", "Testing log 3")
self.addCleanup(cgi.closelog)
cgi.log("Testing log 4")
def test_fieldstorage_readline(self):
# FieldStorage uses readline, which has the capacity to read all
# contents of the input file into memory; we use readline's size argument
# to prevent that for files that do not contain any newlines in
# non-GET/HEAD requests
class TestReadlineFile:
def __init__(self, file):
self.file = file
self.numcalls = 0
def readline(self, size=None):
self.numcalls += 1
if size:
return self.file.readline(size)
else:
return self.file.readline()
def __getattr__(self, name):
file = self.__dict__['file']
a = getattr(file, name)
if not isinstance(a, int):
setattr(self, name, a)
return a
f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
self.addCleanup(f.close)
f.write(b'x' * 256 * 1024)
f.seek(0)
env = {'REQUEST_METHOD':'PUT'}
fs = cgi.FieldStorage(fp=f, environ=env)
self.addCleanup(fs.file.close)
# if we're not chunking properly, readline is only called twice
# (by read_binary); if we are chunking properly, it will be called 5 times
# as long as the chunksize is 1 << 16.
self.assertGreater(f.numcalls, 2)
f.close()
def test_fieldstorage_multipart(self):
#Test basic FieldStorage multipart parsing
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': '558'}
fp = BytesIO(POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 4)
expect = [{'name':'id', 'filename':None, 'value':'1234'},
{'name':'title', 'filename':None, 'value':''},
{'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
{'name':'submit', 'filename':None, 'value':' Add '}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_leading_whitespace(self):
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': '560'}
# Add some leading whitespace to our post data that will cause the
# first line to not be the innerboundary.
fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 4)
expect = [{'name':'id', 'filename':None, 'value':'1234'},
{'name':'title', 'filename':None, 'value':''},
{'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
{'name':'submit', 'filename':None, 'value':' Add '}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_non_ascii(self):
#Test basic FieldStorage multipart parsing
env = {'REQUEST_METHOD':'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH':'558'}
for encoding in ['iso-8859-1','utf-8']:
fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
self.assertEqual(len(fs.list), 1)
expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_maxline(self):
# Issue #18167
maxline = 1 << 16
self.maxDiff = None
def check(content):
data = """---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain
%s
---123--
""".replace('\n', '\r\n') % content
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'REQUEST_METHOD': 'POST',
}
self.assertEqual(gen_result(data, environ),
{'upload': content.encode('latin1')})
check('x' * (maxline - 1))
check('x' * (maxline - 1) + '\r')
check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
def test_fieldstorage_multipart_w3c(self):
# Test basic FieldStorage multipart parsing (W3C sample)
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
'CONTENT_LENGTH': str(len(POSTDATA_W3))}
fp = BytesIO(POSTDATA_W3.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 2)
self.assertEqual(fs.list[0].name, 'submit-name')
self.assertEqual(fs.list[0].value, 'Larry')
self.assertEqual(fs.list[1].name, 'files')
files = fs.list[1].value
self.assertEqual(len(files), 2)
expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
{'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
for x in range(len(files)):
for k, exp in expect[x].items():
got = getattr(files[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_part_content_length(self):
BOUNDARY = "JfISa01"
POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 5
Larry
--JfISa01"""
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': str(len(POSTDATA))}
fp = BytesIO(POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 1)
self.assertEqual(fs.list[0].name, 'submit-name')
self.assertEqual(fs.list[0].value, 'Larry')
def test_field_storage_multipart_no_content_length(self):
fp = BytesIO(b"""--MyBoundary
Content-Disposition: form-data; name="my-arg"; filename="foo"
Test
--MyBoundary--
""")
env = {
"REQUEST_METHOD": "POST",
"CONTENT_TYPE": "multipart/form-data; boundary=MyBoundary",
"wsgi.input": fp,
}
fields = cgi.FieldStorage(fp, environ=env)
self.assertEqual(len(fields["my-arg"].file.read()), 5)
def test_fieldstorage_as_context_manager(self):
fp = BytesIO(b'x' * 10)
env = {'REQUEST_METHOD': 'PUT'}
with cgi.FieldStorage(fp=fp, environ=env) as fs:
content = fs.file.read()
self.assertFalse(fs.file.closed)
self.assertTrue(fs.file.closed)
self.assertEqual(content, 'x' * 10)
with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
fs.file.read()
_qs_result = {
'key1': 'value1',
'key2': ['value2x', 'value2y'],
'key3': 'value3',
'key4': 'value4'
}
def testQSAndUrlEncode(self):
data = "key2=value2x&key3=value3&key4=value4"
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'QUERY_STRING': 'key1=value1&key2=value2y',
'REQUEST_METHOD': 'POST',
}
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)
def test_max_num_fields(self):
# For application/x-www-form-urlencoded
data = '&'.join(['a=a']*11)
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'REQUEST_METHOD': 'POST',
}
with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=10,
)
# For multipart/form-data
data = """---123
Content-Disposition: form-data; name="a"
3
---123
Content-Type: application/x-www-form-urlencoded
a=4
---123
Content-Type: application/x-www-form-urlencoded
a=5
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'a=1&a=2',
'REQUEST_METHOD': 'POST',
}
# 2 GET entities
# 1 top level POST entities
# 1 entity within the second POST entity
# 1 entity within the third POST entity
with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=4,
)
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=5,
)
def testQSAndFormData(self):
data = """---123
Content-Disposition: form-data; name="key2"
value2y
---123
Content-Disposition: form-data; name="key3"
value3
---123
Content-Disposition: form-data; name="key4"
value4
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'key1=value1&key2=value2x',
'REQUEST_METHOD': 'POST',
}
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)
def testQSAndFormDataFile(self):
data = """---123
Content-Disposition: form-data; name="key2"
value2y
---123
Content-Disposition: form-data; name="key3"
value3
---123
Content-Disposition: form-data; name="key4"
value4
---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain
this is the content of the fake file
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'key1=value1&key2=value2x',
'REQUEST_METHOD': 'POST',
}
result = self._qs_result.copy()
result.update({
'upload': b'this is the content of the fake file\n'
})
v = gen_result(data, environ)
self.assertEqual(result, v)
def test_parse_header(self):
self.assertEqual(
cgi.parse_header("text/plain"),
("text/plain", {}))
self.assertEqual(
cgi.parse_header("text/vnd.just.made.this.up ; "),
("text/vnd.just.made.this.up", {}))
self.assertEqual(
cgi.parse_header("text/plain;charset=us-ascii"),
("text/plain", {"charset": "us-ascii"}))
self.assertEqual(
cgi.parse_header('text/plain ; charset="us-ascii"'),
("text/plain", {"charset": "us-ascii"}))
self.assertEqual(
cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
("text/plain", {"charset": "us-ascii", "another": "opt"}))
self.assertEqual(
cgi.parse_header('attachment; filename="silly.txt"'),
("attachment", {"filename": "silly.txt"}))
self.assertEqual(
cgi.parse_header('attachment; filename="strange;name"'),
("attachment", {"filename": "strange;name"}))
self.assertEqual(
cgi.parse_header('attachment; filename="strange;name";size=123;'),
("attachment", {"filename": "strange;name", "size": "123"}))
self.assertEqual(
cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
("form-data", {"name": "files", "filename": 'fo"o;bar'}))
def test_all(self):
not_exported = {
"logfile", "logfp", "initlog", "dolog", "nolog", "closelog", "log",
"maxlen", "valid_boundary"}
support.check__all__(self, cgi, not_exported=not_exported)
BOUNDARY = "---------------------------721837373350705526688164684"
POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"
1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain
Testing 123.
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"
Add\x20
-----------------------------721837373350705526688164684--
"""
POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"
\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""
# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"
Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y
--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain
... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary
...contents of file2.gif...
--BbC04y--
--AaB03x--
"""
if __name__ == '__main__':
unittest.main()