bpo-42967: only use '&' as a query string separator (#24297)
bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
urllib.parse will only us "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument seperator with default value "&" is added to specify the separator.
Co-authored-by: Éric Araujo <merwok@netwok.org>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Éric Araujo <merwok@netwok.org>
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 6b29759..239d975 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -53,12 +53,9 @@ def do_test(buf, method):
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
- (";", ValueError("bad query field: ''")),
- (";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
- ("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
@@ -73,8 +70,6 @@ def do_test(buf, method):
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
@@ -201,6 +196,30 @@ def test_strict(self):
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
+ def test_separator(self):
+ parse_semicolon = [
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+ (";", ValueError("bad query field: ''")),
+ (";;", ValueError("bad query field: ''")),
+ ("=;a", ValueError("bad query field: 'a'")),
+ (";b=a", ValueError("bad query field: ''")),
+ ("b;=a", ValueError("bad query field: 'b'")),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+ ]
+ for orig, expect in parse_semicolon:
+ env = {'QUERY_STRING': orig}
+ fs = cgi.FieldStorage(separator=';', environ=env)
+ if isinstance(expect, dict):
+ for key in expect.keys():
+ expect_val = expect[key]
+ self.assertIn(key, fs)
+ if len(expect_val) > 1:
+ self.assertEqual(fs.getvalue(key), expect_val)
+ else:
+ self.assertEqual(fs.getvalue(key), expect_val[0])
+
def test_log(self):
cgi.log("Testing")
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 7625007..3b1c360 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -32,16 +32,10 @@
(b"&a=b", [(b'a', b'b')]),
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
- (";", []),
- (";;", []),
- (";a=b", [('a', 'b')]),
- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
- ("a=1;a=2", [('a', '1'), ('a', '2')]),
- (b";", []),
- (b";;", []),
- (b";a=b", [(b'a', b'b')]),
- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ (";a=b", [(';a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+ (b";a=b", [(b';a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
]
# Each parse_qs testcase is a two-tuple that contains
@@ -68,16 +62,10 @@
(b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}),
- (";", {}),
- (";;", {}),
- (";a=b", {'a': ['b']}),
- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
- ("a=1;a=2", {'a': ['1', '2']}),
- (b";", {}),
- (b";;", {}),
- (b";a=b", {b'a': [b'b']}),
- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
- (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ (";a=b", {';a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+ (b";a=b", {b';a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
]
class UrlParseTestCase(unittest.TestCase):
@@ -886,10 +874,46 @@ def test_parse_qsl_encoding(self):
def test_parse_qsl_max_num_fields(self):
with self.assertRaises(ValueError):
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
- with self.assertRaises(ValueError):
- urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
+ def test_parse_qs_separator(self):
+ parse_qs_semicolon_cases = [
+ (";", {}),
+ (";;", {}),
+ (";a=b", {'a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=1;a=2", {'a': ['1', '2']}),
+ (b";", {}),
+ (b";;", {}),
+ (b";a=b", {b'a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ ]
+ for orig, expect in parse_qs_semicolon_cases:
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
+ result = urllib.parse.parse_qs(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+ def test_parse_qsl_separator(self):
+ parse_qsl_semicolon_cases = [
+ (";", []),
+ (";;", []),
+ (";a=b", [('a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
+ (b";", []),
+ (b";;", []),
+ (b";a=b", [(b'a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ ]
+ for orig, expect in parse_qsl_semicolon_cases:
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
+ result = urllib.parse.parse_qsl(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.