Port the simplejson upgrade from trunk (issue #4136)

The json module now works only with unicode strings (str); bytes input is rejected with TypeError.

Patch by Antoine Pitrou; updated by me
diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py
index 67c4c29..78e7e07 100644
--- a/Lib/json/tests/test_decode.py
+++ b/Lib/json/tests/test_decode.py
@@ -32,3 +32,10 @@
                                     object_pairs_hook = OrderedDict,
                                     object_hook = lambda x: None),
                          OrderedDict(p))
+
+    def test_decoder_optimizations(self):
+        # Several optimizations skip calls to the whitespace regex, so
+        # this test is designed to exercise the uncommon cases (extra
+        # whitespace in objects). The array cases are already covered.
+        rval = json.loads('{   "key"    :    "value"    ,  "k":"v"    }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
diff --git a/Lib/json/tests/test_dump.py b/Lib/json/tests/test_dump.py
index 06a56d7..54e218f 100644
--- a/Lib/json/tests/test_dump.py
+++ b/Lib/json/tests/test_dump.py
@@ -11,3 +11,11 @@
 
     def test_dumps(self):
         self.assertEquals(json.dumps({}), '{}')
+
+    def test_encode_truefalse(self):
+        self.assertEquals(json.dumps(
+                 {True: False, False: True}, sort_keys=True),
+                 '{"false": true, "true": false}')
+        self.assertEquals(json.dumps(
+                {2: 3.0, 4.0: 5, False: 1, 6: True}, sort_keys=True),
+                '{"false": 1, "2": 3.0, "4.0": 5, "6": true}')
diff --git a/Lib/json/tests/test_encode_basestring_ascii.py b/Lib/json/tests/test_encode_basestring_ascii.py
index 4517ae9..68ce7ef 100644
--- a/Lib/json/tests/test_encode_basestring_ascii.py
+++ b/Lib/json/tests/test_encode_basestring_ascii.py
@@ -3,22 +3,20 @@
 import json.encoder
 
 CASES = [
-    ('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', b'"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
-    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', b'"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
-    ('controls', b'"controls"'),
-    ('\x08\x0c\n\r\t', b'"\\b\\f\\n\\r\\t"'),
-    ('{"object with 1 member":["array with 1 element"]}', b'"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
-    (' s p a c e d ', b'" s p a c e d "'),
-    ('\U0001d120', b'"\\ud834\\udd20"'),
-    ('\u03b1\u03a9', b'"\\u03b1\\u03a9"'),
-    (b'\xce\xb1\xce\xa9', b'"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', b'"\\u03b1\\u03a9"'),
-    (b'\xce\xb1\xce\xa9', b'"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', b'"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', b'"\\u03b1\\u03a9"'),
-    ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", b'"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
-    ('\x08\x0c\n\r\t', b'"\\b\\f\\n\\r\\t"'),
-    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', b'"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+    ('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
+    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+    ('controls', '"controls"'),
+    ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    ('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
+    (' s p a c e d ', '" s p a c e d "'),
+    ('\U0001d120', '"\\ud834\\udd20"'),
+    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
+    ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
 ]
 
 class TestEncodeBaseStringAscii(TestCase):
@@ -26,12 +24,14 @@
         self._test_encode_basestring_ascii(json.encoder.py_encode_basestring_ascii)
 
     def test_c_encode_basestring_ascii(self):
-        if json.encoder.c_encode_basestring_ascii is not None:
-            self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii)
+        if not json.encoder.c_encode_basestring_ascii:
+            return
+        self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii)
 
     def _test_encode_basestring_ascii(self, encode_basestring_ascii):
         fname = encode_basestring_ascii.__name__
         for input_string, expect in CASES:
             result = encode_basestring_ascii(input_string)
-            result = result.encode("ascii")
-            self.assertEquals(result, expect)
+            self.assertEquals(result, expect,
+                '{0!r} != {1!r} for {2}({3!r})'.format(
+                    result, expect, fname, input_string))
diff --git a/Lib/json/tests/test_fail.py b/Lib/json/tests/test_fail.py
index 252ebc4..94e01e6 100644
--- a/Lib/json/tests/test_fail.py
+++ b/Lib/json/tests/test_fail.py
@@ -73,4 +73,4 @@
             except ValueError:
                 pass
             else:
-                self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
+                self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
diff --git a/Lib/json/tests/test_float.py b/Lib/json/tests/test_float.py
index 9df6d1e..2d4aea2 100644
--- a/Lib/json/tests/test_float.py
+++ b/Lib/json/tests/test_float.py
@@ -5,5 +5,11 @@
 
 class TestFloat(TestCase):
     def test_floats(self):
-        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]:
+        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
             self.assertEquals(float(json.dumps(num)), num)
+            self.assertEquals(json.loads(json.dumps(num)), num)
+
+    def test_ints(self):
+        for num in [1, 1<<32, 1<<64]:
+            self.assertEquals(json.dumps(num), str(num))
+            self.assertEquals(int(json.dumps(num)), num)
diff --git a/Lib/json/tests/test_scanstring.py b/Lib/json/tests/test_scanstring.py
index 025d15d..2d55672 100644
--- a/Lib/json/tests/test_scanstring.py
+++ b/Lib/json/tests/test_scanstring.py
@@ -15,96 +15,90 @@
 
     def _test_scanstring(self, scanstring):
         self.assertEquals(
-            scanstring('"z\\ud834\\udd20x"', 1, None, True),
+            scanstring('"z\\ud834\\udd20x"', 1, True),
             ('z\U0001d120x', 16))
 
         if sys.maxunicode == 65535:
             self.assertEquals(
-                scanstring('"z\U0001d120x"', 1, None, True),
+                scanstring('"z\U0001d120x"', 1, True),
                 ('z\U0001d120x', 6))
         else:
             self.assertEquals(
-                scanstring('"z\U0001d120x"', 1, None, True),
+                scanstring('"z\U0001d120x"', 1, True),
                 ('z\U0001d120x', 5))
 
         self.assertEquals(
-            scanstring('"\\u007b"', 1, None, True),
+            scanstring('"\\u007b"', 1, True),
             ('{', 8))
 
         self.assertEquals(
-            scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+            scanstring('"A JSON payload should be an object or array, not a string."', 1, True),
             ('A JSON payload should be an object or array, not a string.', 60))
 
         self.assertEquals(
-            scanstring('["Unclosed array"', 2, None, True),
+            scanstring('["Unclosed array"', 2, True),
             ('Unclosed array', 17))
 
         self.assertEquals(
-            scanstring('["extra comma",]', 2, None, True),
+            scanstring('["extra comma",]', 2, True),
             ('extra comma', 14))
 
         self.assertEquals(
-            scanstring('["double extra comma",,]', 2, None, True),
+            scanstring('["double extra comma",,]', 2, True),
             ('double extra comma', 21))
 
         self.assertEquals(
-            scanstring('["Comma after the close"],', 2, None, True),
+            scanstring('["Comma after the close"],', 2, True),
             ('Comma after the close', 24))
 
         self.assertEquals(
-            scanstring('["Extra close"]]', 2, None, True),
+            scanstring('["Extra close"]]', 2, True),
             ('Extra close', 14))
 
         self.assertEquals(
-            scanstring('{"Extra comma": true,}', 2, None, True),
+            scanstring('{"Extra comma": true,}', 2, True),
             ('Extra comma', 14))
 
         self.assertEquals(
-            scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
+            scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, True),
             ('Extra value after close', 26))
 
         self.assertEquals(
-            scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
+            scanstring('{"Illegal expression": 1 + 2}', 2, True),
             ('Illegal expression', 21))
 
         self.assertEquals(
-            scanstring('{"Illegal invocation": alert()}', 2, None, True),
+            scanstring('{"Illegal invocation": alert()}', 2, True),
             ('Illegal invocation', 21))
 
         self.assertEquals(
-            scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
+            scanstring('{"Numbers cannot have leading zeroes": 013}', 2, True),
             ('Numbers cannot have leading zeroes', 37))
 
         self.assertEquals(
-            scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
+            scanstring('{"Numbers cannot be hex": 0x14}', 2, True),
             ('Numbers cannot be hex', 24))
 
         self.assertEquals(
-            scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
+            scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, True),
             ('Too deep', 30))
 
         self.assertEquals(
-            scanstring('{"Missing colon" null}', 2, None, True),
+            scanstring('{"Missing colon" null}', 2, True),
             ('Missing colon', 16))
 
         self.assertEquals(
-            scanstring('{"Double colon":: null}', 2, None, True),
+            scanstring('{"Double colon":: null}', 2, True),
             ('Double colon', 15))
 
         self.assertEquals(
-            scanstring('{"Comma instead of colon", null}', 2, None, True),
+            scanstring('{"Comma instead of colon", null}', 2, True),
             ('Comma instead of colon', 25))
 
         self.assertEquals(
-            scanstring('["Colon instead of comma": false]', 2, None, True),
+            scanstring('["Colon instead of comma": false]', 2, True),
             ('Colon instead of comma', 25))
 
         self.assertEquals(
-            scanstring('["Bad value", truth]', 2, None, True),
+            scanstring('["Bad value", truth]', 2, True),
             ('Bad value', 12))
-
-    def test_issue3623(self):
-        self.assertRaises(ValueError, json.decoder.scanstring, b"xxx", 1,
-                          "xxx")
-        self.assertRaises(UnicodeDecodeError,
-                          json.encoder.encode_basestring_ascii, b"xx\xff")
diff --git a/Lib/json/tests/test_unicode.py b/Lib/json/tests/test_unicode.py
index 00bf58e..12de83c 100644
--- a/Lib/json/tests/test_unicode.py
+++ b/Lib/json/tests/test_unicode.py
@@ -4,20 +4,8 @@
 from collections import OrderedDict
 
 class TestUnicode(TestCase):
-    def test_encoding1(self):
-        encoder = json.JSONEncoder(encoding='utf-8')
-        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
-        s = u.encode('utf-8')
-        ju = encoder.encode(u)
-        js = encoder.encode(s)
-        self.assertEquals(ju, js)
-
-    def test_encoding2(self):
-        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
-        s = u.encode('utf-8')
-        ju = json.dumps(u, encoding='utf-8')
-        js = json.dumps(s, encoding='utf-8')
-        self.assertEquals(ju, js)
+    # test_encoding1 and test_encoding2 from 2.x are irrelevant (only str
+    # is supported as input, not bytes).
 
     def test_encoding3(self):
         u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
@@ -52,8 +40,22 @@
     def test_unicode_decode(self):
         for i in range(0, 0xd7ff):
             u = chr(i)
-            js = '"\\u{0:04x}"'.format(i)
-            self.assertEquals(json.loads(js), u)
+            s = '"\\u{0:04x}"'.format(i)
+            self.assertEquals(json.loads(s), u)
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads('""')), str)
+        self.assertEquals(type(json.loads('"a"')), str)
+        self.assertEquals(type(json.loads('["a"]')[0]), str)
+
+    def test_bytes_encode(self):
+        self.assertRaises(TypeError, json.dumps, b"hi")
+        self.assertRaises(TypeError, json.dumps, [b"hi"])
+
+    def test_bytes_decode(self):
+        self.assertRaises(TypeError, json.loads, b'"hi"')
+        self.assertRaises(TypeError, json.loads, b'["hi"]')
+
 
     def test_object_pairs_hook_with_unicode(self):
         s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'