blob: 1b3c8e9f4693f4b3dd04afab0f799dd346c76914 [file] [log] [blame]
Jon Skeetfb248822015-09-04 12:41:14 +01001#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2008 Google Inc. All rights reserved.
4// https://developers.google.com/protocol-buffers/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#endregion
32using NUnit.Framework;
33using System;
34using System.IO;
35
namespace Google.Protobuf
{
    /// <summary>
    /// Tests for <see cref="JsonTokenizer"/>: each test feeds JSON text to a tokenizer and checks
    /// either the resulting sequence of tokens or that tokenizing fails with an exception.
    /// </summary>
    public class JsonTokenizerTest
    {
        [Test]
        public void EmptyObjectValue()
        {
            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);
        }

        [Test]
        public void EmptyArrayValue()
        {
            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);
        }

        // The JSON passed in is the *content* of a string; the test wraps it in double quotes.
        [Test]
        [TestCase("foo", "foo")]
        [TestCase("tab\\t", "tab\t")]
        [TestCase("line\\nfeed", "line\nfeed")]
        [TestCase("carriage\\rreturn", "carriage\rreturn")]
        [TestCase("back\\bspace", "back\bspace")]
        [TestCase("form\\ffeed", "form\ffeed")]
        [TestCase("escaped\\/slash", "escaped/slash")]
        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
        [TestCase("escaped\\\"quote", "escaped\"quote")]
        [TestCase("foo {}[] bar", "foo {}[] bar")]
        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
        public void StringValue(string json, string expectedValue)
        {
            AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));
        }

        // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
        // using TestCase as they have no valid UTF-8 representation.
        // It's unclear exactly how we should handle a mixture of escaped or not: that can't
        // come from UTF-8 text, but could come from a .NET string. For the moment,
        // treat it as valid in the obvious way.
        [Test]
        public void MixedSurrogatePairs()
        {
            string expected = "\ud800\udc00";
            AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected));
            AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
        }

        [Test]
        [TestCase("embedded tab\t")]
        [TestCase("embedded CR\r")]
        [TestCase("embedded LF\n")]
        [TestCase("embedded bell\u0007")]
        [TestCase("bad escape\\a")]
        [TestCase("incomplete escape\\")]
        [TestCase("incomplete Unicode escape\\u000")]
        [TestCase("invalid Unicode escape\\u000H")]
        // Surrogate pair handling for *escaped* code units. The unescaped (raw .NET string)
        // equivalents are covered by InvalidSurrogatePairs, as they can't be expressed in
        // attributes. We only need to detect this in strings, as non-ASCII characters
        // anywhere other than in strings will already lead to parsing errors.
        [TestCase("\\ud800")]
        [TestCase("\\udc00")]
        [TestCase("\\ud800x")]
        [TestCase("\\udc00x")]
        [TestCase("\\udc00\\ud800y")]
        public void InvalidStringValue(string json)
        {
            AssertThrowsAfter("\"" + json + "\"");
        }

        // Tests for invalid strings that can't be expressed in attributes,
        // as the constants can't be expressed as UTF-8 strings.
        [Test]
        public void InvalidSurrogatePairs()
        {
            AssertThrowsAfter("\"\ud800x\"");
            AssertThrowsAfter("\"\udc00y\"");
            AssertThrowsAfter("\"\udc00\ud800y\"");
        }

        [Test]
        [TestCase("0", 0)]
        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
        [TestCase("1", 1)]
        [TestCase("-1", -1)]
        // From here on, assume leading sign is okay...
        [TestCase("1.125", 1.125)]
        [TestCase("1.0", 1)]
        [TestCase("1e5", 100000)]
        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
        [TestCase("1E5", 100000)]
        [TestCase("1e+5", 100000)]
        [TestCase("1E-5", 0.00001)]
        [TestCase("123E-2", 1.23)]
        [TestCase("123.45E3", 123450)]
        [TestCase(" 1 ", 1)]
        public void NumberValue(string json, double expectedValue)
        {
            AssertTokens(json, JsonToken.Value(expectedValue));
        }

        [Test]
        [TestCase("00")]
        [TestCase(".5")]
        [TestCase("1.")]
        [TestCase("1e")]
        [TestCase("1e-")]
        [TestCase("--")]
        [TestCase("--1")]
        [TestCase("-1.7977e308")]
        [TestCase("1.7977e308")]
        public void InvalidNumberValue(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        [TestCase("nul")]
        [TestCase("nothing")]
        [TestCase("truth")]
        [TestCase("fALSEhood")]
        public void InvalidLiterals(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        public void NullValue()
        {
            AssertTokens("null", JsonToken.Null);
        }

        [Test]
        public void TrueValue()
        {
            AssertTokens("true", JsonToken.True);
        }

        [Test]
        public void FalseValue()
        {
            AssertTokens("false", JsonToken.False);
        }

        [Test]
        public void SimpleObject()
        {
            AssertTokens("{'x': 'y'}",
                JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject);
        }

        // Each case gives the JSON (apostrophes are converted to double quotes) and the number
        // of tokens that must be read successfully before the tokenizer is expected to throw.
        [Test]
        [TestCase("[10, 20", 3)]
        [TestCase("[10,", 2)]
        [TestCase("[10:20]", 2)]
        [TestCase("[", 1)]
        [TestCase("[,", 1)]
        [TestCase("{", 1)]
        [TestCase("{,", 1)]
        [TestCase("{[", 1)]
        [TestCase("{{", 1)]
        [TestCase("{0", 1)]
        [TestCase("{null", 1)]
        [TestCase("{false", 1)]
        [TestCase("{true", 1)]
        [TestCase("}", 0)]
        [TestCase("]", 0)]
        [TestCase(",", 0)]
        [TestCase("'foo' 'bar'", 1)]
        [TestCase(":", 0)]
        [TestCase("'foo", 0)] // Incomplete string
        [TestCase("{ 'foo' }", 2)]
        [TestCase("{ x:1", 1)] // Property names must be quoted
        [TestCase("{]", 1)]
        [TestCase("[}", 1)]
        [TestCase("[1,", 2)]
        [TestCase("{'x':0]", 3)]
        [TestCase("{ 'foo': }", 2)]
        [TestCase("{ 'foo':'bar', }", 3)]
        public void InvalidStructure(string json, int expectedValidTokens)
        {
            // Note: we don't test that the earlier tokens are exactly as expected,
            // partly because that's hard to parameterize.
            var tokenizer = new JsonTokenizer(new StringReader(json.Replace('\'', '"')));
            for (int i = 0; i < expectedValidTokens; i++)
            {
                Assert.IsNotNull(tokenizer.Next());
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }

        [Test]
        public void ArrayMixedType()
        {
            AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]",
                JsonToken.StartArray,
                JsonToken.Value(1),
                JsonToken.Value("foo"),
                JsonToken.Null,
                JsonToken.False,
                JsonToken.True,
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndArray);
        }

        [Test]
        public void ObjectMixedType()
        {
            AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
                           'f': [2], 'g': {'x':'y' }}",
                JsonToken.StartObject,
                JsonToken.Name("a"),
                JsonToken.Value(1),
                JsonToken.Name("b"),
                JsonToken.Value("bar"),
                JsonToken.Name("c"),
                JsonToken.Null,
                JsonToken.Name("d"),
                JsonToken.False,
                JsonToken.Name("e"),
                JsonToken.True,
                JsonToken.Name("f"),
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.Name("g"),
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndObject);
        }

        [Test]
        public void NextAfterEndDocumentThrows()
        {
            var tokenizer = new JsonTokenizer(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        [Test]
        public void CanPushBackEndDocument()
        {
            var tokenizer = new JsonTokenizer(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            tokenizer.PushBack(JsonToken.EndDocument);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// All apostrophes are first converted to double quotes, allowing any tests
        /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding
        /// messy string literal escaping. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        /// <param name="json">The JSON text to tokenize, with apostrophes standing in for double quotes.</param>
        /// <param name="expectedTokens">The tokens expected before the end of the document, in order.</param>
        private static void AssertTokens(string json, params JsonToken[] expectedTokens)
        {
            AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
        /// replacement on the specified JSON, and should be used when the text contains apostrophes which
        /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        /// <param name="json">The JSON text to tokenize, used exactly as given.</param>
        /// <param name="expectedTokens">The tokens expected before the end of the document, in order.</param>
        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
        {
            var tokenizer = new JsonTokenizer(new StringReader(json));
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                // Report a premature end explicitly rather than as a plain equality failure.
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            // After the expected tokens, the only thing left must be the end of the document.
            var finalToken = tokenizer.Next();
            if (finalToken != JsonToken.EndDocument)
            {
                Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
            }
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens, and that
        /// the following call to read a token throws <see cref="InvalidJsonException"/>.
        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
        /// replacement on the specified JSON: apostrophes are passed to the tokenizer as-is.
        /// </summary>
        /// <param name="json">The JSON text to tokenize, used exactly as given.</param>
        /// <param name="expectedTokens">The tokens expected to be read successfully before the failure, in order.</param>
        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
        {
            var tokenizer = new JsonTokenizer(new StringReader(json));
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                // Report a premature end explicitly rather than as a plain equality failure.
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }
    }
}