// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Jon Skeet68036862008-10-22 13:30:34 +010032using System;
33using System.Collections.Generic;
34using System.Globalization;
35using System.IO;
36using System.Text;
37using Google.ProtocolBuffers.Descriptors;
38using System.Collections;
39
40namespace Google.ProtocolBuffers {
/// <summary>
/// Provides ASCII text formatting support for messages: printing via the
/// Print/PrintToString methods and parsing via the Merge methods.
/// </summary>
45 public static class TextFormat {
46
/// <summary>
/// Writes the text-format representation of <paramref name="message"/>
/// to <paramref name="output"/>.
/// </summary>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a TextGenerator and hand off to the generator-based overload.
  Print(message, new TextGenerator(output));
}
55
/// <summary>
/// Writes the text-format representation of the unknown field set
/// <paramref name="fields"/> to <paramref name="output"/>.
/// </summary>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  // Wrap the writer in a TextGenerator and print every unknown field through it.
  PrintUnknownFields(fields, new TextGenerator(output));
}
63
/// <summary>
/// Returns the text-format representation of <paramref name="message"/> as a string.
/// </summary>
public static string PrintToString(IMessage message) {
  // Print into an in-memory writer and hand back whatever was accumulated.
  using (StringWriter buffer = new StringWriter()) {
    Print(message, buffer);
    return buffer.ToString();
  }
}
69
/// <summary>
/// Returns the text-format representation of the unknown field set
/// <paramref name="fields"/> as a string.
/// </summary>
public static string PrintToString(UnknownFieldSet fields) {
  // Print into an in-memory writer and hand back whatever was accumulated.
  using (StringWriter buffer = new StringWriter()) {
    Print(fields, buffer);
    return buffer.ToString();
  }
}
75
/// <summary>
/// Prints every entry of <c>message.AllFields</c>, followed by the message's
/// unknown fields, to <paramref name="generator"/>.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    PrintField(descriptor, pair.Value, generator);
  }

  // Unknown fields always come after the known ones.
  PrintUnknownFields(message.UnknownFields, generator);
}
82
/// <summary>
/// Prints a field to <paramref name="generator"/>. For a repeated field,
/// <paramref name="value"/> is enumerated and each element is printed as its own entry.
/// </summary>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: one "name: value" entry per element.
  foreach (object item in (IEnumerable) value) {
    PrintSingleField(field, item, generator);
  }
}
93
/// <summary>
/// Prints one field entry: the field name (bracketed for extensions), then either
/// ": value" for scalar fields or " { ... }" with indented contents for message-typed
/// fields, terminated by a newline.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization, which is
    // carried by the message type name rather than the field name.
    generator.Print(field.FieldType == FieldType.Group ? field.MessageType.Name : field.Name);
  } else {
    generator.Print("[");
    // MessageSet elements are special-cased for compatibility with proto1:
    // they are identified by the message type's full name.
    bool messageSetElement =
        field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(messageSetElement ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  }

  bool isNestedMessage = field.MappedType == MappedType.Message;
  if (isNestedMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isNestedMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
132
/// <summary>
/// Writes only the value part of a single field to <paramref name="generator"/>,
/// choosing the representation from <c>field.FieldType</c>.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  FieldType type = field.FieldType;
  switch (type) {
    case FieldType.Message:
    case FieldType.Group:
      // Nested messages are printed recursively, field by field.
      Print((IMessage) value, generator);
      break;

    case FieldType.Enum:
      // Enum values are written by name.
      EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
      generator.Print(enumValue.Name);
      break;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      break;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      break;

    case FieldType.Bool:
      // Explicitly use the lower-case Java spellings rather than bool.ToString().
      string literal = (bool) value ? "true" : "false";
      generator.Print(literal);
      break;

    case FieldType.Double:
    case FieldType.Float:
    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
    case FieldType.SFixed32:
    case FieldType.SFixed64:
      // Object.ToString would format using the current culture; always use the
      // invariant culture so the output is predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      break;
  }
}
180
/// <summary>
/// Prints every field of <paramref name="unknownFields"/> to <paramref name="generator"/>.
/// Each field is identified by its number. Varints are written in decimal,
/// fixed32/fixed64 values as zero-padded hex, length-delimited values as escaped
/// quoted strings, and groups as nested, indented blocks.
/// </summary>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format numbers with the invariant culture, like PrintFieldValue does, so
    // the output never depends on the current thread's culture.
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      // Groups nest: print the contained unknown fields one level deeper.
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
217
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 64-bit integer via ParseInteger
/// (hex, decimal or octal). A leading minus sign is rejected with a FormatException.
/// </summary>
internal static ulong ParseUInt64(string text) {
  const bool isSigned = false;
  const bool isLong = true;
  long raw = ParseInteger(text, isSigned, isLong);
  // ParseInteger carries the unsigned value in a long; reinterpret the bits.
  return (ulong) raw;
}
221
/// <summary>
/// Parses <paramref name="text"/> as a signed 64-bit integer via ParseInteger
/// (hex, decimal or octal, with an optional leading minus sign).
/// </summary>
internal static long ParseInt64(string text) {
  const bool isSigned = true;
  const bool isLong = true;
  long parsed = ParseInteger(text, isSigned, isLong);
  return parsed;
}
225
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 32-bit integer via ParseInteger
/// (hex, decimal or octal). A leading minus sign is rejected with a FormatException.
/// </summary>
internal static uint ParseUInt32(string text) {
  const bool isSigned = false;
  const bool isLong = false;
  long raw = ParseInteger(text, isSigned, isLong);
  // ParseInteger has already range-checked the value against uint.MaxValue.
  return (uint) raw;
}
229
/// <summary>
/// Parses <paramref name="text"/> as a signed 32-bit integer via ParseInteger
/// (hex, decimal or octal, with an optional leading minus sign).
/// </summary>
internal static int ParseInt32(string text) {
  const bool isSigned = true;
  const bool isLong = false;
  long raw = ParseInteger(text, isSigned, isLong);
  // ParseInteger has already range-checked the value against the 32-bit signed range.
  return (int) raw;
}
233
/// <summary>
/// Parses a float literal. The exact (case-sensitive) spellings "inf", "infinity",
/// "inff", "infinityf" (optionally preceded by "-"), "nan" and "nanf" map to the
/// corresponding special values; anything else is parsed as an invariant-culture
/// floating point number.
/// </summary>
/// <exception cref="FormatException"><paramref name="text"/> is not a valid number.</exception>
internal static float ParseFloat(string text) {
  switch (text) {
    case "-inf":
    case "-infinity":
    case "-inff":
    case "-infinityf":
      return float.NegativeInfinity;
    case "inf":
    case "infinity":
    case "inff":
    case "infinityf":
      return float.PositiveInfinity;
    case "nan":
    case "nanf":
      return float.NaN;
    default:
      // float.Parse(string, IFormatProvider) implicitly allows thousands
      // separators, so "1,5" would silently be read as 15. NumberStyles.Float
      // keeps sign, decimal point and exponent support but rejects separators.
      return float.Parse(text, NumberStyles.Float, CultureInfo.InvariantCulture);
  }
}
253
/// <summary>
/// Parses a double literal. The exact (case-sensitive) spellings "inf" and
/// "infinity" (optionally preceded by "-") and "nan" map to the corresponding
/// special values; anything else is parsed as an invariant-culture floating
/// point number.
/// </summary>
/// <exception cref="FormatException"><paramref name="text"/> is not a valid number.</exception>
internal static double ParseDouble(string text) {
  switch (text) {
    case "-inf":
    case "-infinity":
      return double.NegativeInfinity;
    case "inf":
    case "infinity":
      return double.PositiveInfinity;
    case "nan":
      return double.NaN;
    default:
      // double.Parse(string, IFormatProvider) implicitly allows thousands
      // separators, so "1,5" would silently be read as 15. NumberStyles.Float
      // keeps sign, decimal point and exponent support but rejects separators.
      return double.Parse(text, NumberStyles.Float, CultureInfo.InvariantCulture);
  }
}
268
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The literal to parse.</param>
/// <param name="isSigned">Whether a negative value is allowed and the signed range applies.</param>
/// <param name="isLong">Whether the target range is 64-bit (true) or 32-bit (false).</param>
/// <returns>
/// The parsed value. Unsigned 64-bit values above long.MaxValue are returned as the
/// equivalent bit pattern; callers cast the result to the target type.
/// </returns>
/// <exception cref="FormatException">
/// The text is malformed or out of range for the requested type. This is the only
/// exception type thrown for bad (non-null) input.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  // Ordinal comparisons: these are syntax markers, not linguistic text.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  // Validate the digits up front. Without this, Convert.ToUInt64 throws
  // ArgumentOutOfRangeException for "" (e.g. input "0x") and ArgumentException
  // for an embedded sign (e.g. "0x-1"), and ulong.Parse accepts "+" and
  // surrounding whitespace, none of which are valid here.
  if (text.Length == 0) {
    throw new FormatException("Invalid integer literal: " + original);
  }
  foreach (char c in text) {
    bool validDigit = radix == 16
        ? (('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
        : ('0' <= c && c < '0' + radix);
    if (!validDigit) {
      throw new FormatException("Invalid integer literal: " + original);
    }
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    result = radix == 10
        ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw CreateOutOfRangeException(original, isSigned, isLong);
  }

  if (negative) {
    // The magnitude of the most negative value is one more than the positive maximum.
    ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
    if (result > max) {
      throw CreateOutOfRangeException(original, true, isLong);
    }
    // unchecked: a magnitude of 2^63 must wrap to long.MinValue even if the
    // assembly is compiled with overflow checking enabled.
    return unchecked(-((long) result));
  } else {
    ulong max = isSigned
        ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
        : (isLong ? ulong.MaxValue : uint.MaxValue);
    if (result > max) {
      throw CreateOutOfRangeException(original, isSigned, isLong);
    }
    // unchecked: unsigned 64-bit values above long.MaxValue are returned as their bit pattern.
    return unchecked((long) result);
  }
}

/// <summary>
/// Builds the FormatException reported when a literal does not fit the requested integer type.
/// </summary>
private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong) {
  string numberDescription = (isLong ? "64" : "32") + "-bit " + (isSigned ? "" : "un") + "signed integer";
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
321
/// <summary>
/// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
328
/// <summary>
/// Returns true when <paramref name="c"/> is a hexadecimal digit:
/// '0'-'9', 'a'-'f' or 'A'-'F'.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
337
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns the
/// numeric value: '0'-'9' map to 0-9, and letters map to 10-35 regardless of case.
/// The character is not validated; anything outside '0'-'9' and 'a'-'z' is
/// treated as an upper-case letter, so callers must check the input first.
/// </summary>
private static int ParseDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'z') {
    return 10 + (c - 'a');
  }
  return 10 + (c - 'A');
}
351
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString raw = UnescapeBytes(input);
  return raw.ToStringUtf8();
}
359
/// <summary>
/// Like <see cref="EscapeBytes" /> but escapes a text string.
/// The string is first encoded as UTF-8, then each byte is escaped individually.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
368
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. Backslash,
/// single-quote, double-quote and the control characters \a \b \f \n \r \t \v
/// use their short-hand escapes; other bytes that are not printable 7-bit ASCII
/// are written as 3-digit octal escapes.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static String EscapeBytes(ByteString input) {
  StringBuilder escaped = new StringBuilder(input.Length);
  foreach (byte current in input) {
    // First see whether the byte has a short-hand escape.
    // (C# has no \a or \v literals, hence the raw values.)
    string shorthand = null;
    switch (current) {
      case 0x07: shorthand = "\\a"; break;
      case 0x08: shorthand = "\\b"; break;
      case 0x09: shorthand = "\\t"; break;
      case 0x0a: shorthand = "\\n"; break;
      case 0x0b: shorthand = "\\v"; break;
      case 0x0c: shorthand = "\\f"; break;
      case 0x0d: shorthand = "\\r"; break;
      case 0x22: shorthand = "\\\""; break;
      case 0x27: shorthand = "\\\'"; break;
      case 0x5c: shorthand = "\\\\"; break;
    }
    if (shorthand != null) {
      escaped.Append(shorthand);
      continue;
    }

    if (current >= 0x20 && current < 128) {
      // Printable ASCII passes through unchanged.
      escaped.Append((char) current);
      continue;
    }

    // Everything else becomes a backslash followed by three octal digits.
    escaped.Append('\\');
    escaped.Append((char) ('0' + ((current >> 6) & 3)));
    escaped.Append((char) ('0' + ((current >> 3) & 7)));
    escaped.Append((char) ('0' + (current & 7)));
  }
  return escaped.ToString();
}
407
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// Octal escapes take one to three digits; hex escapes ("\x") take one or two digits.
/// </summary>
/// <exception cref="FormatException">
/// The input contains a character outside the range 32-127, ends in a lone
/// backslash, or contains an unrecognised or incomplete escape sequence.
/// </exception>
internal static ByteString UnescapeBytes(string input) {
  // The output can never be longer than the input: every escape shrinks.
  byte[] buffer = new byte[input.Length];
  int written = 0;
  int index = 0;

  while (index < input.Length) {
    char current = input[index++];
    if (current < 32 || current > 127) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= input.Length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    char escape = input[index++];
    if (IsOctal(escape)) {
      // Octal escape: the first digit plus up to two more consecutive octal digits.
      int octal = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
        octal = octal * 8 + ParseDigit(input[index++]);
      }
      buffer[written++] = (byte) octal;
      continue;
    }

    switch (escape) {
      case 'a': buffer[written++] = 0x07; break;
      case 'b': buffer[written++] = (byte) '\b'; break;
      case 'f': buffer[written++] = (byte) '\f'; break;
      case 'n': buffer[written++] = (byte) '\n'; break;
      case 'r': buffer[written++] = (byte) '\r'; break;
      case 't': buffer[written++] = (byte) '\t'; break;
      case 'v': buffer[written++] = 0x0b; break;
      case '\\': buffer[written++] = (byte) '\\'; break;
      case '\'': buffer[written++] = (byte) '\''; break;
      case '"': buffer[written++] = (byte) '\"'; break;

      case 'x':
        // Hex escape: at least one digit is required, a second is optional.
        if (index >= input.Length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int hex = ParseDigit(input[index++]);
        if (index < input.Length && IsHex(input[index])) {
          hex = hex * 16 + ParseDigit(input[index++]);
        }
        buffer[written++] = (byte) hex;
        break;

      default:
        throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
    }
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
478
/// <summary>
/// Parses the text-format <paramref name="text"/> and merges its fields into
/// <paramref name="builder"/>, passing <c>ExtensionRegistry.Empty</c> as the registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
482
/// <summary>
/// Reads text-format input from <paramref name="reader"/> and merges its fields into
/// <paramref name="builder"/>, passing <c>ExtensionRegistry.Empty</c> as the registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
486
/// <summary>
/// Reads <paramref name="reader"/> to its end, then parses the resulting text and
/// merges its fields into <paramref name="builder"/>, resolving extensions through
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  // The whole input is buffered up front; the string overload does the parsing.
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
490
/// <summary>
/// Parses the text-format <paramref name="text"/> and merges its fields into
/// <paramref name="builder"/>, resolving extensions through <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  // Consume one top-level field at a time until the tokenizer runs out of input.
  for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
    MergeField(tokens, registry, builder);
  }
}
498
/// <summary>
/// Parses a single field (name and value) from the specified tokenizer and merges
/// it into the builder. Message-typed fields are parsed recursively. A repeated
/// field has the value appended; any other field has the value set.
/// </summary>
/// <remarks>
/// Parse errors are reported by throwing the exceptions created by the tokenizer's
/// CreateFormatException / CreateFormatExceptionPreviousToken methods.
/// </remarks>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
                               IBuilder builder) {
  MessageDescriptor containingType = builder.DescriptorForType;
  ExtensionInfo extension = null;
  FieldDescriptor field;

  if (!tokenizer.TryConsume("[")) {
    // A regular field, identified by a bare name.
    string fieldName = tokenizer.ConsumeIdentifier();
    field = containingType.FindDescriptor<FieldDescriptor>(fieldName);

    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names. So if the exact lookup fails, retry with the lower-cased name.
    if (field == null) {
      // Explicitly specify the invariant culture so that this code does not break when
      // executing in Turkey.
      string lowered = fieldName.ToLower(CultureInfo.InvariantCulture);
      field = containingType.FindDescriptor<FieldDescriptor>(lowered);
      // The case-insensitive match is only acceptable for groups.
      if (field != null && field.FieldType != FieldType.Group) {
        field = null;
      }
    }

    // Conversely, a group may only be referred to by its type name.
    if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != fieldName) {
      field = null;
    }

    if (field == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + containingType.FullName + "\" has no field named \"" + fieldName + "\".");
    }
  } else {
    // An extension: a dotted, fully-qualified name inside square brackets.
    StringBuilder qualified = new StringBuilder(tokenizer.ConsumeIdentifier());
    while (tokenizer.TryConsume(".")) {
      qualified.Append(".");
      qualified.Append(tokenizer.ConsumeIdentifier());
    }
    string extensionName = qualified.ToString();

    extension = extensionRegistry[extensionName];
    if (extension == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Extension \"" + extensionName + "\" not found in the ExtensionRegistry.");
    }
    if (extension.Descriptor.ContainingType != containingType) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Extension \"" + extensionName + "\" does not extend message type \"" +
          containingType.FullName + "\".");
    }

    tokenizer.Consume("]");
    field = extension.Descriptor;
  }

  object value = null;

  if (field.MappedType != MappedType.Message) {
    // Scalar value: "name: value", with the colon mandatory.
    tokenizer.Consume(":");

    switch (field.FieldType) {
      case FieldType.Int32:
      case FieldType.SInt32:
      case FieldType.SFixed32:
        value = tokenizer.ConsumeInt32();
        break;

      case FieldType.UInt32:
      case FieldType.Fixed32:
        value = tokenizer.ConsumeUInt32();
        break;

      case FieldType.Int64:
      case FieldType.SInt64:
      case FieldType.SFixed64:
        value = tokenizer.ConsumeInt64();
        break;

      case FieldType.UInt64:
      case FieldType.Fixed64:
        value = tokenizer.ConsumeUInt64();
        break;

      case FieldType.Double:
        value = tokenizer.ConsumeDouble();
        break;

      case FieldType.Float:
        value = tokenizer.ConsumeFloat();
        break;

      case FieldType.Bool:
        value = tokenizer.ConsumeBoolean();
        break;

      case FieldType.Bytes:
        value = tokenizer.ConsumeByteString();
        break;

      case FieldType.String:
        value = tokenizer.ConsumeString();
        break;

      case FieldType.Enum: {
        // Enum values may be given either by name or by number.
        EnumDescriptor enumType = field.EnumType;

        if (!tokenizer.LookingAtInteger()) {
          String id = tokenizer.ConsumeIdentifier();
          value = enumType.FindValueByName(id);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value named \"" + id + "\".");
          }
        } else {
          int number = tokenizer.ConsumeInt32();
          value = enumType.FindValueByNumber(number);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value with number " + number + ".");
          }
        }

        break;
      }

      case FieldType.Message:
      case FieldType.Group:
        // These map to MappedType.Message and are handled by the other branch.
        throw new InvalidOperationException("Can't get here.");
    }
  } else {
    // Nested message: the colon is optional and the body is delimited by
    // either "<" ... ">" or "{" ... "}".
    tokenizer.TryConsume(":");

    string closer = "}";
    if (tokenizer.TryConsume("<")) {
      closer = ">";
    } else {
      tokenizer.Consume("{");
    }

    IBuilder nested;
    if (extension != null) {
      nested = extension.DefaultInstance.WeakCreateBuilderForType();
    } else {
      nested = builder.CreateBuilderForField(field);
    }

    // Merge nested fields until the matching closing token is consumed.
    while (true) {
      if (tokenizer.TryConsume(closer)) {
        break;
      }
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + closer + "\".");
      }
      MergeField(tokenizer, extensionRegistry, nested);
    }

    value = nested.WeakBuild();
  }

  if (!field.IsRepeated) {
    builder.SetField(field, value);
    return;
  }
  builder.WeakAddRepeatedField(field, value);
}
670 }
671}