blob: 81033087648d5147d4d1761fca968e9b86d368f7 [file] [log] [blame]
Jon Skeet0aac0e42009-09-09 18:48:02 +01001#region Copyright notice and license
Jon Skeet60c059b2008-10-23 21:17:56 +01002// Protocol Buffers - Google's data interchange format
3// Copyright 2008 Google Inc. All rights reserved.
4// http://github.com/jskeet/dotnet-protobufs/
5// Original C++/Java/Python code:
Jon Skeet68036862008-10-22 13:30:34 +01006// http://code.google.com/p/protobuf/
7//
Jon Skeet60c059b2008-10-23 21:17:56 +01008// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
Jon Skeet68036862008-10-22 13:30:34 +010011//
Jon Skeet60c059b2008-10-23 21:17:56 +010012// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14// * Redistributions in binary form must reproduce the above
15// copyright notice, this list of conditions and the following disclaimer
16// in the documentation and/or other materials provided with the
17// distribution.
18// * Neither the name of Google Inc. nor the names of its
19// contributors may be used to endorse or promote products derived from
20// this software without specific prior written permission.
Jon Skeet68036862008-10-22 13:30:34 +010021//
Jon Skeet60c059b2008-10-23 21:17:56 +010022// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Jon Skeet0aac0e42009-09-09 18:48:02 +010033#endregion
34
Jon Skeet68036862008-10-22 13:30:34 +010035using System;
36using System.Collections.Generic;
37using System.Globalization;
38using System.IO;
39using System.Text;
40using Google.ProtocolBuffers.Descriptors;
41using System.Collections;
42
43namespace Google.ProtocolBuffers {
  /// <summary>
  /// Provides ASCII text formatting support for messages: printing via the
  /// Print/PrintToString methods and parsing via the Merge methods.
  /// </summary>
48 public static class TextFormat {
49
/// <summary>
/// Writes the text-format representation of <paramref name="message"/> to
/// <paramref name="output"/>.
/// </summary>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a TextGenerator and delegate to the private overload.
  Print(message, new TextGenerator(output));
}
58
/// <summary>
/// Writes the text-format representation of the unknown field set
/// <paramref name="fields"/> to <paramref name="output"/>.
/// </summary>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  PrintUnknownFields(fields, new TextGenerator(output));
}
66
/// <summary>
/// Returns the text-format representation of <paramref name="message"/> as a string.
/// </summary>
public static string PrintToString(IMessage message) {
  StringBuilder buffer = new StringBuilder();
  Print(message, new StringWriter(buffer));
  return buffer.ToString();
}
72
/// <summary>
/// Returns the text-format representation of the unknown field set
/// <paramref name="fields"/> as a string.
/// </summary>
public static string PrintToString(UnknownFieldSet fields) {
  StringBuilder buffer = new StringBuilder();
  Print(fields, new StringWriter(buffer));
  return buffer.ToString();
}
78
/// <summary>
/// Prints every entry of the message's AllFields, followed by its unknown fields.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    object fieldValue = pair.Value;
    PrintField(descriptor, fieldValue, generator);
  }
  UnknownFieldSet unknown = message.UnknownFields;
  PrintUnknownFields(unknown, generator);
}
85
/// <summary>
/// Prints a field. A repeated field's value is enumerated and each element is printed
/// as its own entry; any other field is printed once.
/// </summary>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: the value is a sequence, one printed entry per element.
  IEnumerable elements = (IEnumerable) value;
  foreach (object item in elements) {
    PrintSingleField(field, item, generator);
  }
}
96
/// <summary>
/// Prints one field entry: the field name, then either <c>: value</c> for
/// non-message fields or an indented <c>{ ... }</c> block for message fields,
/// terminated by a newline.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization,
    // so the message type name is used instead of the field name.
    bool isGroup = field.FieldType == FieldType.Group;
    generator.Print(isGroup ? field.MessageType.Name : field.Name);
  } else {
    generator.Print("[");
    // We special-case MessageSet elements for compatibility with proto1.
    // NOTE(review): the last comparison mirrors an "object equality" check in the
    // Java original - presumably descriptor identity; confirm == is not overloaded.
    bool messageSetElement = field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(messageSetElement ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  }

  bool isMessage = field.MappedType == MappedType.Message;
  if (isMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
135
/// <summary>
/// Prints just the value of a field, formatted according to the field's type.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Bool:
      // Explicitly use the lower-case Java spelling of true/false.
      generator.Print((bool) value ? "true" : "false");
      break;

    case FieldType.Enum:
      generator.Print(((EnumValueDescriptor) value).Name);
      break;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      break;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      break;

    case FieldType.Double:
    case FieldType.Float:
    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
    case FieldType.SFixed32:
    case FieldType.SFixed64: {
      // Plain Object.ToString would format with the current culture; the
      // invariant culture keeps the output predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      break;
    }

    case FieldType.Group:
    case FieldType.Message: {
      IMessage nested = value as IMessage;
      if (nested != null) {
        Print(nested, generator);
      } else {
        // Nothing is printed for values that are not IMessage.
#warning ToDo - What do we print for IMessageLite?
      }
      break;
    }
  }
}
187
/// <summary>
/// Prints every entry of an unknown field set. Each entry is keyed by its field
/// number; varints are printed in decimal, fixed32/fixed64 values as zero-padded
/// hex, length-delimited values as escaped quoted strings, and groups as nested
/// indented blocks.
/// </summary>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format numbers with the invariant culture, matching PrintFieldValue, so the
    // output does not depend on the thread's current culture.
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
224
/// <summary>
/// Parses text as an unsigned 64-bit integer via ParseInteger.
/// </summary>
internal static ulong ParseUInt64(string text) {
  long raw = ParseInteger(text, false, true);
  return (ulong) raw;
}
228
/// <summary>
/// Parses text as a signed 64-bit integer via ParseInteger.
/// </summary>
internal static long ParseInt64(string text) {
  long parsed = ParseInteger(text, true, true);
  return parsed;
}
232
/// <summary>
/// Parses text as an unsigned 32-bit integer via ParseInteger.
/// </summary>
internal static uint ParseUInt32(string text) {
  long raw = ParseInteger(text, false, false);
  return (uint) raw;
}
236
/// <summary>
/// Parses text as a signed 32-bit integer via ParseInteger.
/// </summary>
internal static int ParseInt32(string text) {
  long raw = ParseInteger(text, true, false);
  return (int) raw;
}
240
/// <summary>
/// Parses a single-precision value. The lower-case spellings of infinity and NaN
/// (optionally suffixed with "f") are recognised explicitly; everything else is
/// parsed with the invariant culture.
/// </summary>
internal static float ParseFloat(string text) {
  if (text == "-inf" || text == "-infinity" || text == "-inff" || text == "-infinityf") {
    return float.NegativeInfinity;
  }
  if (text == "inf" || text == "infinity" || text == "inff" || text == "infinityf") {
    return float.PositiveInfinity;
  }
  if (text == "nan" || text == "nanf") {
    return float.NaN;
  }
  return float.Parse(text, CultureInfo.InvariantCulture);
}
260
/// <summary>
/// Parses a double-precision value. The lower-case spellings of infinity and NaN
/// are recognised explicitly; everything else is parsed with the invariant culture.
/// </summary>
internal static double ParseDouble(string text) {
  if (text == "-inf" || text == "-infinity") {
    return double.NegativeInfinity;
  }
  if (text == "inf" || text == "infinity") {
    return double.PositiveInfinity;
  }
  if (text == "nan") {
    return double.NaN;
  }
  return double.Parse(text, CultureInfo.InvariantCulture);
}
275
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="isSigned">Whether a negative value is acceptable; also selects the signed range.</param>
/// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
/// <returns>
/// The parsed value. Unsigned 64-bit results are returned as their bit pattern in a long;
/// callers cast back to the unsigned type.
/// </returns>
/// <exception cref="FormatException">
/// The text is not a valid number or is out of range for the requested type.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  // Ordinal comparisons: these prefixes are syntax, not linguistic text.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    result = radix == 10
        ? ulong.Parse(text, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw CreateIntegerOutOfRangeException(isSigned, isLong, original);
  } catch (ArgumentException) {
    // Convert.ToUInt64 reports an empty digit string (e.g. "0x") as ArgumentOutOfRangeException
    // and a sign after the radix prefix (e.g. "0x-1") as ArgumentException; normalise both.
    throw new FormatException("Invalid integer: " + original);
  }

  if (negative) {
    // The magnitude of the most negative value is one more than the largest positive value.
    ulong max = isLong ? 0x8000000000000000UL : 0x80000000L;
    if (result > max) {
      // isSigned is necessarily true here, so the description reads "N-bit signed integer".
      throw CreateIntegerOutOfRangeException(true, isLong, original);
    }
    return -((long) result);
  } else {
    ulong max = isSigned
        ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
        : (isLong ? ulong.MaxValue : uint.MaxValue);
    if (result > max) {
      throw CreateIntegerOutOfRangeException(isSigned, isLong, original);
    }
    return (long) result;
  }
}

/// <summary>
/// Builds the FormatException reported when a number does not fit the requested integer type.
/// </summary>
private static FormatException CreateIntegerOutOfRangeException(bool isSigned, bool isLong, string original) {
  string numberDescription = string.Format(CultureInfo.InvariantCulture,
      "{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
328
/// <summary>
/// Returns true when the character is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
335
/// <summary>
/// Returns true when the character is a hexadecimal digit: 0-9, a-f or A-F.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
344
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns its numeric
/// value. No validation is performed: any character outside 0-9 and a-z is treated
/// as an upper-case letter digit.
/// </summary>
private static int ParseDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  // Letters count from 10; pick the base character matching the letter's case.
  int letterBase = (c >= 'a' && c <= 'z') ? 'a' : 'A';
  return c - letterBase + 10;
}
358
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString raw = UnescapeBytes(input);
  return raw.ToStringUtf8();
}
366
/// <summary>
/// Like <see cref="EscapeBytes" /> but escapes a text string.
/// The string is first encoded as UTF-8, then each byte is escaped individually.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
375
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. Backslash,
/// single-quote and double-quote are escaped, the common control characters
/// use their short-hand escapes, and every other byte that is not printable
/// 7-bit ASCII is written as a 3-digit octal escape.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static String EscapeBytes(ByteString input) {
  StringBuilder escaped = new StringBuilder(input.Length);
  foreach (byte current in input) {
    // Look for a short-hand escape first.
    string shortHand = null;
    switch (current) {
      // C# does not use \a or \v
      case 0x07:        shortHand = "\\a";  break;
      case 0x0b:        shortHand = "\\v";  break;
      case (byte) '\b': shortHand = "\\b";  break;
      case (byte) '\f': shortHand = "\\f";  break;
      case (byte) '\n': shortHand = "\\n";  break;
      case (byte) '\r': shortHand = "\\r";  break;
      case (byte) '\t': shortHand = "\\t";  break;
      case (byte) '\\': shortHand = "\\\\"; break;
      case (byte) '\'': shortHand = "\\\'"; break;
      case (byte) '"':  shortHand = "\\\""; break;
    }
    if (shortHand != null) {
      escaped.Append(shortHand);
      continue;
    }

    if (current >= 0x20 && current < 128) {
      // Printable ASCII is copied through unchanged.
      escaped.Append((char) current);
      continue;
    }

    // Everything else becomes a backslash followed by three octal digits.
    escaped.Append('\\');
    escaped.Append((char) ('0' + ((current >> 6) & 3)));
    escaped.Append((char) ('0' + ((current >> 3) & 7)));
    escaped.Append((char) ('0' + (current & 7)));
  }
  return escaped.ToString();
}
414
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <exception cref="FormatException">
/// The input contains a character outside the range 32-127, ends with a lone
/// backslash, or contains an unrecognised escape sequence.
/// </exception>
internal static ByteString UnescapeBytes(string input) {
  // Each escape sequence shrinks to a single byte, so the input length is an upper bound.
  byte[] buffer = new byte[input.Length];
  int written = 0;
  int index = 0;
  while (index < input.Length) {
    char current = input[index++];
    if (current > 127 || current < 32) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= input.Length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    char escape = input[index++];
    if (escape >= '0' && escape <= '7') {
      // Octal escape: one digit, optionally followed by up to two more.
      int octal = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
        octal = octal * 8 + ParseDigit(input[index++]);
      }
      buffer[written++] = (byte) octal;
      continue;
    }

    switch (escape) {
      case 'a':  buffer[written++] = 0x07; break;
      case 'v':  buffer[written++] = 0x0b; break;
      case 'b':  buffer[written++] = (byte) '\b'; break;
      case 'f':  buffer[written++] = (byte) '\f'; break;
      case 'n':  buffer[written++] = (byte) '\n'; break;
      case 'r':  buffer[written++] = (byte) '\r'; break;
      case 't':  buffer[written++] = (byte) '\t'; break;
      case '\\': buffer[written++] = (byte) '\\'; break;
      case '\'': buffer[written++] = (byte) '\''; break;
      case '"':  buffer[written++] = (byte) '\"'; break;

      case 'x':
        // Hex escape: one mandatory digit, optionally followed by a second.
        if (index >= input.Length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int hex = ParseDigit(input[index++]);
        if (index < input.Length && IsHex(input[index])) {
          hex = hex * 16 + ParseDigit(input[index++]);
        }
        buffer[written++] = (byte) hex;
        break;

      default:
        throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
    }
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
485
/// <summary>
/// Parses text-format <paramref name="text"/> and merges it into
/// <paramref name="builder"/>, using the empty extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
489
/// <summary>
/// Parses text-format input from <paramref name="reader"/> and merges it into
/// <paramref name="builder"/>, using the empty extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
493
/// <summary>
/// Reads <paramref name="reader"/> to its end and merges the resulting text into
/// <paramref name="builder"/>, looking up extensions in <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
497
/// <summary>
/// Tokenizes <paramref name="text"/> and merges fields into <paramref name="builder"/>
/// one at a time until the tokenizer reaches the end, looking up extensions in
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  for (TextTokenizer tokenizer = new TextTokenizer(text); !tokenizer.AtEnd; ) {
    MergeField(tokenizer, registry, builder);
  }
}
505
/// <summary>
/// Parses a single field (its name and its value) from the specified tokenizer
/// and merges it into the builder. Repeated fields are appended; other fields are set.
/// </summary>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder) {
  MessageDescriptor messageType = builder.DescriptorForType;
  ExtensionInfo extension = null;
  FieldDescriptor field;

  if (tokenizer.TryConsume("[")) {
    // An extension, written as [fully.qualified.name].
    extension = ReadExtensionReference(tokenizer, extensionRegistry, messageType);
    field = extension.Descriptor;
  } else {
    field = ReadFieldReference(tokenizer, messageType);
  }

  object value;
  if (field.MappedType == MappedType.Message) {
    value = ReadMessageValue(tokenizer, extensionRegistry, builder, field, extension);
  } else {
    tokenizer.Consume(":");
    value = ReadScalarValue(tokenizer, field);
  }

  if (field.IsRepeated) {
    builder.WeakAddRepeatedField(field, value);
  } else {
    builder.SetField(field, value);
  }
}

/// <summary>
/// Reads the dotted extension name and the closing "]" (the opening "[" has already
/// been consumed) and looks the extension up in the registry.
/// </summary>
private static ExtensionInfo ReadExtensionReference(TextTokenizer tokenizer,
    ExtensionRegistry extensionRegistry, MessageDescriptor messageType) {
  StringBuilder qualifiedName = new StringBuilder(tokenizer.ConsumeIdentifier());
  while (tokenizer.TryConsume(".")) {
    qualifiedName.Append(".");
    qualifiedName.Append(tokenizer.ConsumeIdentifier());
  }

  ExtensionInfo extension = extensionRegistry[qualifiedName.ToString()];
  if (extension == null) {
    throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" not found in the ExtensionRegistry.");
  }
  if (extension.Descriptor.ContainingType != messageType) {
    throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" does not extend message type \"" +
        messageType.FullName + "\".");
  }

  tokenizer.Consume("]");
  return extension;
}

/// <summary>
/// Reads a plain field name and resolves it against the message type, applying the
/// special naming rules for groups.
/// </summary>
private static FieldDescriptor ReadFieldReference(TextTokenizer tokenizer, MessageDescriptor messageType) {
  String name = tokenizer.ConsumeIdentifier();
  FieldDescriptor field = messageType.FindDescriptor<FieldDescriptor>(name);

  // Group names are expected to be capitalized as they appear in the
  // .proto file, which actually matches their type names, not their field
  // names.
  if (field == null) {
    // Explicitly specify the invariant culture so that this code does not break when
    // executing in Turkey.
    String lowerName = name.ToLower(CultureInfo.InvariantCulture);
    field = messageType.FindDescriptor<FieldDescriptor>(lowerName);
    // If the case-insensitive match worked but the field is NOT a group,
    // the match is discarded.
    if (field != null && field.FieldType != FieldType.Group) {
      field = null;
    }
  }
  // Again, special-case group names as described above: a group only matches
  // when the identifier equals its message type name.
  if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
    field = null;
  }

  if (field == null) {
    throw tokenizer.CreateFormatExceptionPreviousToken(
        "Message type \"" + messageType.FullName + "\" has no field named \"" + name + "\".");
  }
  return field;
}

/// <summary>
/// Reads a nested message delimited by &lt; &gt; or { }, merging its fields into a
/// sub-builder, and returns the built message.
/// </summary>
private static object ReadMessageValue(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder, FieldDescriptor field, ExtensionInfo extension) {
  tokenizer.TryConsume(":");  // optional

  String endToken = ">";
  if (!tokenizer.TryConsume("<")) {
    tokenizer.Consume("{");
    endToken = "}";
  }

  IBuilder subBuilder;
  if (extension == null) {
    subBuilder = builder.CreateBuilderForField(field);
  } else {
    subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
  }

  while (!tokenizer.TryConsume(endToken)) {
    if (tokenizer.AtEnd) {
      throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
    }
    MergeField(tokenizer, extensionRegistry, subBuilder);
  }

  return subBuilder.WeakBuild();
}

/// <summary>
/// Reads a non-message value of the field's type (the ":" has already been consumed).
/// Returns null when the field type matches none of the handled cases.
/// </summary>
private static object ReadScalarValue(TextTokenizer tokenizer, FieldDescriptor field) {
  switch (field.FieldType) {
    case FieldType.Int32:
    case FieldType.SInt32:
    case FieldType.SFixed32:
      return tokenizer.ConsumeInt32();

    case FieldType.Int64:
    case FieldType.SInt64:
    case FieldType.SFixed64:
      return tokenizer.ConsumeInt64();

    case FieldType.UInt32:
    case FieldType.Fixed32:
      return tokenizer.ConsumeUInt32();

    case FieldType.UInt64:
    case FieldType.Fixed64:
      return tokenizer.ConsumeUInt64();

    case FieldType.Float:
      return tokenizer.ConsumeFloat();

    case FieldType.Double:
      return tokenizer.ConsumeDouble();

    case FieldType.Bool:
      return tokenizer.ConsumeBoolean();

    case FieldType.String:
      return tokenizer.ConsumeString();

    case FieldType.Bytes:
      return tokenizer.ConsumeByteString();

    case FieldType.Enum: {
      EnumDescriptor enumType = field.EnumType;
      object enumValue;

      // Enum values may be given either by number or by name.
      if (tokenizer.LookingAtInteger()) {
        int number = tokenizer.ConsumeInt32();
        enumValue = enumType.FindValueByNumber(number);
        if (enumValue == null) {
          throw tokenizer.CreateFormatExceptionPreviousToken(
              "Enum type \"" + enumType.FullName +
              "\" has no value with number " + number + ".");
        }
      } else {
        String id = tokenizer.ConsumeIdentifier();
        enumValue = enumType.FindValueByName(id);
        if (enumValue == null) {
          throw tokenizer.CreateFormatExceptionPreviousToken(
              "Enum type \"" + enumType.FullName +
              "\" has no value named \"" + id + "\".");
        }
      }
      return enumValue;
    }

    case FieldType.Message:
    case FieldType.Group:
      // Message-typed fields are handled by the caller before reaching this method.
      throw new InvalidOperationException("Can't get here.");
  }
  return null;
}
677 }
678}