blob: 5f07e693ac91da15ae5a5fc13070375968cc9457 [file] [log] [blame]
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
34
Jon Skeet68036862008-10-22 13:30:34 +010035using System;
36using System.Collections.Generic;
37using System.Globalization;
38using System.IO;
39using System.Text;
40using Google.ProtocolBuffers.Descriptors;
41using System.Collections;
42
43namespace Google.ProtocolBuffers {
  /// <summary>
  /// Provides ASCII text formatting support for messages: printing through
  /// the Print and PrintToString methods, and parsing through the Merge methods.
  /// </summary>
48 public static class TextFormat {
49
/// <summary>
/// Writes a textual representation of <paramref name="message"/> to
/// <paramref name="output"/>.
/// </summary>
/// <param name="message">The message to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a generator and delegate to the generator-based overload.
  Print(message, new TextGenerator(output));
}
58
/// <summary>
/// Writes a textual representation of <paramref name="fields" /> to
/// <paramref name="output"/>.
/// </summary>
/// <param name="fields">The unknown field set to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  PrintUnknownFields(fields, new TextGenerator(output));
}
66
/// <summary>
/// Returns the textual representation of <paramref name="message"/> as a string.
/// </summary>
/// <param name="message">The message to print.</param>
/// <returns>Everything that Print wrote for the message.</returns>
public static string PrintToString(IMessage message) {
  StringBuilder buffer = new StringBuilder();
  // The writer appends straight into the buffer, so the buffer holds the full output.
  Print(message, new StringWriter(buffer));
  return buffer.ToString();
}
72
/// <summary>
/// Returns the textual representation of <paramref name="fields"/> as a string.
/// </summary>
/// <param name="fields">The unknown field set to print.</param>
/// <returns>Everything that Print wrote for the field set.</returns>
public static string PrintToString(UnknownFieldSet fields) {
  StringBuilder buffer = new StringBuilder();
  // The writer appends straight into the buffer, so the buffer holds the full output.
  Print(fields, new StringWriter(buffer));
  return buffer.ToString();
}
78
/// <summary>
/// Prints every entry of the message's AllFields, followed by its unknown
/// fields, to <paramref name="generator"/>.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    PrintField(descriptor, pair.Value, generator);
  }
  PrintUnknownFields(message.UnknownFields, generator);
}
85
/// <summary>
/// Prints one field. A repeated field's value is enumerated and each
/// element is printed as its own entry.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">The field value; an enumerable when the field is repeated.</param>
/// <param name="generator">Destination for the text.</param>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  foreach (object item in (IEnumerable) value) {
    PrintSingleField(field, item, generator);
  }
}
96
/// <summary>
/// Prints a single (non-repeated) field occurrence: its name, then either
/// ": value" for scalar fields or an indented "{ ... }" block for message fields.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization, which is
    // the name of the group's message type rather than the field name.
    bool isGroup = field.FieldType == FieldType.Group;
    generator.Print(isGroup ? field.MessageType.Name : field.Name);
  } else {
    generator.Print("[");
    // MessageSet elements are special-cased for compatibility with proto1:
    // they are printed under the name of their message type.
    bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  }

  bool isMessage = field.MappedType == MappedType.Message;
  if (isMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
135
/// <summary>
/// Prints only the value part of a field, formatted according to the
/// field's declared type. Field types not listed below print nothing.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Message:
    case FieldType.Group:
      // Nested messages are printed recursively.
      Print((IMessage) value, generator);
      return;

    case FieldType.Enum:
      generator.Print(((EnumValueDescriptor) value).Name);
      return;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      return;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      return;

    case FieldType.Bool:
      // Spell out the lowercase Java-style literals explicitly.
      generator.Print((bool) value ? "true" : "false");
      return;

    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.SFixed32:
    case FieldType.SFixed64:
    case FieldType.Float:
    case FieldType.Double:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
      // A plain ToString() would use the current culture; force the
      // invariant culture so the output is predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      return;
  }
}
183
/// <summary>
/// Prints every entry of <paramref name="unknownFields"/>, keyed by field number.
/// Varints are printed in decimal, fixed32/fixed64 values as zero-padded hex,
/// length-delimited values as escaped quoted strings and groups as nested blocks.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so the output does not
/// depend on the thread's current culture, consistent with PrintFieldValue.
/// </remarks>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format the field number once and reuse it for every value of this field.
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
220
/// <summary>
/// Parses an unsigned 64-bit integer written in decimal, hex (leading 0x)
/// or octal (leading 0).
/// </summary>
/// <param name="text">The text to parse.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="FormatException">The text is malformed, negative or out of range.</exception>
internal static ulong ParseUInt64(string text) {
  // ParseInteger hands back values above long.MaxValue as negative longs, so
  // the conversion must wrap; make that explicit so it also works when the
  // assembly is compiled with overflow checking enabled.
  return unchecked((ulong) ParseInteger(text, false, true));
}
224
/// <summary>
/// Parses a signed 64-bit integer written in decimal, hex (leading 0x)
/// or octal (leading 0), optionally preceded by a minus sign.
/// </summary>
internal static long ParseInt64(string text) {
  long parsed = ParseInteger(text, true, true);
  return parsed;
}
228
/// <summary>
/// Parses an unsigned 32-bit integer written in decimal, hex (leading 0x)
/// or octal (leading 0).
/// </summary>
internal static uint ParseUInt32(string text) {
  long parsed = ParseInteger(text, false, false);
  return (uint) parsed;
}
232
/// <summary>
/// Parses a signed 32-bit integer written in decimal, hex (leading 0x)
/// or octal (leading 0), optionally preceded by a minus sign.
/// </summary>
internal static int ParseInt32(string text) {
  long parsed = ParseInteger(text, true, false);
  return (int) parsed;
}
236
/// <summary>
/// Parses a single-precision value. The lowercase spellings "inf",
/// "infinity" and "nan" (each optionally followed by "f", and the infinities
/// optionally preceded by "-") map to the matching special values; anything
/// else is parsed with the invariant culture.
/// </summary>
internal static float ParseFloat(string text) {
  if (text == "nan" || text == "nanf") {
    return float.NaN;
  }
  if (text == "inf" || text == "infinity" || text == "inff" || text == "infinityf") {
    return float.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity" || text == "-inff" || text == "-infinityf") {
    return float.NegativeInfinity;
  }
  return float.Parse(text, CultureInfo.InvariantCulture);
}
256
/// <summary>
/// Parses a double-precision value. The lowercase spellings "inf",
/// "infinity" (optionally preceded by "-") and "nan" map to the matching
/// special values; anything else is parsed with the invariant culture.
/// </summary>
internal static double ParseDouble(string text) {
  if (text == "nan") {
    return double.NaN;
  }
  if (text == "inf" || text == "infinity") {
    return double.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity") {
    return double.NegativeInfinity;
  }
  return double.Parse(text, CultureInfo.InvariantCulture);
}
271
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="isSigned">Whether a negative value is allowed and the signed range applies.</param>
/// <param name="isLong">Whether the target is 64 bits wide (otherwise 32 bits).</param>
/// <returns>
/// The parsed value as a long. Unsigned 64-bit values above long.MaxValue are
/// returned as the negative long with the same bit pattern.
/// </returns>
/// <exception cref="FormatException">
/// The text is empty, contains characters that are not digits of the detected
/// radix, is negative when <paramref name="isSigned"/> is false, or is out of
/// range for the requested width and signedness.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  // Validate the digits up front. The framework parsers would otherwise accept
  // whitespace, a leading '+' or a second "0x" prefix, and would throw
  // ArgumentException-derived exceptions (rather than FormatException) for
  // input such as "0x" or "0x-1".
  if (text.Length == 0) {
    throw new FormatException("Invalid integer: " + original);
  }
  foreach (char c in text) {
    if (!IsDigitForRadix(c, radix)) {
      throw new FormatException("Invalid integer: " + original);
    }
  }

  ulong magnitude;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    magnitude = radix == 10
        ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw CreateIntegerRangeException(original, isSigned, isLong);
  }

  // Largest magnitude allowed for the requested sign, width and signedness.
  ulong limit;
  if (negative) {
    limit = isLong ? 0x8000000000000000UL : 0x80000000UL;
  } else if (isSigned) {
    limit = isLong ? (ulong) long.MaxValue : (ulong) int.MaxValue;
  } else {
    limit = isLong ? ulong.MaxValue : uint.MaxValue;
  }
  if (magnitude > limit) {
    throw CreateIntegerRangeException(original, isSigned, isLong);
  }

  // Both conversions may wrap by design (unsigned values above long.MaxValue,
  // and negating long.MinValue), so they must not be overflow-checked.
  long value = unchecked((long) magnitude);
  return negative ? unchecked(-value) : value;
}

/// <summary>
/// Tests whether a character is a valid digit in radix 8, 10 or 16.
/// </summary>
private static bool IsDigitForRadix(char c, int radix) {
  if (radix == 16) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  }
  return c >= '0' && c < '0' + radix;
}

/// <summary>
/// Builds the exception reported when a number does not fit the requested integer type.
/// </summary>
private static FormatException CreateIntegerRangeException(string original, bool isSigned, bool isLong) {
  string numberDescription = (isLong ? "64" : "32") + "-bit " + (isSigned ? "" : "un") + "signed integer";
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
324
/// <summary>
/// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  return c >= '0' && c <= '7';
}
331
/// <summary>
/// Returns true when <paramref name="c"/> is a hexadecimal digit:
/// '0'-'9', 'a'-'f' or 'A'-'F'.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
340
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns its
/// numeric value. Characters outside '0'-'9' and 'a'-'z' are treated as if
/// they were uppercase letters, so callers are expected to validate first.
/// </summary>
private static int ParseDigit(char c) {
  if (c >= 'a' && c <= 'z') {
    return c - 'a' + 10;
  }
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  return c - 'A' + 10;
}
354
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString unescaped = UnescapeBytes(input);
  return unescaped.ToStringUtf8();
}
362
/// <summary>
/// Like <see cref="EscapeBytes" /> but escapes a text string.
/// The string is first encoded as UTF-8, then each byte is escaped individually.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
371
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. All bytes
/// that are not printable 7-bit ASCII characters (0x20 through 0x7e) are
/// escaped, as well as backslash, single-quote, and double-quote characters.
/// Bytes for which no short-hand escape sequence is defined are escaped
/// using 3-digit octal sequences.
/// The returned value is guaranteed to be entirely printable ASCII.
/// </summary>
/// <param name="input">The bytes to escape.</param>
/// <returns>The escaped text.</returns>
internal static String EscapeBytes(ByteString input) {
  StringBuilder builder = new StringBuilder(input.Length);
  foreach (byte b in input) {
    switch (b) {
      // Bell and vertical tab are written as numeric literals here.
      case 0x07:       builder.Append("\\a" ); break;
      case (byte)'\b': builder.Append("\\b" ); break;
      case (byte)'\f': builder.Append("\\f" ); break;
      case (byte)'\n': builder.Append("\\n" ); break;
      case (byte)'\r': builder.Append("\\r" ); break;
      case (byte)'\t': builder.Append("\\t" ); break;
      case 0x0b:       builder.Append("\\v" ); break;
      case (byte)'\\': builder.Append("\\\\"); break;
      case (byte)'\'': builder.Append("\\\'"); break;
      case (byte)'"' : builder.Append("\\\""); break;
      default:
        // Only 0x20 (space) through 0x7e (tilde) are printable; 0x7f (DEL)
        // is a control character and must be escaped like any other.
        if (b >= 0x20 && b <= 0x7e) {
          builder.Append((char) b);
        } else {
          // Three octal digits: the top two bits, then two groups of three bits.
          builder.Append('\\');
          builder.Append((char) ('0' + ((b >> 6) & 3)));
          builder.Append((char) ('0' + ((b >> 3) & 7)));
          builder.Append((char) ('0' + (b & 7)));
        }
        break;
    }
  }
  return builder.ToString();
}
410
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <param name="input">Escaped text; characters outside the range 32-127 are rejected.</param>
/// <returns>The unescaped bytes.</returns>
/// <exception cref="FormatException">
/// The input contains a character outside the accepted range, ends with a
/// backslash, has "\x" without a hex digit, or uses an unknown escape.
/// </exception>
internal static ByteString UnescapeBytes(string input) {
  int length = input.Length;
  // Every escape sequence yields at most as many bytes as it has characters,
  // so the input length is a sufficient buffer size.
  byte[] buffer = new byte[length];
  int written = 0;
  int index = 0;

  while (index < length) {
    char current = input[index++];
    if (current < 32 || current > 127) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    // From here on, index refers to the character after the escape character.
    char escape = input[index++];

    if (escape >= '0' && escape <= '7') {
      // Octal escape: one digit, followed by up to two more.
      int octalValue = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < length && IsOctal(input[index]); extra++) {
        octalValue = octalValue * 8 + ParseDigit(input[index++]);
      }
      buffer[written++] = (byte) octalValue;
      continue;
    }

    switch (escape) {
      case 'a':  buffer[written++] = 0x07; break;
      case 'b':  buffer[written++] = (byte) '\b'; break;
      case 'f':  buffer[written++] = (byte) '\f'; break;
      case 'n':  buffer[written++] = (byte) '\n'; break;
      case 'r':  buffer[written++] = (byte) '\r'; break;
      case 't':  buffer[written++] = (byte) '\t'; break;
      case 'v':  buffer[written++] = 0x0b; break;
      case '\\': buffer[written++] = (byte) '\\'; break;
      case '\'': buffer[written++] = (byte) '\''; break;
      case '"':  buffer[written++] = (byte) '\"'; break;

      case 'x': {
        // Hex escape: one required digit, followed by at most one more.
        if (index >= length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int hexValue = ParseDigit(input[index++]);
        if (index < length && IsHex(input[index])) {
          hexValue = hexValue * 16 + ParseDigit(input[index++]);
        }
        buffer[written++] = (byte) hexValue;
        break;
      }

      default:
        throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
    }
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
481
/// <summary>
/// Parses <paramref name="text"/> and merges it into <paramref name="builder"/>,
/// using the empty extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
485
/// <summary>
/// Parses the text supplied by <paramref name="reader"/> and merges it into
/// <paramref name="builder"/>, using the empty extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
489
/// <summary>
/// Reads <paramref name="reader"/> to its end and merges the resulting text
/// into <paramref name="builder"/>, resolving extensions through
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
493
/// <summary>
/// Tokenizes <paramref name="text"/> and merges one field at a time into
/// <paramref name="builder"/> until the tokenizer reports the end of input.
/// Extensions are resolved through <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  for (TextTokenizer tokenizer = new TextTokenizer(text); !tokenizer.AtEnd; ) {
    MergeField(tokenizer, registry, builder);
  }
}
501
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder. The field is named either by a bracketed extension name or
/// by a plain identifier; its value is then parsed according to the field's
/// type and either added (repeated fields) or set on the builder.
/// </summary>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
                               IBuilder builder) {

  MessageDescriptor type = builder.DescriptorForType;
  ExtensionInfo extension = null;
  FieldDescriptor field;

  if (!tokenizer.TryConsume("[")) {
    // A regular field, named by a single identifier.
    String name = tokenizer.ConsumeIdentifier();
    field = type.FindDescriptor<FieldDescriptor>(name);

    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names.
    if (field == null) {
      // Explicitly specify the invariant culture so that this code does not break when
      // executing in Turkey.
      String lowerName = name.ToLower(CultureInfo.InvariantCulture);
      field = type.FindDescriptor<FieldDescriptor>(lowerName);
      // A match on the lowercased name only counts when the field is a group.
      if (field != null && field.FieldType != FieldType.Group) {
        field = null;
      }
    }
    // A group field must be referred to by the exact name of its message type.
    if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
      field = null;
    }

    if (field == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
    }
  } else {
    // An extension: a dot-separated name enclosed in square brackets.
    string extensionName = tokenizer.ConsumeIdentifier();
    while (tokenizer.TryConsume(".")) {
      extensionName = extensionName + "." + tokenizer.ConsumeIdentifier();
    }

    extension = extensionRegistry[extensionName];

    if (extension == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" not found in the ExtensionRegistry.");
    }
    if (extension.Descriptor.ContainingType != type) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" does not extend message type \"" +
                                                         type.FullName + "\".");
    }

    tokenizer.Consume("]");

    field = extension.Descriptor;
  }

  object value = null;

  if (field.MappedType != MappedType.Message) {
    tokenizer.Consume(":");

    switch (field.FieldType) {
      case FieldType.Message:
      case FieldType.Group:
        // Message-mapped fields are handled by the other branch.
        throw new InvalidOperationException("Can't get here.");

      case FieldType.Int32:
      case FieldType.SInt32:
      case FieldType.SFixed32:
        value = tokenizer.ConsumeInt32();
        break;

      case FieldType.UInt32:
      case FieldType.Fixed32:
        value = tokenizer.ConsumeUInt32();
        break;

      case FieldType.Int64:
      case FieldType.SInt64:
      case FieldType.SFixed64:
        value = tokenizer.ConsumeInt64();
        break;

      case FieldType.UInt64:
      case FieldType.Fixed64:
        value = tokenizer.ConsumeUInt64();
        break;

      case FieldType.Float:
        value = tokenizer.ConsumeFloat();
        break;

      case FieldType.Double:
        value = tokenizer.ConsumeDouble();
        break;

      case FieldType.Bool:
        value = tokenizer.ConsumeBoolean();
        break;

      case FieldType.String:
        value = tokenizer.ConsumeString();
        break;

      case FieldType.Bytes:
        value = tokenizer.ConsumeByteString();
        break;

      case FieldType.Enum: {
        // Enum values may be given either by number or by name.
        EnumDescriptor enumType = field.EnumType;

        if (!tokenizer.LookingAtInteger()) {
          String id = tokenizer.ConsumeIdentifier();
          value = enumType.FindValueByName(id);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value named \"" + id + "\".");
          }
        } else {
          int number = tokenizer.ConsumeInt32();
          value = enumType.FindValueByNumber(number);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value with number " + number + ".");
          }
        }

        break;
      }
    }
  } else {
    tokenizer.TryConsume(":");  // optional

    // A nested message is delimited by either <...> or {...}.
    String endToken;
    if (tokenizer.TryConsume("<")) {
      endToken = ">";
    } else {
      tokenizer.Consume("{");
      endToken = "}";
    }

    IBuilder subBuilder;
    if (extension != null) {
      subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
    } else {
      subBuilder = builder.CreateBuilderForField(field);
    }

    // Merge nested fields until the closing delimiter is consumed.
    while (!tokenizer.TryConsume(endToken)) {
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
      }
      MergeField(tokenizer, extensionRegistry, subBuilder);
    }

    value = subBuilder.WeakBuild();
  }

  if (field.IsRepeated) {
    builder.WeakAddRepeatedField(field, value);
  } else {
    builder.SetField(field, value);
  }
}
673 }
674}