blob: 537f43ae175356f991f3f464788aa41eef4894e8 [file] [log] [blame]
Jon Skeet60c059b2008-10-23 21:17:56 +01001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// http://github.com/jskeet/dotnet-protobufs/
4// Original C++/Java/Python code:
Jon Skeet68036862008-10-22 13:30:34 +01005// http://code.google.com/p/protobuf/
6//
Jon Skeet60c059b2008-10-23 21:17:56 +01007// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
Jon Skeet68036862008-10-22 13:30:34 +010010//
Jon Skeet60c059b2008-10-23 21:17:56 +010011// * Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// * Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// * Neither the name of Google Inc. nor the names of its
18// contributors may be used to endorse or promote products derived from
19// this software without specific prior written permission.
Jon Skeet68036862008-10-22 13:30:34 +010020//
Jon Skeet60c059b2008-10-23 21:17:56 +010021// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Jon Skeet68036862008-10-22 13:30:34 +010032using System;
33using System.Collections.Generic;
34using System.Globalization;
35using System.IO;
36using System.Text;
37using Google.ProtocolBuffers.Descriptors;
38using System.Collections;
39
40namespace Google.ProtocolBuffers {
/// <summary>
/// Provides ASCII text formatting support for messages: printing through the
/// Print/PrintToString methods and parsing through the Merge methods.
/// </summary>
45 public static class TextFormat {
46
/// <summary>
/// Writes the text-format rendering of <paramref name="message"/> to
/// <paramref name="output"/>.
/// </summary>
/// <param name="message">The message whose fields are printed.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a generator and hand over to the generator-based overload.
  Print(message, new TextGenerator(output));
}
55
/// <summary>
/// Writes the text-format rendering of the unknown field set
/// <paramref name="fields" /> to <paramref name="output"/>.
/// </summary>
/// <param name="fields">The unknown fields to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  PrintUnknownFields(fields, new TextGenerator(output));
}
63
/// <summary>
/// Renders <paramref name="message"/> in text format and returns the result
/// as a string.
/// </summary>
public static string PrintToString(IMessage message) {
  StringBuilder rendered = new StringBuilder();
  // The StringWriter writes straight into the builder, so the builder holds
  // the complete output once Print returns.
  Print(message, new StringWriter(rendered));
  return rendered.ToString();
}
69
/// <summary>
/// Renders the unknown field set <paramref name="fields"/> in text format and
/// returns the result as a string.
/// </summary>
public static string PrintToString(UnknownFieldSet fields) {
  StringBuilder rendered = new StringBuilder();
  // The StringWriter writes straight into the builder, so the builder holds
  // the complete output once Print returns.
  Print(fields, new StringWriter(rendered));
  return rendered.ToString();
}
75
/// <summary>
/// Prints every entry of <c>message.AllFields</c> followed by the message's
/// unknown fields.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    PrintField(descriptor, pair.Value, generator);
  }
  UnknownFieldSet unknown = message.UnknownFields;
  PrintUnknownFields(unknown, generator);
}
82
/// <summary>
/// Prints a field: once for a singular field, or once per element when the
/// field is repeated.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">The field value; enumerated via <see cref="IEnumerable"/> when the field is repeated.</param>
/// <param name="generator">Destination for the generated text.</param>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: emit each element as its own entry.
  foreach (object item in (IEnumerable) value) {
    PrintSingleField(field, item, generator);
  }
}
93
/// <summary>
/// Prints one field entry: its name (bracketed for extensions), then either
/// <c>": value"</c> or, for message-typed fields, an indented
/// <c>" { ... }"</c> block, followed by a newline.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization, which is
    // the name of the group's message type rather than the field name.
    string label = field.FieldType == FieldType.Group ? field.MessageType.Name : field.Name;
    generator.Print(label);
  } else {
    generator.Print("[");
    // MessageSet elements are special-cased for compatibility with proto1:
    // they are identified by the message type name instead of the field name.
    bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  }

  bool isMessage = field.MappedType == MappedType.Message;
  if (isMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
132
/// <summary>
/// Prints the value of a single field according to the field's type.
/// Field types not listed in the switch produce no output.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Message:
    case FieldType.Group:
      // Nested messages are printed recursively.
      Print((IMessage) value, generator);
      return;

    case FieldType.Enum:
      EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
      generator.Print(enumValue.Name);
      return;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      return;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      return;

    case FieldType.Bool:
      // Always the lower-case literals, never the .NET "True"/"False".
      bool flag = (bool) value;
      generator.Print(flag ? "true" : "false");
      return;

    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.SFixed32:
    case FieldType.SFixed64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
    case FieldType.Float:
    case FieldType.Double:
      // A plain Object.ToString would format with the current culture; the
      // invariant culture keeps the output predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      return;
  }
}
180
/// <summary>
/// Prints every entry of <paramref name="unknownFields"/>, keyed by field
/// number. Varints are printed in decimal, fixed32/fixed64 values as
/// zero-padded hex, length-delimited values as escaped quoted strings and
/// groups as nested, indented blocks.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so that the output
/// does not depend on the culture of the current thread.
/// </remarks>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      // Groups nest, so recurse for the group's own unknown fields.
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
217
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 64-bit integer using
/// ParseInteger (unsigned, 64-bit).
/// </summary>
internal static ulong ParseUInt64(string text) {
  long bits = ParseInteger(text, false, true);
  return (ulong) bits;
}
221
/// <summary>
/// Parses <paramref name="text"/> as a signed 64-bit integer using
/// ParseInteger (signed, 64-bit).
/// </summary>
internal static long ParseInt64(string text) {
  long parsed = ParseInteger(text, true, true);
  return parsed;
}
225
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 32-bit integer using
/// ParseInteger (unsigned, 32-bit).
/// </summary>
internal static uint ParseUInt32(string text) {
  long parsed = ParseInteger(text, false, false);
  return (uint) parsed;
}
229
/// <summary>
/// Parses <paramref name="text"/> as a signed 32-bit integer using
/// ParseInteger (signed, 32-bit).
/// </summary>
internal static int ParseInt32(string text) {
  long parsed = ParseInteger(text, true, false);
  return (int) parsed;
}
233
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The text to parse. No whitespace, plus sign or culture-specific
/// formatting is accepted.</param>
/// <param name="isSigned">Whether a negative value is allowed and the signed range applies.</param>
/// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
/// <returns>The parsed value. For unsigned 64-bit input above <c>long.MaxValue</c> the
/// result carries the same bits reinterpreted as a <c>long</c>.</returns>
/// <exception cref="FormatException">The text is not a valid number in the detected
/// radix, or the value is out of range for the requested integer type.</exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  // Ordinal comparisons: the prefixes are plain ASCII tokens, and a culture-sensitive
  // StartsWith may ignore characters that Substring would then fail to skip.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  // Validate the digits up front. Left to themselves, the framework parsers accept
  // input this format forbids (a leading '+', whitespace, a second "0x" prefix) and
  // throw non-FormatException types for input such as "0x" or an embedded '-'.
  if (text.Length == 0) {
    throw new FormatException("Invalid number: " + original);
  }
  foreach (char c in text) {
    bool isDecimalDigitInRadix = c >= '0' && c <= '9' && (c - '0') < radix;
    bool isHexLetter = radix == 16 && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
    if (!isDecimalDigitInRadix && !isHexLetter) {
      throw new FormatException("Invalid number: " + original);
    }
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    result = radix == 10
        ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
    throw new FormatException("Number out of range for " + numberDescription + ": " + original);
  }

  if (negative) {
    // The magnitude of the most negative value is one larger than the largest positive value.
    ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
    if (result > max) {
      string numberDescription = string.Format("{0}-bit signed integer", isLong ? 64 : 32);
      throw new FormatException("Number out of range for " + numberDescription + ": " + original);
    }
    // Explicitly unchecked: a magnitude of 2^63 must wrap to long.MinValue even when
    // the assembly is compiled with overflow checking enabled.
    return unchecked(-(long) result);
  } else {
    ulong max = isSigned
        ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
        : (isLong ? ulong.MaxValue : uint.MaxValue);
    if (result > max) {
      string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
      throw new FormatException("Number out of range for " + numberDescription + ": " + original);
    }
    // Explicitly unchecked: unsigned 64-bit values above long.MaxValue are returned as
    // their bit pattern, which callers cast back to ulong.
    return unchecked((long) result);
  }
}
286
/// <summary>
/// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  return c >= '0' && c <= '7';
}
293
/// <summary>
/// Returns true when <paramref name="c"/> is a hexadecimal digit:
/// '0'-'9', 'a'-'f' or 'A'-'F'.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
302
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns the
/// numeric value.
/// </summary>
/// <remarks>
/// The character is not validated: anything that is neither '0'-'9' nor
/// 'a'-'z' is treated as an upper-case letter digit.
/// </remarks>
private static int ParseDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'z') {
    return 10 + (c - 'a');
  }
  return 10 + (c - 'A');
}
316
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString rawBytes = UnescapeBytes(input);
  return rawBytes.ToStringUtf8();
}
324
/// <summary>
/// Like <see cref="EscapeBytes" /> but escapes a text string.
/// The string is first encoded as UTF-8, then each byte escaped individually.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
333
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. All bytes
/// that are not printable 7-bit ASCII characters are escaped, as well as
/// backslash, single-quote, and double-quote characters. Bytes without a
/// short-hand escape sequence are written as 3-digit octal sequences.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static String EscapeBytes(ByteString input) {
  StringBuilder escaped = new StringBuilder(input.Length);
  foreach (byte current in input) {
    switch ((char) current) {
      case '\a': escaped.Append(@"\a"); break;
      case '\b': escaped.Append(@"\b"); break;
      case '\f': escaped.Append(@"\f"); break;
      case '\n': escaped.Append(@"\n"); break;
      case '\r': escaped.Append(@"\r"); break;
      case '\t': escaped.Append(@"\t"); break;
      case '\v': escaped.Append(@"\v"); break;
      case '\\': escaped.Append(@"\\"); break;
      case '\'': escaped.Append(@"\'"); break;
      case '"': escaped.Append("\\\""); break;
      default:
        if (current < 0x20 || current >= 128) {
          // Not printable ASCII: backslash followed by exactly three octal digits.
          escaped.Append('\\');
          escaped.Append(Convert.ToString(current, 8).PadLeft(3, '0'));
        } else {
          escaped.Append((char) current);
        }
        break;
    }
  }
  return escaped.ToString();
}
372
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <remarks>
/// Recognised escapes are one to three octal digits, <c>\x</c> followed by one
/// or two hex digits, and the single-character escapes a, b, f, n, r, t, v,
/// backslash, single quote and double quote.
/// </remarks>
/// <exception cref="FormatException">The input contains a character outside
/// the range 32-127, ends with a lone backslash, or contains an unrecognised
/// or incomplete escape sequence.</exception>
internal static ByteString UnescapeBytes(string input) {
  // Every escape consumes at least two characters per produced byte, so the
  // output can never be longer than the input.
  byte[] buffer = new byte[input.Length];
  int written = 0;
  int index = 0;

  while (index < input.Length) {
    char current = input[index++];
    if (current < 32 || current > 127) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= input.Length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    char escape = input[index++];
    if (IsOctal(escape)) {
      // Octal escape: the first digit plus up to two more.
      int octal = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
        octal = octal * 8 + ParseDigit(input[index++]);
      }
      buffer[written++] = (byte) octal;
      continue;
    }

    byte decoded;
    switch (escape) {
      case 'a': decoded = 0x07; break;
      case 'b': decoded = (byte) '\b'; break;
      case 'f': decoded = (byte) '\f'; break;
      case 'n': decoded = (byte) '\n'; break;
      case 'r': decoded = (byte) '\r'; break;
      case 't': decoded = (byte) '\t'; break;
      case 'v': decoded = 0x0b; break;
      case '\\': decoded = (byte) '\\'; break;
      case '\'': decoded = (byte) '\''; break;
      case '"': decoded = (byte) '\"'; break;

      case 'x':
        // Hex escape: one mandatory digit and one optional digit.
        if (index >= input.Length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int hex = ParseDigit(input[index++]);
        if (index < input.Length && IsHex(input[index])) {
          hex = hex * 16 + ParseDigit(input[index++]);
        }
        decoded = (byte) hex;
        break;

      default:
        throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
    }
    buffer[written++] = decoded;
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
443
/// <summary>
/// Parses text-format <paramref name="text"/> and merges it into
/// <paramref name="builder"/>, using <c>ExtensionRegistry.Empty</c> as the
/// extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
447
/// <summary>
/// Parses text-format input from <paramref name="reader"/> and merges it into
/// <paramref name="builder"/>, using <c>ExtensionRegistry.Empty</c> as the
/// extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
451
/// <summary>
/// Reads <paramref name="reader"/> to its end, then parses the text and
/// merges it into <paramref name="builder"/>, resolving extensions through
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
455
/// <summary>
/// Parses text-format <paramref name="text"/> and merges each field it
/// contains into <paramref name="builder"/>, resolving extensions through
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  // Each MergeField call consumes one complete field from the tokenizer.
  for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
    MergeField(tokens, registry, builder);
  }
}
463
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder.
/// </summary>
/// <remarks>
/// The field is named either by a bracketed, dot-separated extension name
/// looked up in <paramref name="extensionRegistry"/>, or by a plain
/// identifier looked up in the builder's descriptor. Message-typed fields
/// are parsed recursively between <c>{ }</c> or <c>&lt; &gt;</c>; every other
/// field is a <c>:</c> followed by a scalar value.
/// </remarks>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder) {

  MessageDescriptor containingType = builder.DescriptorForType;
  ExtensionInfo extensionInfo = null;
  FieldDescriptor target;

  if (!tokenizer.TryConsume("[")) {
    // A regular field, identified by a bare name.
    String identifier = tokenizer.ConsumeIdentifier();
    target = containingType.FindDescriptor<FieldDescriptor>(identifier);

    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names.
    if (target == null) {
      // Lower-case with the invariant culture so the lookup does not depend
      // on the current culture (e.g. Turkish casing rules).
      FieldDescriptor candidate = containingType.FindDescriptor<FieldDescriptor>(identifier.ToLowerInvariant());
      // The lower-cased lookup is only accepted for group fields.
      if (candidate != null && candidate.FieldType == FieldType.Group) {
        target = candidate;
      }
    }
    // A group must be spelled exactly like its message type name.
    if (target != null && target.FieldType == FieldType.Group && target.MessageType.Name != identifier) {
      target = null;
    }

    if (target == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + containingType.FullName + "\" has no field named \"" + identifier + "\".");
    }
  } else {
    // An extension: a dot-separated name terminated by "]".
    StringBuilder qualifiedName = new StringBuilder(tokenizer.ConsumeIdentifier());
    while (tokenizer.TryConsume(".")) {
      qualifiedName.Append(".");
      qualifiedName.Append(tokenizer.ConsumeIdentifier());
    }

    extensionInfo = extensionRegistry[qualifiedName.ToString()];

    if (extensionInfo == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" not found in the ExtensionRegistry.");
    }
    if (extensionInfo.Descriptor.ContainingType != containingType) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" does not extend message type \"" +
          containingType.FullName + "\".");
    }

    tokenizer.Consume("]");

    target = extensionInfo.Descriptor;
  }

  object parsed = null;

  if (target.MappedType != MappedType.Message) {
    tokenizer.Consume(":");
    parsed = ConsumeScalarValue(tokenizer, target);
  } else {
    tokenizer.TryConsume(":");  // optional

    String closer;
    if (tokenizer.TryConsume("<")) {
      closer = ">";
    } else {
      tokenizer.Consume("{");
      closer = "}";
    }

    IBuilder nested;
    if (extensionInfo != null) {
      nested = extensionInfo.DefaultInstance.WeakCreateBuilderForType();
    } else {
      nested = builder.CreateBuilderForField(target);
    }

    // Merge nested fields until the matching closing token is consumed.
    while (!tokenizer.TryConsume(closer)) {
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + closer + "\".");
      }
      MergeField(tokenizer, extensionRegistry, nested);
    }

    parsed = nested.WeakBuild();
  }

  if (target.IsRepeated) {
    builder.WeakAddRepeatedField(target, parsed);
  } else {
    builder.SetField(target, parsed);
  }
}

/// <summary>
/// Consumes the value of a non-message field from the tokenizer, choosing the
/// tokenizer method from the field type. Returns null for a field type the
/// switch does not list.
/// </summary>
private static object ConsumeScalarValue(TextTokenizer tokenizer, FieldDescriptor field) {
  switch (field.FieldType) {
    case FieldType.Int32:
    case FieldType.SInt32:
    case FieldType.SFixed32:
      return tokenizer.ConsumeInt32();

    case FieldType.Int64:
    case FieldType.SInt64:
    case FieldType.SFixed64:
      return tokenizer.ConsumeInt64();

    case FieldType.UInt32:
    case FieldType.Fixed32:
      return tokenizer.ConsumeUInt32();

    case FieldType.UInt64:
    case FieldType.Fixed64:
      return tokenizer.ConsumeUInt64();

    case FieldType.Float:
      return tokenizer.ConsumeFloat();

    case FieldType.Double:
      return tokenizer.ConsumeDouble();

    case FieldType.Bool:
      return tokenizer.ConsumeBoolean();

    case FieldType.String:
      return tokenizer.ConsumeString();

    case FieldType.Bytes:
      return tokenizer.ConsumeByteString();

    case FieldType.Enum: {
      EnumDescriptor enumType = field.EnumType;

      // Enum values may be written either by number or by name.
      if (tokenizer.LookingAtInteger()) {
        int number = tokenizer.ConsumeInt32();
        object byNumber = enumType.FindValueByNumber(number);
        if (byNumber == null) {
          throw tokenizer.CreateFormatExceptionPreviousToken(
              "Enum type \"" + enumType.FullName +
              "\" has no value with number " + number + ".");
        }
        return byNumber;
      }

      String id = tokenizer.ConsumeIdentifier();
      object byName = enumType.FindValueByName(id);
      if (byName == null) {
        throw tokenizer.CreateFormatExceptionPreviousToken(
            "Enum type \"" + enumType.FullName +
            "\" has no value named \"" + id + "\".");
      }
      return byName;
    }

    case FieldType.Message:
    case FieldType.Group:
      // Message-typed fields are handled by the caller before reaching here.
      throw new InvalidOperationException("Can't get here.");
  }
  return null;
}
635 }
636}