blob: d487bd61f2bfca3f5b4ddff6ca0b7b090cbe1976 [file] [log] [blame]
Jon Skeet68036862008-10-22 13:30:34 +01001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.
3// http://code.google.com/p/protobuf/
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16using System;
17using System.Collections.Generic;
18using System.Globalization;
19using System.IO;
20using System.Text;
21using Google.ProtocolBuffers.Descriptors;
22using System.Collections;
23
24namespace Google.ProtocolBuffers {
25 /// <summary>
/// Provides ASCII text formatting support for messages: printing via the
/// Print/PrintToString methods and parsing via the Merge methods.
28 /// </summary>
29 public static class TextFormat {
30
/// <summary>
/// Writes the text-format representation of <paramref name="message" />
/// to <paramref name="output" />.
/// </summary>
/// <param name="message">The message whose fields are printed.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a generator, then hand over to the shared
  // generator-based implementation.
  TextGenerator textGenerator = new TextGenerator(output);
  Print(message, textGenerator);
}
39
/// <summary>
/// Writes the text-format representation of the unknown fields in
/// <paramref name="fields" /> to <paramref name="output" />.
/// </summary>
/// <param name="fields">The unknown field set to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  TextGenerator textGenerator = new TextGenerator(output);
  PrintUnknownFields(fields, textGenerator);
}
47
/// <summary>
/// Returns the text-format representation of <paramref name="message" />
/// as a string.
/// </summary>
/// <param name="message">The message to print.</param>
/// <returns>Everything that printing the message wrote, as one string.</returns>
public static string PrintToString(IMessage message) {
  // Print into an in-memory writer and hand back what was collected.
  StringWriter buffer = new StringWriter();
  Print(message, buffer);
  string printed = buffer.ToString();
  return printed;
}
53
/// <summary>
/// Returns the text-format representation of the unknown fields in
/// <paramref name="fields" /> as a string.
/// </summary>
/// <param name="fields">The unknown field set to print.</param>
/// <returns>Everything that printing the fields wrote, as one string.</returns>
public static string PrintToString(UnknownFieldSet fields) {
  // Print into an in-memory writer and hand back what was collected.
  StringWriter buffer = new StringWriter();
  Print(fields, buffer);
  string printed = buffer.ToString();
  return printed;
}
59
/// <summary>
/// Prints every entry of <c>message.AllFields</c> followed by the
/// message's unknown fields.
/// </summary>
/// <param name="message">The message to print.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> fieldAndValue in message.AllFields) {
    FieldDescriptor descriptor = fieldAndValue.Key;
    PrintField(descriptor, fieldAndValue.Value, generator);
  }

  // Unknown fields always come after the known ones.
  PrintUnknownFields(message.UnknownFields, generator);
}
66
/// <summary>
/// Prints a field. A repeated field is printed as one entry per element;
/// any other field is printed as a single entry.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">
/// The field value; enumerated as an <see cref="IEnumerable" /> when the
/// field is repeated.
/// </param>
/// <param name="generator">The generator that receives the text.</param>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: each element gets its own "name: value" entry.
  IEnumerable elements = (IEnumerable) value;
  foreach (object item in elements) {
    PrintSingleField(field, item, generator);
  }
}
77
/// <summary>
/// Prints one field entry: the field name, then either ": value" or a
/// braced, indented block for message-typed fields, then a newline.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">The single (non-repeated) value to print.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (field.IsExtension) {
    // Extensions are written in square brackets.
    generator.Print("[");
    // We special-case MessageSet elements for compatibility with proto1:
    // an optional message extension scoped inside its own message type is
    // identified by the message type's name rather than the field's name.
    bool useMessageTypeName =
        field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(useMessageTypeName ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  } else {
    // Groups must be serialized with their original capitalization, which
    // is the name of their message type rather than the field name.
    bool isGroup = field.FieldType == FieldType.Group;
    generator.Print(isGroup ? field.MessageType.Name : field.Name);
  }

  bool isMessage = field.MappedType == MappedType.Message;
  if (isMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
116
/// <summary>
/// Prints just the value part of a field, chosen by the field's type.
/// Field types not listed in the switch print nothing.
/// </summary>
/// <param name="field">Descriptor whose FieldType selects the format.</param>
/// <param name="value">The value to print; cast according to the field type.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Message:
    case FieldType.Group:
      // Nested messages are printed recursively, field by field.
      Print((IMessage) value, generator);
      return;

    case FieldType.Enum:
      // Enum values are printed by name.
      EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
      generator.Print(enumValue.Name);
      return;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      return;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      return;

    case FieldType.Bool:
      // Explicitly use the lower-case (Java-style) true/false rather than
      // what Boolean.ToString would produce.
      string boolLiteral = (bool) value ? "true" : "false";
      generator.Print(boolLiteral);
      return;

    case FieldType.Double:
    case FieldType.Float:
    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
    case FieldType.SFixed32:
    case FieldType.SFixed64:
      // A plain Object.ToString would use the current culture; always use
      // the invariant culture so the output is predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      return;
  }
}
164
/// <summary>
/// Prints every unknown field, keyed by its field number. Varints are
/// written in decimal, fixed32/fixed64 values as zero-padded hex with a
/// "0x" prefix, length-delimited values as quoted escaped strings, and
/// groups as nested braced blocks.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so that the output
/// does not depend on the current thread's culture, matching how known
/// numeric fields are printed.
/// </remarks>
/// <param name="unknownFields">The unknown field set to print.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format the field number once, culture-independently, and reuse it.
    string fieldNumber = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = fieldNumber + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(fieldNumber);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      // Groups nest: print the number, then the group's own fields indented.
      generator.Print(fieldNumber);
      generator.Print(" {\n");
      generator.Indent();
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
201
/// <summary>
/// Parses <paramref name="text" /> as an unsigned 64-bit integer in hex,
/// decimal or octal notation.
/// </summary>
/// <exception cref="FormatException">The text is not a valid number or is out of range.</exception>
internal static ulong ParseUInt64(string text) {
  // ParseInteger hands back the raw 64-bit pattern as a long; reinterpret
  // it without an overflow check regardless of compiler settings.
  return unchecked((ulong) ParseInteger(text, false, true));
}

/// <summary>
/// Parses <paramref name="text" /> as a signed 64-bit integer in hex,
/// decimal or octal notation.
/// </summary>
/// <exception cref="FormatException">The text is not a valid number or is out of range.</exception>
internal static long ParseInt64(string text) {
  return ParseInteger(text, true, true);
}

/// <summary>
/// Parses <paramref name="text" /> as an unsigned 32-bit integer in hex,
/// decimal or octal notation.
/// </summary>
/// <exception cref="FormatException">The text is not a valid number or is out of range.</exception>
internal static uint ParseUInt32(string text) {
  // The value has already been range-checked against uint.MaxValue.
  return unchecked((uint) ParseInteger(text, false, false));
}

/// <summary>
/// Parses <paramref name="text" /> as a signed 32-bit integer in hex,
/// decimal or octal notation.
/// </summary>
/// <exception cref="FormatException">The text is not a valid number or is out of range.</exception>
internal static int ParseInt32(string text) {
  // The value has already been range-checked against the int range.
  return unchecked((int) ParseInteger(text, true, false));
}

/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="isSigned">Whether a negative value is acceptable.</param>
/// <param name="isLong">Whether the target type is 64 bits wide (otherwise 32).</param>
/// <returns>
/// The parsed value. For unsigned 64-bit values above long.MaxValue the
/// returned long carries the same bit pattern as the unsigned value.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="text" /> is null.</exception>
/// <exception cref="FormatException">
/// The text is not a valid number, or is out of range for the requested type.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  if (text == null) {
    throw new ArgumentNullException("text");
  }

  string original = text;
  bool negative = false;
  // Ordinal comparisons: these prefixes are syntax, not linguistic text.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    // The leading zero is left in place; it is a valid octal digit.
    radix = 8;
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    result = radix == 10
        ? ulong.Parse(text, NumberStyles.Integer, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw CreateOutOfRangeException(original, isSigned, isLong);
  } catch (ArgumentException) {
    // Convert.ToUInt64(string, int) reports an empty digit string (such as
    // the remainder of "0x") as ArgumentOutOfRangeException and a sign
    // after the radix prefix as ArgumentException. Both are malformed
    // input as far as callers are concerned.
    throw new FormatException("Invalid number: " + original);
  }

  if (negative) {
    // The magnitude of the most negative value is one more than the
    // largest positive value.
    ulong maxMagnitude = isLong ? 0x8000000000000000UL : 0x80000000UL;
    if (result > maxMagnitude) {
      throw CreateOutOfRangeException(original, true, isLong);
    }
    // For 2^63 the cast wraps to long.MinValue, whose negation is itself;
    // unchecked makes that independent of the compiler's overflow setting.
    return unchecked(-((long) result));
  }

  ulong max;
  if (isSigned) {
    max = isLong ? (ulong) long.MaxValue : (ulong) int.MaxValue;
  } else {
    max = isLong ? ulong.MaxValue : (ulong) uint.MaxValue;
  }
  if (result > max) {
    throw CreateOutOfRangeException(original, isSigned, isLong);
  }
  return unchecked((long) result);
}

/// <summary>
/// Builds the exception thrown when a number does not fit the requested integer type.
/// </summary>
private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong) {
  string numberDescription = (isLong ? "64" : "32") + "-bit " + (isSigned ? "" : "un") + "signed integer";
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
270
/// <summary>
/// Returns true when <paramref name="c" /> is one of the octal digits
/// '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
277
/// <summary>
/// Returns true when <paramref name="c" /> is a hexadecimal digit:
/// '0'-'9', 'a'-'f' or 'A'-'F'.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
286
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns
/// its numeric value: '0'-'9' map to 0-9 and letters map to 10 upwards,
/// case-insensitively.
/// </summary>
/// <remarks>
/// No validation is performed: any character that is neither a decimal
/// digit nor a lower-case letter is treated as if it were an upper-case
/// letter, so callers must check the character first.
/// </remarks>
private static int ParseDigit(char c) {
  bool isDecimalDigit = c >= '0' && c <= '9';
  if (isDecimalDigit) {
    return c - '0';
  }

  bool isLowerCase = c >= 'a' && c <= 'z';
  int letterBase = isLowerCase ? 'a' : 'A';
  return c - letterBase + 10;
}
300
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are then decoded as UTF-8.
/// </summary>
/// <param name="input">The escaped text.</param>
/// <returns>The unescaped string.</returns>
internal static string UnescapeText(string input) {
  ByteString rawBytes = UnescapeBytes(input);
  return rawBytes.ToStringUtf8();
}
308
/// <summary>
/// Like <see cref="EscapeBytes" /> but escapes a text string.
/// The string is encoded as UTF-8 first and every resulting byte is then
/// escaped individually, so the returned value is entirely ASCII.
/// </summary>
/// <param name="input">The text to escape.</param>
/// <returns>The escaped, ASCII-only representation.</returns>
internal static string EscapeText(string input) {
  ByteString utf8Bytes = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8Bytes);
}
317
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. Backslash,
/// single-quote and double-quote are escaped, as is every byte that is not
/// printable 7-bit ASCII. Bytes with a short-hand escape (\a, \b, \f, \n,
/// \r, \t, \v) use it; all others are written as 3-digit octal sequences.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
/// <param name="input">The bytes to escape.</param>
/// <returns>The escaped text.</returns>
internal static String EscapeBytes(ByteString input) {
  StringBuilder escaped = new StringBuilder(input.Length);
  foreach (byte current in input) {
    // Bytes with a dedicated escape sequence are handled first; each case
    // moves straight on to the next byte.
    switch (current) {
      // C# has no \a or \v character escapes, so use the raw values.
      case 0x07: escaped.Append("\\a"); continue;
      case 0x0b: escaped.Append("\\v"); continue;
      case (byte) '\b': escaped.Append("\\b"); continue;
      case (byte) '\f': escaped.Append("\\f"); continue;
      case (byte) '\n': escaped.Append("\\n"); continue;
      case (byte) '\r': escaped.Append("\\r"); continue;
      case (byte) '\t': escaped.Append("\\t"); continue;
      case (byte) '\\': escaped.Append("\\\\"); continue;
      case (byte) '\'': escaped.Append("\\\'"); continue;
      case (byte) '"': escaped.Append("\\\""); continue;
    }

    bool isPrintableAscii = current >= 0x20 && current < 128;
    if (isPrintableAscii) {
      escaped.Append((char) current);
      continue;
    }

    // Everything else becomes a backslash followed by three octal digits.
    escaped.Append('\\');
    escaped.Append((char) ('0' + ((current >> 6) & 3)));
    escaped.Append((char) ('0' + ((current >> 3) & 7)));
    escaped.Append((char) ('0' + (current & 7)));
  }
  return escaped.ToString();
}
356
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// Recognised escapes are \a \b \f \n \r \t \v \\ \' \", one to three
/// octal digits, and \x followed by one or two hex digits.
/// </summary>
/// <param name="input">The escaped text; unescaped characters must be in the range 32-127.</param>
/// <returns>The unescaped bytes.</returns>
/// <exception cref="FormatException">
/// The input contains a character outside the accepted range, or an
/// invalid or incomplete escape sequence.
/// </exception>
internal static ByteString UnescapeBytes(string input) {
  int length = input.Length;
  // Each output byte consumes at least one input character, so the input
  // length is a safe upper bound for the output size.
  byte[] buffer = new byte[length];
  int written = 0;
  int index = 0;

  while (index < length) {
    char current = input[index++];
    if (current > 127 || current < 32) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    char escape = input[index++];
    if (IsOctal(escape)) {
      // Octal escape: one digit is mandatory, up to two more are optional.
      int octalValue = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < length && IsOctal(input[index]); extra++) {
        octalValue = octalValue * 8 + ParseDigit(input[index++]);
      }
      buffer[written++] = (byte) octalValue;
      continue;
    }

    byte decoded;
    switch (escape) {
      case 'a': decoded = 0x07; break;
      case 'b': decoded = (byte) '\b'; break;
      case 'f': decoded = (byte) '\f'; break;
      case 'n': decoded = (byte) '\n'; break;
      case 'r': decoded = (byte) '\r'; break;
      case 't': decoded = (byte) '\t'; break;
      case 'v': decoded = 0x0b; break;
      case '\\': decoded = (byte) '\\'; break;
      case '\'': decoded = (byte) '\''; break;
      case '"': decoded = (byte) '\"'; break;

      case 'x':
        // Hex escape: one digit is mandatory, a second is optional.
        if (index >= length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int hexValue = ParseDigit(input[index++]);
        if (index < length && IsHex(input[index])) {
          hexValue = hexValue * 16 + ParseDigit(input[index++]);
        }
        decoded = (byte) hexValue;
        break;

      default:
        throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
    }
    buffer[written++] = decoded;
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
427
/// <summary>
/// Parses text-format <paramref name="text" /> and merges the fields it
/// contains into <paramref name="builder" />, using an empty extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
431
/// <summary>
/// Parses text-format data from <paramref name="reader" /> and merges the
/// fields it contains into <paramref name="builder" />, using an empty
/// extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
435
/// <summary>
/// Reads <paramref name="reader" /> to its end, then parses that text and
/// merges the fields it contains into <paramref name="builder" />.
/// </summary>
/// <param name="reader">Source of the text-format data; read to the end.</param>
/// <param name="registry">Registry used to look up extensions by name.</param>
/// <param name="builder">The builder the parsed fields are merged into.</param>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
439
/// <summary>
/// Parses text-format <paramref name="text" /> and merges each field it
/// contains into <paramref name="builder" />.
/// </summary>
/// <param name="text">The text-format data to parse.</param>
/// <param name="registry">Registry used to look up extensions by name.</param>
/// <param name="builder">The builder the parsed fields are merged into.</param>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  // Keep merging one field at a time until the tokenizer is exhausted.
  for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
    MergeField(tokens, registry, builder);
  }
}
447
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder. The field is identified either by a bracketed extension
/// name or by a plain field name, and is followed by a message body or by
/// ":" and a scalar value.
/// </summary>
/// <param name="tokenizer">Source of tokens, positioned at the start of a field.</param>
/// <param name="extensionRegistry">Registry used to resolve bracketed extension names.</param>
/// <param name="builder">The builder the parsed value is set on or added to.</param>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder) {
  MessageDescriptor type = builder.DescriptorForType;
  ExtensionInfo extension = null;
  FieldDescriptor field;

  if (tokenizer.TryConsume("[")) {
    // An extension.
    extension = ConsumeExtensionReference(tokenizer, extensionRegistry, type);
    field = extension.Descriptor;
  } else {
    field = ConsumeFieldReference(tokenizer, type);
  }

  object value;
  if (field.MappedType == MappedType.Message) {
    value = ConsumeMessageValue(tokenizer, extensionRegistry, builder, field, extension);
  } else {
    value = ConsumeScalarValue(tokenizer, field);
  }

  if (field.IsRepeated) {
    builder.WeakAddRepeatedField(field, value);
  } else {
    builder.SetField(field, value);
  }
}

/// <summary>
/// Consumes the dotted name and closing "]" of an extension reference (the
/// opening "[" has already been consumed) and looks the extension up.
/// </summary>
private static ExtensionInfo ConsumeExtensionReference(TextTokenizer tokenizer,
    ExtensionRegistry extensionRegistry, MessageDescriptor type) {
  StringBuilder qualifiedName = new StringBuilder(tokenizer.ConsumeIdentifier());
  while (tokenizer.TryConsume(".")) {
    qualifiedName.Append(".");
    qualifiedName.Append(tokenizer.ConsumeIdentifier());
  }

  ExtensionInfo extension = extensionRegistry[qualifiedName.ToString()];
  if (extension == null) {
    throw tokenizer.CreateFormatExceptionPreviousToken(
        "Extension \"" + qualifiedName + "\" not found in the ExtensionRegistry.");
  }
  if (extension.Descriptor.ContainingType != type) {
    throw tokenizer.CreateFormatExceptionPreviousToken(
        "Extension \"" + qualifiedName + "\" does not extend message type \"" +
        type.FullName + "\".");
  }

  tokenizer.Consume("]");
  return extension;
}

/// <summary>
/// Consumes a plain field name and resolves it against the message type,
/// applying the special naming rules for groups.
/// </summary>
private static FieldDescriptor ConsumeFieldReference(TextTokenizer tokenizer, MessageDescriptor type) {
  String name = tokenizer.ConsumeIdentifier();
  FieldDescriptor field = type.FindDescriptor<FieldDescriptor>(name);

  // Group names are expected to be capitalized as they appear in the
  // .proto file, which actually matches their type names, not their field
  // names.
  if (field == null) {
    // Explicitly use the invariant culture so that this code does not
    // break when executing in Turkey.
    FieldDescriptor lowerCaseMatch = type.FindDescriptor<FieldDescriptor>(name.ToLowerInvariant());
    // A lower-cased match only counts when the field it finds is a group;
    // otherwise it is discarded.
    if (lowerCaseMatch != null && lowerCaseMatch.FieldType == FieldType.Group) {
      field = lowerCaseMatch;
    }
  }
  // Again, special-case group names as described above: a group must be
  // referred to by the exact name of its message type.
  if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
    field = null;
  }

  if (field == null) {
    throw tokenizer.CreateFormatExceptionPreviousToken(
        "Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
  }
  return field;
}

/// <summary>
/// Consumes a message body delimited by "&lt;" ... "&gt;" or "{" ... "}"
/// (optionally preceded by ":"), merging its fields into a fresh
/// sub-builder, and returns the built message.
/// </summary>
private static object ConsumeMessageValue(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder, FieldDescriptor field, ExtensionInfo extension) {
  tokenizer.TryConsume(":"); // optional

  String endToken;
  if (tokenizer.TryConsume("<")) {
    endToken = ">";
  } else {
    tokenizer.Consume("{");
    endToken = "}";
  }

  // Extensions supply their own default instance to build from; ordinary
  // fields ask the containing builder.
  IBuilder subBuilder;
  if (extension != null) {
    subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
  } else {
    subBuilder = builder.CreateBuilderForField(field);
  }

  while (!tokenizer.TryConsume(endToken)) {
    if (tokenizer.AtEnd) {
      throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
    }
    MergeField(tokenizer, extensionRegistry, subBuilder);
  }

  return subBuilder.WeakBuild();
}

/// <summary>
/// Consumes ":" followed by a non-message value of the field's type.
/// Returns null for a field type that has no case in the switch.
/// </summary>
private static object ConsumeScalarValue(TextTokenizer tokenizer, FieldDescriptor field) {
  tokenizer.Consume(":");

  switch (field.FieldType) {
    case FieldType.Int32:
    case FieldType.SInt32:
    case FieldType.SFixed32:
      return tokenizer.ConsumeInt32();

    case FieldType.Int64:
    case FieldType.SInt64:
    case FieldType.SFixed64:
      return tokenizer.ConsumeInt64();

    case FieldType.UInt32:
    case FieldType.Fixed32:
      return tokenizer.ConsumeUInt32();

    case FieldType.UInt64:
    case FieldType.Fixed64:
      return tokenizer.ConsumeUInt64();

    case FieldType.Float:
      return tokenizer.ConsumeFloat();

    case FieldType.Double:
      return tokenizer.ConsumeDouble();

    case FieldType.Bool:
      return tokenizer.ConsumeBoolean();

    case FieldType.String:
      return tokenizer.ConsumeString();

    case FieldType.Bytes:
      return tokenizer.ConsumeByteString();

    case FieldType.Enum:
      return ConsumeEnumValue(tokenizer, field.EnumType);

    case FieldType.Message:
    case FieldType.Group:
      // Message-typed fields are handled before this method is called.
      throw new InvalidOperationException("Can't get here.");
  }

  return null;
}

/// <summary>
/// Consumes an enum value given either by number or by name and resolves
/// it against <paramref name="enumType" />.
/// </summary>
private static object ConsumeEnumValue(TextTokenizer tokenizer, EnumDescriptor enumType) {
  object value;
  if (tokenizer.LookingAtInteger()) {
    int number = tokenizer.ConsumeInt32();
    value = enumType.FindValueByNumber(number);
    if (value == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Enum type \"" + enumType.FullName +
          "\" has no value with number " + number + ".");
    }
  } else {
    String id = tokenizer.ConsumeIdentifier();
    value = enumType.FindValueByName(id);
    if (value == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Enum type \"" + enumType.FullName +
          "\" has no value named \"" + id + "\".");
    }
  }
  return value;
}
619 }
620}