blob: 0d169af26be8632af95bcdc44a5405d5331bba85 [file] [log] [blame]
Michael J. Spencer22120c42012-04-03 23:09:22 +00001//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a YAML parser.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/YAMLParser.h"
David Majnemer0d955d02016-08-11 22:21:41 +000015#include "llvm/ADT/STLExtras.h"
Benjamin Kramer16132e62015-03-23 18:07:13 +000016#include "llvm/ADT/SmallString.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000017#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/ADT/Twine.h"
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +000020#include "llvm/ADT/AllocatorList.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000021#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MemoryBuffer.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000023#include "llvm/Support/SourceMgr.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000024#include "llvm/Support/raw_ostream.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000025
26using namespace llvm;
27using namespace yaml;
28
/// The Unicode encoding forms that getUnicodeEncoding() can report.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32 Little Endian
  UEF_UTF32_BE, ///< UTF-32 Big Endian
  UEF_UTF16_LE, ///< UTF-16 Little Endian
  UEF_UTF16_BE, ///< UTF-16 Big Endian
  UEF_UTF8,     ///< UTF-8 or ascii.
  UEF_Unknown   ///< Not a valid Unicode encoding.
};
37
38/// EncodingInfo - Holds the encoding type and length of the byte order mark if
39/// it exists. Length is in {0, 2, 3, 4}.
40typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
41
42/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
43/// encoding form of \a Input.
44///
45/// @param Input A string of length 0 or more.
46/// @returns An EncodingInfo indicating the Unicode encoding form of the input
47/// and how long the byte order mark is if one exists.
48static EncodingInfo getUnicodeEncoding(StringRef Input) {
49 if (Input.size() == 0)
50 return std::make_pair(UEF_Unknown, 0);
51
52 switch (uint8_t(Input[0])) {
53 case 0x00:
54 if (Input.size() >= 4) {
55 if ( Input[1] == 0
56 && uint8_t(Input[2]) == 0xFE
57 && uint8_t(Input[3]) == 0xFF)
58 return std::make_pair(UEF_UTF32_BE, 4);
59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
60 return std::make_pair(UEF_UTF32_BE, 0);
61 }
62
63 if (Input.size() >= 2 && Input[1] != 0)
64 return std::make_pair(UEF_UTF16_BE, 0);
65 return std::make_pair(UEF_Unknown, 0);
66 case 0xFF:
67 if ( Input.size() >= 4
68 && uint8_t(Input[1]) == 0xFE
69 && Input[2] == 0
70 && Input[3] == 0)
71 return std::make_pair(UEF_UTF32_LE, 4);
72
73 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
74 return std::make_pair(UEF_UTF16_LE, 2);
75 return std::make_pair(UEF_Unknown, 0);
76 case 0xFE:
77 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
78 return std::make_pair(UEF_UTF16_BE, 2);
79 return std::make_pair(UEF_Unknown, 0);
80 case 0xEF:
81 if ( Input.size() >= 3
82 && uint8_t(Input[1]) == 0xBB
83 && uint8_t(Input[2]) == 0xBF)
84 return std::make_pair(UEF_UTF8, 3);
85 return std::make_pair(UEF_Unknown, 0);
86 }
87
88 // It could still be utf-32 or utf-16.
89 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
90 return std::make_pair(UEF_UTF32_LE, 0);
91
92 if (Input.size() >= 2 && Input[1] == 0)
93 return std::make_pair(UEF_UTF16_LE, 0);
94
95 return std::make_pair(UEF_UTF8, 0);
96}
97
98namespace llvm {
99namespace yaml {
/// Pin the vtables to this file.
///
/// Each node class gets an empty out-of-line anchor() definition here.
void Node::anchor() {}
void NullNode::anchor() {}
void ScalarNode::anchor() {}
void BlockScalarNode::anchor() {}
void KeyValueNode::anchor() {}
void MappingNode::anchor() {}
void SequenceNode::anchor() {}
void AliasNode::anchor() {}
109
Michael J. Spencer22120c42012-04-03 23:09:22 +0000110/// Token - A single YAML token.
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +0000111struct Token {
Michael J. Spencer22120c42012-04-03 23:09:22 +0000112 enum TokenKind {
113 TK_Error, // Uninitialized token.
114 TK_StreamStart,
115 TK_StreamEnd,
116 TK_VersionDirective,
117 TK_TagDirective,
118 TK_DocumentStart,
119 TK_DocumentEnd,
120 TK_BlockEntry,
121 TK_BlockEnd,
122 TK_BlockSequenceStart,
123 TK_BlockMappingStart,
124 TK_FlowEntry,
125 TK_FlowSequenceStart,
126 TK_FlowSequenceEnd,
127 TK_FlowMappingStart,
128 TK_FlowMappingEnd,
129 TK_Key,
130 TK_Value,
131 TK_Scalar,
Alex Lorenza22b250c2015-05-13 23:10:51 +0000132 TK_BlockScalar,
Michael J. Spencer22120c42012-04-03 23:09:22 +0000133 TK_Alias,
134 TK_Anchor,
135 TK_Tag
136 } Kind;
137
138 /// A string of length 0 or more whose begin() points to the logical location
139 /// of the token in the input.
140 StringRef Range;
141
Alex Lorenza22b250c2015-05-13 23:10:51 +0000142 /// The value of a block scalar node.
143 std::string Value;
144
Michael J. Spencer22120c42012-04-03 23:09:22 +0000145 Token() : Kind(TK_Error) {}
146};
Alexander Kornienkof00654e2015-06-23 09:49:53 +0000147}
148}
Michael J. Spencer22120c42012-04-03 23:09:22 +0000149
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +0000150typedef llvm::BumpPtrList<Token> TokenQueueT;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000151
152namespace {
153/// @brief This struct is used to track simple keys.
154///
155/// Simple keys are handled by creating an entry in SimpleKeys for each Token
156/// which could legally be the start of a simple key. When peekNext is called,
157/// if the Token To be returned is referenced by a SimpleKey, we continue
158/// tokenizing until that potential simple key has either been found to not be
159/// a simple key (we moved on to the next line or went further than 1024 chars).
160/// Or when we run into a Value, and then insert a Key token (and possibly
161/// others) before the SimpleKey's Tok.
162struct SimpleKey {
163 TokenQueueT::iterator Tok;
164 unsigned Column;
165 unsigned Line;
166 unsigned FlowLevel;
167 bool IsRequired;
168
169 bool operator ==(const SimpleKey &Other) {
170 return Tok == Other.Tok;
171 }
172};
Alexander Kornienkof00654e2015-06-23 09:49:53 +0000173}
Michael J. Spencer22120c42012-04-03 23:09:22 +0000174
175/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
176/// subsequence and the subsequence's length in code units (uint8_t).
177/// A length of 0 represents an error.
178typedef std::pair<uint32_t, unsigned> UTF8Decoded;
179
180static UTF8Decoded decodeUTF8(StringRef Range) {
181 StringRef::iterator Position= Range.begin();
182 StringRef::iterator End = Range.end();
183 // 1 byte: [0x00, 0x7f]
184 // Bit pattern: 0xxxxxxx
185 if ((*Position & 0x80) == 0) {
186 return std::make_pair(*Position, 1);
187 }
188 // 2 bytes: [0x80, 0x7ff]
189 // Bit pattern: 110xxxxx 10xxxxxx
190 if (Position + 1 != End &&
191 ((*Position & 0xE0) == 0xC0) &&
192 ((*(Position + 1) & 0xC0) == 0x80)) {
193 uint32_t codepoint = ((*Position & 0x1F) << 6) |
194 (*(Position + 1) & 0x3F);
195 if (codepoint >= 0x80)
196 return std::make_pair(codepoint, 2);
197 }
198 // 3 bytes: [0x8000, 0xffff]
199 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
200 if (Position + 2 != End &&
201 ((*Position & 0xF0) == 0xE0) &&
202 ((*(Position + 1) & 0xC0) == 0x80) &&
203 ((*(Position + 2) & 0xC0) == 0x80)) {
204 uint32_t codepoint = ((*Position & 0x0F) << 12) |
205 ((*(Position + 1) & 0x3F) << 6) |
206 (*(Position + 2) & 0x3F);
207 // Codepoints between 0xD800 and 0xDFFF are invalid, as
208 // they are high / low surrogate halves used by UTF-16.
209 if (codepoint >= 0x800 &&
210 (codepoint < 0xD800 || codepoint > 0xDFFF))
211 return std::make_pair(codepoint, 3);
212 }
213 // 4 bytes: [0x10000, 0x10FFFF]
214 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
215 if (Position + 3 != End &&
216 ((*Position & 0xF8) == 0xF0) &&
217 ((*(Position + 1) & 0xC0) == 0x80) &&
218 ((*(Position + 2) & 0xC0) == 0x80) &&
219 ((*(Position + 3) & 0xC0) == 0x80)) {
220 uint32_t codepoint = ((*Position & 0x07) << 18) |
221 ((*(Position + 1) & 0x3F) << 12) |
222 ((*(Position + 2) & 0x3F) << 6) |
223 (*(Position + 3) & 0x3F);
224 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
225 return std::make_pair(codepoint, 4);
226 }
227 return std::make_pair(0, 0);
228}
229
230namespace llvm {
231namespace yaml {
232/// @brief Scans YAML tokens from a MemoryBuffer.
233class Scanner {
234public:
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000235 Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true);
236 Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true);
Michael J. Spencer22120c42012-04-03 23:09:22 +0000237
238 /// @brief Parse the next token and return it without popping it.
239 Token &peekNext();
240
241 /// @brief Parse the next token and pop it from the queue.
242 Token getNext();
243
244 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
Dmitri Gribenko3238fb72013-05-05 00:40:33 +0000245 ArrayRef<SMRange> Ranges = None) {
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000246 SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
Michael J. Spencer22120c42012-04-03 23:09:22 +0000247 }
248
249 void setError(const Twine &Message, StringRef::iterator Position) {
250 if (Current >= End)
251 Current = End - 1;
252
253 // Don't print out more errors after the first one we encounter. The rest
254 // are just the result of the first, and have no meaning.
255 if (!Failed)
256 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
257 Failed = true;
258 }
259
260 void setError(const Twine &Message) {
261 setError(Message, Current);
262 }
263
264 /// @brief Returns true if an error occurred while parsing.
265 bool failed() {
266 return Failed;
267 }
268
269private:
Rafael Espindola68669e32014-08-27 19:03:22 +0000270 void init(MemoryBufferRef Buffer);
271
Michael J. Spencer22120c42012-04-03 23:09:22 +0000272 StringRef currentInput() {
273 return StringRef(Current, End - Current);
274 }
275
276 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
277 /// at \a Position.
278 ///
279 /// If the UTF-8 code units starting at Position do not form a well-formed
280 /// code unit subsequence, then the Unicode scalar value is 0, and the length
281 /// is 0.
282 UTF8Decoded decodeUTF8(StringRef::iterator Position) {
283 return ::decodeUTF8(StringRef(Position, End - Position));
284 }
285
286 // The following functions are based on the gramar rules in the YAML spec. The
287 // style of the function names it meant to closely match how they are written
288 // in the spec. The number within the [] is the number of the grammar rule in
289 // the spec.
290 //
291 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
292 //
293 // c-
294 // A production starting and ending with a special character.
295 // b-
296 // A production matching a single line break.
297 // nb-
298 // A production starting and ending with a non-break character.
299 // s-
300 // A production starting and ending with a white space character.
301 // ns-
302 // A production starting and ending with a non-space character.
303 // l-
304 // A production matching complete line(s).
305
306 /// @brief Skip a single nb-char[27] starting at Position.
307 ///
308 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
309 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
310 ///
311 /// @returns The code unit after the nb-char, or Position if it's not an
312 /// nb-char.
313 StringRef::iterator skip_nb_char(StringRef::iterator Position);
314
315 /// @brief Skip a single b-break[28] starting at Position.
316 ///
317 /// A b-break is 0xD 0xA | 0xD | 0xA
318 ///
319 /// @returns The code unit after the b-break, or Position if it's not a
320 /// b-break.
321 StringRef::iterator skip_b_break(StringRef::iterator Position);
322
Alex Lorenza22b250c2015-05-13 23:10:51 +0000323 /// Skip a single s-space[31] starting at Position.
324 ///
325 /// An s-space is 0x20
326 ///
327 /// @returns The code unit after the s-space, or Position if it's not a
328 /// s-space.
329 StringRef::iterator skip_s_space(StringRef::iterator Position);
330
Michael J. Spencer22120c42012-04-03 23:09:22 +0000331 /// @brief Skip a single s-white[33] starting at Position.
332 ///
333 /// A s-white is 0x20 | 0x9
334 ///
335 /// @returns The code unit after the s-white, or Position if it's not a
336 /// s-white.
337 StringRef::iterator skip_s_white(StringRef::iterator Position);
338
339 /// @brief Skip a single ns-char[34] starting at Position.
340 ///
341 /// A ns-char is nb-char - s-white
342 ///
343 /// @returns The code unit after the ns-char, or Position if it's not a
344 /// ns-char.
345 StringRef::iterator skip_ns_char(StringRef::iterator Position);
346
347 typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
348 /// @brief Skip minimal well-formed code unit subsequences until Func
349 /// returns its input.
350 ///
351 /// @returns The code unit after the last minimal well-formed code unit
352 /// subsequence that Func accepted.
353 StringRef::iterator skip_while( SkipWhileFunc Func
354 , StringRef::iterator Position);
355
Alex Lorenza22b250c2015-05-13 23:10:51 +0000356 /// Skip minimal well-formed code unit subsequences until Func returns its
357 /// input.
358 void advanceWhile(SkipWhileFunc Func);
359
Michael J. Spencer22120c42012-04-03 23:09:22 +0000360 /// @brief Scan ns-uri-char[39]s starting at Cur.
361 ///
362 /// This updates Cur and Column while scanning.
363 ///
364 /// @returns A StringRef starting at Cur which covers the longest contiguous
365 /// sequence of ns-uri-char.
366 StringRef scan_ns_uri_char();
367
Michael J. Spencer22120c42012-04-03 23:09:22 +0000368 /// @brief Consume a minimal well-formed code unit subsequence starting at
369 /// \a Cur. Return false if it is not the same Unicode scalar value as
370 /// \a Expected. This updates \a Column.
371 bool consume(uint32_t Expected);
372
373 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
374 void skip(uint32_t Distance);
375
376 /// @brief Return true if the minimal well-formed code unit subsequence at
377 /// Pos is whitespace or a new line
378 bool isBlankOrBreak(StringRef::iterator Position);
379
Alex Lorenza22b250c2015-05-13 23:10:51 +0000380 /// Consume a single b-break[28] if it's present at the current position.
381 ///
382 /// Return false if the code unit at the current position isn't a line break.
383 bool consumeLineBreakIfPresent();
384
Michael J. Spencer22120c42012-04-03 23:09:22 +0000385 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
386 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
387 , unsigned AtColumn
388 , bool IsRequired);
389
390 /// @brief Remove simple keys that can no longer be valid simple keys.
391 ///
392 /// Invalid simple keys are not on the current line or are further than 1024
393 /// columns back.
394 void removeStaleSimpleKeyCandidates();
395
396 /// @brief Remove all simple keys on FlowLevel \a Level.
397 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
398
399 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
400 /// tokens if needed.
401 bool unrollIndent(int ToColumn);
402
403 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
404 /// if needed.
405 bool rollIndent( int ToColumn
406 , Token::TokenKind Kind
407 , TokenQueueT::iterator InsertPoint);
408
Alex Lorenzfe6f1862015-05-06 23:00:45 +0000409 /// @brief Skip a single-line comment when the comment starts at the current
410 /// position of the scanner.
411 void skipComment();
412
Michael J. Spencer22120c42012-04-03 23:09:22 +0000413 /// @brief Skip whitespace and comments until the start of the next token.
414 void scanToNextToken();
415
416 /// @brief Must be the first token generated.
417 bool scanStreamStart();
418
419 /// @brief Generate tokens needed to close out the stream.
420 bool scanStreamEnd();
421
422 /// @brief Scan a %BLAH directive.
423 bool scanDirective();
424
425 /// @brief Scan a ... or ---.
426 bool scanDocumentIndicator(bool IsStart);
427
428 /// @brief Scan a [ or { and generate the proper flow collection start token.
429 bool scanFlowCollectionStart(bool IsSequence);
430
431 /// @brief Scan a ] or } and generate the proper flow collection end token.
432 bool scanFlowCollectionEnd(bool IsSequence);
433
434 /// @brief Scan the , that separates entries in a flow collection.
435 bool scanFlowEntry();
436
437 /// @brief Scan the - that starts block sequence entries.
438 bool scanBlockEntry();
439
440 /// @brief Scan an explicit ? indicating a key.
441 bool scanKey();
442
443 /// @brief Scan an explicit : indicating a value.
444 bool scanValue();
445
446 /// @brief Scan a quoted scalar.
447 bool scanFlowScalar(bool IsDoubleQuoted);
448
449 /// @brief Scan an unquoted scalar.
450 bool scanPlainScalar();
451
452 /// @brief Scan an Alias or Anchor starting with * or &.
453 bool scanAliasOrAnchor(bool IsAlias);
454
455 /// @brief Scan a block scalar starting with | or >.
456 bool scanBlockScalar(bool IsLiteral);
457
Alex Lorenza22b250c2015-05-13 23:10:51 +0000458 /// Scan a chomping indicator in a block scalar header.
459 char scanBlockChompingIndicator();
460
461 /// Scan an indentation indicator in a block scalar header.
462 unsigned scanBlockIndentationIndicator();
463
464 /// Scan a block scalar header.
465 ///
466 /// Return false if an error occurred.
467 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
468 bool &IsDone);
469
470 /// Look for the indentation level of a block scalar.
471 ///
472 /// Return false if an error occurred.
473 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
474 unsigned &LineBreaks, bool &IsDone);
475
476 /// Scan the indentation of a text line in a block scalar.
477 ///
478 /// Return false if an error occurred.
479 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
480 bool &IsDone);
481
Michael J. Spencer22120c42012-04-03 23:09:22 +0000482 /// @brief Scan a tag of the form !stuff.
483 bool scanTag();
484
485 /// @brief Dispatch to the next scanning function based on \a *Cur.
486 bool fetchMoreTokens();
487
488 /// @brief The SourceMgr used for diagnostics and buffer management.
489 SourceMgr &SM;
490
491 /// @brief The original input.
Rafael Espindola68669e32014-08-27 19:03:22 +0000492 MemoryBufferRef InputBuffer;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000493
494 /// @brief The current position of the scanner.
495 StringRef::iterator Current;
496
497 /// @brief The end of the input (one past the last character).
498 StringRef::iterator End;
499
500 /// @brief Current YAML indentation level in spaces.
501 int Indent;
502
503 /// @brief Current column number in Unicode code points.
504 unsigned Column;
505
506 /// @brief Current line number.
507 unsigned Line;
508
509 /// @brief How deep we are in flow style containers. 0 Means at block level.
510 unsigned FlowLevel;
511
512 /// @brief Are we at the start of the stream?
513 bool IsStartOfStream;
514
515 /// @brief Can the next token be the start of a simple key?
516 bool IsSimpleKeyAllowed;
517
Michael J. Spencer22120c42012-04-03 23:09:22 +0000518 /// @brief True if an error has occurred.
519 bool Failed;
520
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000521 /// @brief Should colors be used when printing out the diagnostic messages?
522 bool ShowColors;
523
Michael J. Spencer22120c42012-04-03 23:09:22 +0000524 /// @brief Queue of tokens. This is required to queue up tokens while looking
525 /// for the end of a simple key. And for cases where a single character
526 /// can produce multiple tokens (e.g. BlockEnd).
527 TokenQueueT TokenQueue;
528
529 /// @brief Indentation levels.
530 SmallVector<int, 4> Indents;
531
532 /// @brief Potential simple keys.
533 SmallVector<SimpleKey, 4> SimpleKeys;
534};
535
536} // end namespace yaml
537} // end namespace llvm
538
539/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
540static void encodeUTF8( uint32_t UnicodeScalarValue
541 , SmallVectorImpl<char> &Result) {
542 if (UnicodeScalarValue <= 0x7F) {
543 Result.push_back(UnicodeScalarValue & 0x7F);
544 } else if (UnicodeScalarValue <= 0x7FF) {
545 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
546 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
547 Result.push_back(FirstByte);
548 Result.push_back(SecondByte);
549 } else if (UnicodeScalarValue <= 0xFFFF) {
550 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
551 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
552 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
553 Result.push_back(FirstByte);
554 Result.push_back(SecondByte);
555 Result.push_back(ThirdByte);
556 } else if (UnicodeScalarValue <= 0x10FFFF) {
557 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
558 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
559 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
560 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
561 Result.push_back(FirstByte);
562 Result.push_back(SecondByte);
563 Result.push_back(ThirdByte);
564 Result.push_back(FourthByte);
565 }
566}
567
568bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
569 SourceMgr SM;
570 Scanner scanner(Input, SM);
571 while (true) {
572 Token T = scanner.getNext();
573 switch (T.Kind) {
574 case Token::TK_StreamStart:
575 OS << "Stream-Start: ";
576 break;
577 case Token::TK_StreamEnd:
578 OS << "Stream-End: ";
579 break;
580 case Token::TK_VersionDirective:
581 OS << "Version-Directive: ";
582 break;
583 case Token::TK_TagDirective:
584 OS << "Tag-Directive: ";
585 break;
586 case Token::TK_DocumentStart:
587 OS << "Document-Start: ";
588 break;
589 case Token::TK_DocumentEnd:
590 OS << "Document-End: ";
591 break;
592 case Token::TK_BlockEntry:
593 OS << "Block-Entry: ";
594 break;
595 case Token::TK_BlockEnd:
596 OS << "Block-End: ";
597 break;
598 case Token::TK_BlockSequenceStart:
599 OS << "Block-Sequence-Start: ";
600 break;
601 case Token::TK_BlockMappingStart:
602 OS << "Block-Mapping-Start: ";
603 break;
604 case Token::TK_FlowEntry:
605 OS << "Flow-Entry: ";
606 break;
607 case Token::TK_FlowSequenceStart:
608 OS << "Flow-Sequence-Start: ";
609 break;
610 case Token::TK_FlowSequenceEnd:
611 OS << "Flow-Sequence-End: ";
612 break;
613 case Token::TK_FlowMappingStart:
614 OS << "Flow-Mapping-Start: ";
615 break;
616 case Token::TK_FlowMappingEnd:
617 OS << "Flow-Mapping-End: ";
618 break;
619 case Token::TK_Key:
620 OS << "Key: ";
621 break;
622 case Token::TK_Value:
623 OS << "Value: ";
624 break;
625 case Token::TK_Scalar:
626 OS << "Scalar: ";
627 break;
Alex Lorenza22b250c2015-05-13 23:10:51 +0000628 case Token::TK_BlockScalar:
629 OS << "Block Scalar: ";
630 break;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000631 case Token::TK_Alias:
632 OS << "Alias: ";
633 break;
634 case Token::TK_Anchor:
635 OS << "Anchor: ";
636 break;
637 case Token::TK_Tag:
638 OS << "Tag: ";
639 break;
640 case Token::TK_Error:
641 break;
642 }
643 OS << T.Range << "\n";
644 if (T.Kind == Token::TK_StreamEnd)
645 break;
646 else if (T.Kind == Token::TK_Error)
647 return false;
648 }
649 return true;
650}
651
652bool yaml::scanTokens(StringRef Input) {
653 llvm::SourceMgr SM;
654 llvm::yaml::Scanner scanner(Input, SM);
655 for (;;) {
656 llvm::yaml::Token T = scanner.getNext();
657 if (T.Kind == Token::TK_StreamEnd)
658 break;
659 else if (T.Kind == Token::TK_Error)
660 return false;
661 }
662 return true;
663}
664
665std::string yaml::escape(StringRef Input) {
666 std::string EscapedInput;
667 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
668 if (*i == '\\')
669 EscapedInput += "\\\\";
670 else if (*i == '"')
671 EscapedInput += "\\\"";
672 else if (*i == 0)
673 EscapedInput += "\\0";
674 else if (*i == 0x07)
675 EscapedInput += "\\a";
676 else if (*i == 0x08)
677 EscapedInput += "\\b";
678 else if (*i == 0x09)
679 EscapedInput += "\\t";
680 else if (*i == 0x0A)
681 EscapedInput += "\\n";
682 else if (*i == 0x0B)
683 EscapedInput += "\\v";
684 else if (*i == 0x0C)
685 EscapedInput += "\\f";
686 else if (*i == 0x0D)
687 EscapedInput += "\\r";
688 else if (*i == 0x1B)
689 EscapedInput += "\\e";
Benjamin Kramer0aa0d3d2012-04-21 10:51:42 +0000690 else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
Michael J. Spencer22120c42012-04-03 23:09:22 +0000691 std::string HexStr = utohexstr(*i);
692 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
693 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
694 UTF8Decoded UnicodeScalarValue
695 = decodeUTF8(StringRef(i, Input.end() - i));
696 if (UnicodeScalarValue.second == 0) {
697 // Found invalid char.
698 SmallString<4> Val;
699 encodeUTF8(0xFFFD, Val);
700 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
701 // FIXME: Error reporting.
702 return EscapedInput;
703 }
704 if (UnicodeScalarValue.first == 0x85)
705 EscapedInput += "\\N";
706 else if (UnicodeScalarValue.first == 0xA0)
707 EscapedInput += "\\_";
708 else if (UnicodeScalarValue.first == 0x2028)
709 EscapedInput += "\\L";
710 else if (UnicodeScalarValue.first == 0x2029)
711 EscapedInput += "\\P";
712 else {
713 std::string HexStr = utohexstr(UnicodeScalarValue.first);
714 if (HexStr.size() <= 2)
715 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
716 else if (HexStr.size() <= 4)
717 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
718 else if (HexStr.size() <= 8)
719 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
720 }
721 i += UnicodeScalarValue.second - 1;
722 } else
723 EscapedInput.push_back(*i);
724 }
725 return EscapedInput;
726}
727
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000728Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors)
729 : SM(sm), ShowColors(ShowColors) {
Rafael Espindola68669e32014-08-27 19:03:22 +0000730 init(MemoryBufferRef(Input, "YAML"));
Michael J. Spencer22120c42012-04-03 23:09:22 +0000731}
732
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000733Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors)
734 : SM(SM_), ShowColors(ShowColors) {
Rafael Espindola68669e32014-08-27 19:03:22 +0000735 init(Buffer);
736}
737
738void Scanner::init(MemoryBufferRef Buffer) {
739 InputBuffer = Buffer;
740 Current = InputBuffer.getBufferStart();
741 End = InputBuffer.getBufferEnd();
742 Indent = -1;
743 Column = 0;
744 Line = 0;
745 FlowLevel = 0;
746 IsStartOfStream = true;
747 IsSimpleKeyAllowed = true;
748 Failed = false;
749 std::unique_ptr<MemoryBuffer> InputBufferOwner =
750 MemoryBuffer::getMemBuffer(Buffer);
751 SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
Sean Silvaaba82702012-11-19 23:21:47 +0000752}
753
Michael J. Spencer22120c42012-04-03 23:09:22 +0000754Token &Scanner::peekNext() {
755 // If the current token is a possible simple key, keep parsing until we
756 // can confirm.
757 bool NeedMore = false;
758 while (true) {
759 if (TokenQueue.empty() || NeedMore) {
760 if (!fetchMoreTokens()) {
761 TokenQueue.clear();
762 TokenQueue.push_back(Token());
763 return TokenQueue.front();
764 }
765 }
766 assert(!TokenQueue.empty() &&
767 "fetchMoreTokens lied about getting tokens!");
768
769 removeStaleSimpleKeyCandidates();
770 SimpleKey SK;
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +0000771 SK.Tok = TokenQueue.begin();
David Majnemer0d955d02016-08-11 22:21:41 +0000772 if (!is_contained(SimpleKeys, SK))
Michael J. Spencer22120c42012-04-03 23:09:22 +0000773 break;
774 else
775 NeedMore = true;
776 }
777 return TokenQueue.front();
778}
779
780Token Scanner::getNext() {
781 Token Ret = peekNext();
782 // TokenQueue can be empty if there was an error getting the next token.
783 if (!TokenQueue.empty())
784 TokenQueue.pop_front();
785
786 // There cannot be any referenced Token's if the TokenQueue is empty. So do a
787 // quick deallocation of them all.
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +0000788 if (TokenQueue.empty())
789 TokenQueue.resetAlloc();
Michael J. Spencer22120c42012-04-03 23:09:22 +0000790
791 return Ret;
792}
793
794StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
Michael J. Spencer60331132012-04-27 21:12:20 +0000795 if (Position == End)
796 return Position;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000797 // Check 7 bit c-printable - b-char.
798 if ( *Position == 0x09
799 || (*Position >= 0x20 && *Position <= 0x7E))
800 return Position + 1;
801
802 // Check for valid UTF-8.
803 if (uint8_t(*Position) & 0x80) {
804 UTF8Decoded u8d = decodeUTF8(Position);
805 if ( u8d.second != 0
806 && u8d.first != 0xFEFF
807 && ( u8d.first == 0x85
808 || ( u8d.first >= 0xA0
809 && u8d.first <= 0xD7FF)
810 || ( u8d.first >= 0xE000
811 && u8d.first <= 0xFFFD)
812 || ( u8d.first >= 0x10000
813 && u8d.first <= 0x10FFFF)))
814 return Position + u8d.second;
815 }
816 return Position;
817}
818
819StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
Michael J. Spencer60331132012-04-27 21:12:20 +0000820 if (Position == End)
821 return Position;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000822 if (*Position == 0x0D) {
823 if (Position + 1 != End && *(Position + 1) == 0x0A)
824 return Position + 2;
825 return Position + 1;
826 }
827
828 if (*Position == 0x0A)
829 return Position + 1;
830 return Position;
831}
832
Alex Lorenza22b250c2015-05-13 23:10:51 +0000833StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
834 if (Position == End)
835 return Position;
836 if (*Position == ' ')
837 return Position + 1;
838 return Position;
839}
Michael J. Spencer22120c42012-04-03 23:09:22 +0000840
841StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
842 if (Position == End)
843 return Position;
844 if (*Position == ' ' || *Position == '\t')
845 return Position + 1;
846 return Position;
847}
848
849StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
850 if (Position == End)
851 return Position;
852 if (*Position == ' ' || *Position == '\t')
853 return Position;
854 return skip_nb_char(Position);
855}
856
857StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
858 , StringRef::iterator Position) {
859 while (true) {
860 StringRef::iterator i = (this->*Func)(Position);
861 if (i == Position)
862 break;
863 Position = i;
864 }
865 return Position;
866}
867
Alex Lorenza22b250c2015-05-13 23:10:51 +0000868void Scanner::advanceWhile(SkipWhileFunc Func) {
869 auto Final = skip_while(Func, Current);
870 Column += Final - Current;
871 Current = Final;
872}
873
/// Return true if \a C is an ns-hex-digit[36]: a decimal digit or one of the
/// letters A-F / a-f.
///
/// Letters beyond 'f'/'F' are not hexadecimal digits and are rejected, so a
/// sequence such as "%zz" is not treated as a percent escape.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'f')
      || (C >= 'A' && C <= 'F');
}
879
/// Return true if \a C is an ns-word-char[38]: a decimal digit, an ASCII
/// letter, or '-'.
///
/// Digits are part of the production; without them a URI such as
/// "tag:yaml.org,2002:str" would stop being scanned at the first digit.
static bool is_ns_word_char(const char C) {
  return C == '-'
      || (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'z')
      || (C >= 'A' && C <= 'Z');
}
885
886StringRef Scanner::scan_ns_uri_char() {
887 StringRef::iterator Start = Current;
888 while (true) {
889 if (Current == End)
890 break;
891 if (( *Current == '%'
892 && Current + 2 < End
893 && is_ns_hex_digit(*(Current + 1))
894 && is_ns_hex_digit(*(Current + 2)))
895 || is_ns_word_char(*Current)
896 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
897 != StringRef::npos) {
898 ++Current;
899 ++Column;
900 } else
901 break;
902 }
903 return StringRef(Start, Current - Start);
904}
905
Michael J. Spencer22120c42012-04-03 23:09:22 +0000906bool Scanner::consume(uint32_t Expected) {
907 if (Expected >= 0x80)
908 report_fatal_error("Not dealing with this yet");
909 if (Current == End)
910 return false;
911 if (uint8_t(*Current) >= 0x80)
912 report_fatal_error("Not dealing with this yet");
913 if (uint8_t(*Current) == Expected) {
914 ++Current;
915 ++Column;
916 return true;
917 }
918 return false;
919}
920
921void Scanner::skip(uint32_t Distance) {
922 Current += Distance;
923 Column += Distance;
Benjamin Kramer8fb58f62012-09-26 15:52:15 +0000924 assert(Current <= End && "Skipped past the end");
Michael J. Spencer22120c42012-04-03 23:09:22 +0000925}
926
927bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
928 if (Position == End)
929 return false;
Alexander Kornienko66da20a2015-12-28 15:46:15 +0000930 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
931 *Position == '\n';
Michael J. Spencer22120c42012-04-03 23:09:22 +0000932}
933
Alex Lorenza22b250c2015-05-13 23:10:51 +0000934bool Scanner::consumeLineBreakIfPresent() {
935 auto Next = skip_b_break(Current);
936 if (Next == Current)
937 return false;
938 Column = 0;
939 ++Line;
940 Current = Next;
941 return true;
942}
943
Michael J. Spencer22120c42012-04-03 23:09:22 +0000944void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
945 , unsigned AtColumn
946 , bool IsRequired) {
947 if (IsSimpleKeyAllowed) {
948 SimpleKey SK;
949 SK.Tok = Tok;
950 SK.Line = Line;
951 SK.Column = AtColumn;
952 SK.IsRequired = IsRequired;
953 SK.FlowLevel = FlowLevel;
954 SimpleKeys.push_back(SK);
955 }
956}
957
958void Scanner::removeStaleSimpleKeyCandidates() {
959 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
960 i != SimpleKeys.end();) {
961 if (i->Line != Line || i->Column + 1024 < Column) {
962 if (i->IsRequired)
963 setError( "Could not find expected : for simple key"
964 , i->Tok->Range.begin());
965 i = SimpleKeys.erase(i);
966 } else
967 ++i;
968 }
969}
970
971void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
972 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
973 SimpleKeys.pop_back();
974}
975
976bool Scanner::unrollIndent(int ToColumn) {
977 Token T;
978 // Indentation is ignored in flow.
979 if (FlowLevel != 0)
980 return true;
981
982 while (Indent > ToColumn) {
983 T.Kind = Token::TK_BlockEnd;
984 T.Range = StringRef(Current, 1);
985 TokenQueue.push_back(T);
986 Indent = Indents.pop_back_val();
987 }
988
989 return true;
990}
991
992bool Scanner::rollIndent( int ToColumn
993 , Token::TokenKind Kind
994 , TokenQueueT::iterator InsertPoint) {
995 if (FlowLevel)
996 return true;
997 if (Indent < ToColumn) {
998 Indents.push_back(Indent);
999 Indent = ToColumn;
1000
1001 Token T;
1002 T.Kind = Kind;
1003 T.Range = StringRef(Current, 0);
1004 TokenQueue.insert(InsertPoint, T);
1005 }
1006 return true;
1007}
1008
Alex Lorenzfe6f1862015-05-06 23:00:45 +00001009void Scanner::skipComment() {
1010 if (*Current != '#')
1011 return;
1012 while (true) {
1013 // This may skip more than one byte, thus Column is only incremented
1014 // for code points.
1015 StringRef::iterator I = skip_nb_char(Current);
1016 if (I == Current)
1017 break;
1018 Current = I;
1019 ++Column;
1020 }
1021}
1022
Michael J. Spencer22120c42012-04-03 23:09:22 +00001023void Scanner::scanToNextToken() {
1024 while (true) {
1025 while (*Current == ' ' || *Current == '\t') {
1026 skip(1);
1027 }
1028
Alex Lorenzfe6f1862015-05-06 23:00:45 +00001029 skipComment();
Michael J. Spencer22120c42012-04-03 23:09:22 +00001030
1031 // Skip EOL.
1032 StringRef::iterator i = skip_b_break(Current);
1033 if (i == Current)
1034 break;
1035 Current = i;
1036 ++Line;
1037 Column = 0;
1038 // New lines may start a simple key.
1039 if (!FlowLevel)
1040 IsSimpleKeyAllowed = true;
1041 }
1042}
1043
1044bool Scanner::scanStreamStart() {
1045 IsStartOfStream = false;
1046
1047 EncodingInfo EI = getUnicodeEncoding(currentInput());
1048
1049 Token T;
1050 T.Kind = Token::TK_StreamStart;
1051 T.Range = StringRef(Current, EI.second);
1052 TokenQueue.push_back(T);
1053 Current += EI.second;
1054 return true;
1055}
1056
1057bool Scanner::scanStreamEnd() {
1058 // Force an ending new line if one isn't present.
1059 if (Column != 0) {
1060 Column = 0;
1061 ++Line;
1062 }
1063
1064 unrollIndent(-1);
1065 SimpleKeys.clear();
1066 IsSimpleKeyAllowed = false;
1067
1068 Token T;
1069 T.Kind = Token::TK_StreamEnd;
1070 T.Range = StringRef(Current, 0);
1071 TokenQueue.push_back(T);
1072 return true;
1073}
1074
1075bool Scanner::scanDirective() {
1076 // Reset the indentation level.
1077 unrollIndent(-1);
1078 SimpleKeys.clear();
1079 IsSimpleKeyAllowed = false;
1080
1081 StringRef::iterator Start = Current;
1082 consume('%');
1083 StringRef::iterator NameStart = Current;
1084 Current = skip_while(&Scanner::skip_ns_char, Current);
1085 StringRef Name(NameStart, Current - NameStart);
1086 Current = skip_while(&Scanner::skip_s_white, Current);
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001087
1088 Token T;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001089 if (Name == "YAML") {
1090 Current = skip_while(&Scanner::skip_ns_char, Current);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001091 T.Kind = Token::TK_VersionDirective;
1092 T.Range = StringRef(Start, Current - Start);
1093 TokenQueue.push_back(T);
1094 return true;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001095 } else if(Name == "TAG") {
1096 Current = skip_while(&Scanner::skip_ns_char, Current);
1097 Current = skip_while(&Scanner::skip_s_white, Current);
1098 Current = skip_while(&Scanner::skip_ns_char, Current);
1099 T.Kind = Token::TK_TagDirective;
1100 T.Range = StringRef(Start, Current - Start);
1101 TokenQueue.push_back(T);
1102 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001103 }
1104 return false;
1105}
1106
1107bool Scanner::scanDocumentIndicator(bool IsStart) {
1108 unrollIndent(-1);
1109 SimpleKeys.clear();
1110 IsSimpleKeyAllowed = false;
1111
1112 Token T;
1113 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1114 T.Range = StringRef(Current, 3);
1115 skip(3);
1116 TokenQueue.push_back(T);
1117 return true;
1118}
1119
1120bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1121 Token T;
1122 T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1123 : Token::TK_FlowMappingStart;
1124 T.Range = StringRef(Current, 1);
1125 skip(1);
1126 TokenQueue.push_back(T);
1127
1128 // [ and { may begin a simple key.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001129 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001130
1131 // And may also be followed by a simple key.
1132 IsSimpleKeyAllowed = true;
1133 ++FlowLevel;
1134 return true;
1135}
1136
1137bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1138 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1139 IsSimpleKeyAllowed = false;
1140 Token T;
1141 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1142 : Token::TK_FlowMappingEnd;
1143 T.Range = StringRef(Current, 1);
1144 skip(1);
1145 TokenQueue.push_back(T);
1146 if (FlowLevel)
1147 --FlowLevel;
1148 return true;
1149}
1150
1151bool Scanner::scanFlowEntry() {
1152 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1153 IsSimpleKeyAllowed = true;
1154 Token T;
1155 T.Kind = Token::TK_FlowEntry;
1156 T.Range = StringRef(Current, 1);
1157 skip(1);
1158 TokenQueue.push_back(T);
1159 return true;
1160}
1161
1162bool Scanner::scanBlockEntry() {
1163 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1164 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1165 IsSimpleKeyAllowed = true;
1166 Token T;
1167 T.Kind = Token::TK_BlockEntry;
1168 T.Range = StringRef(Current, 1);
1169 skip(1);
1170 TokenQueue.push_back(T);
1171 return true;
1172}
1173
1174bool Scanner::scanKey() {
1175 if (!FlowLevel)
1176 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1177
1178 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1179 IsSimpleKeyAllowed = !FlowLevel;
1180
1181 Token T;
1182 T.Kind = Token::TK_Key;
1183 T.Range = StringRef(Current, 1);
1184 skip(1);
1185 TokenQueue.push_back(T);
1186 return true;
1187}
1188
1189bool Scanner::scanValue() {
1190 // If the previous token could have been a simple key, insert the key token
1191 // into the token queue.
1192 if (!SimpleKeys.empty()) {
1193 SimpleKey SK = SimpleKeys.pop_back_val();
1194 Token T;
1195 T.Kind = Token::TK_Key;
1196 T.Range = SK.Tok->Range;
1197 TokenQueueT::iterator i, e;
1198 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1199 if (i == SK.Tok)
1200 break;
1201 }
1202 assert(i != e && "SimpleKey not in token queue!");
1203 i = TokenQueue.insert(i, T);
1204
1205 // We may also need to add a Block-Mapping-Start token.
1206 rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1207
1208 IsSimpleKeyAllowed = false;
1209 } else {
1210 if (!FlowLevel)
1211 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1212 IsSimpleKeyAllowed = !FlowLevel;
1213 }
1214
1215 Token T;
1216 T.Kind = Token::TK_Value;
1217 T.Range = StringRef(Current, 1);
1218 skip(1);
1219 TokenQueue.push_back(T);
1220 return true;
1221}
1222
1223// Forbidding inlining improves performance by roughly 20%.
1224// FIXME: Remove once llvm optimizes this to the faster version without hints.
1225LLVM_ATTRIBUTE_NOINLINE static bool
1226wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1227
1228// Returns whether a character at 'Position' was escaped with a leading '\'.
1229// 'First' specifies the position of the first character in the string.
1230static bool wasEscaped(StringRef::iterator First,
1231 StringRef::iterator Position) {
1232 assert(Position - 1 >= First);
1233 StringRef::iterator I = Position - 1;
1234 // We calculate the number of consecutive '\'s before the current position
1235 // by iterating backwards through our string.
1236 while (I >= First && *I == '\\') --I;
1237 // (Position - 1 - I) now contains the number of '\'s before the current
1238 // position. If it is odd, the character at 'Position' was escaped.
1239 return (Position - 1 - I) % 2 == 1;
1240}
1241
1242bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1243 StringRef::iterator Start = Current;
1244 unsigned ColStart = Column;
1245 if (IsDoubleQuoted) {
1246 do {
1247 ++Current;
1248 while (Current != End && *Current != '"')
1249 ++Current;
1250 // Repeat until the previous character was not a '\' or was an escaped
1251 // backslash.
Michael J. Spencer60331132012-04-27 21:12:20 +00001252 } while ( Current != End
1253 && *(Current - 1) == '\\'
1254 && wasEscaped(Start + 1, Current));
Michael J. Spencer22120c42012-04-03 23:09:22 +00001255 } else {
1256 skip(1);
1257 while (true) {
1258 // Skip a ' followed by another '.
1259 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1260 skip(2);
1261 continue;
1262 } else if (*Current == '\'')
1263 break;
1264 StringRef::iterator i = skip_nb_char(Current);
1265 if (i == Current) {
1266 i = skip_b_break(Current);
1267 if (i == Current)
1268 break;
1269 Current = i;
1270 Column = 0;
1271 ++Line;
1272 } else {
1273 if (i == End)
1274 break;
1275 Current = i;
1276 ++Column;
1277 }
1278 }
1279 }
Benjamin Kramer8fb58f62012-09-26 15:52:15 +00001280
1281 if (Current == End) {
1282 setError("Expected quote at end of scalar", Current);
1283 return false;
1284 }
1285
Michael J. Spencer22120c42012-04-03 23:09:22 +00001286 skip(1); // Skip ending quote.
1287 Token T;
1288 T.Kind = Token::TK_Scalar;
1289 T.Range = StringRef(Start, Current - Start);
1290 TokenQueue.push_back(T);
1291
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001292 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001293
1294 IsSimpleKeyAllowed = false;
1295
1296 return true;
1297}
1298
1299bool Scanner::scanPlainScalar() {
1300 StringRef::iterator Start = Current;
1301 unsigned ColStart = Column;
1302 unsigned LeadingBlanks = 0;
1303 assert(Indent >= -1 && "Indent must be >= -1 !");
1304 unsigned indent = static_cast<unsigned>(Indent + 1);
1305 while (true) {
1306 if (*Current == '#')
1307 break;
1308
1309 while (!isBlankOrBreak(Current)) {
1310 if ( FlowLevel && *Current == ':'
1311 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
1312 setError("Found unexpected ':' while scanning a plain scalar", Current);
1313 return false;
1314 }
1315
1316 // Check for the end of the plain scalar.
1317 if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1318 || ( FlowLevel
1319 && (StringRef(Current, 1).find_first_of(",:?[]{}")
1320 != StringRef::npos)))
1321 break;
1322
1323 StringRef::iterator i = skip_nb_char(Current);
1324 if (i == Current)
1325 break;
1326 Current = i;
1327 ++Column;
1328 }
1329
1330 // Are we at the end?
1331 if (!isBlankOrBreak(Current))
1332 break;
1333
1334 // Eat blanks.
1335 StringRef::iterator Tmp = Current;
1336 while (isBlankOrBreak(Tmp)) {
1337 StringRef::iterator i = skip_s_white(Tmp);
1338 if (i != Tmp) {
1339 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1340 setError("Found invalid tab character in indentation", Tmp);
1341 return false;
1342 }
1343 Tmp = i;
1344 ++Column;
1345 } else {
1346 i = skip_b_break(Tmp);
1347 if (!LeadingBlanks)
1348 LeadingBlanks = 1;
1349 Tmp = i;
1350 Column = 0;
1351 ++Line;
1352 }
1353 }
1354
1355 if (!FlowLevel && Column < indent)
1356 break;
1357
1358 Current = Tmp;
1359 }
1360 if (Start == Current) {
1361 setError("Got empty plain scalar", Start);
1362 return false;
1363 }
1364 Token T;
1365 T.Kind = Token::TK_Scalar;
1366 T.Range = StringRef(Start, Current - Start);
1367 TokenQueue.push_back(T);
1368
1369 // Plain scalars can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001370 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001371
1372 IsSimpleKeyAllowed = false;
1373
1374 return true;
1375}
1376
1377bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1378 StringRef::iterator Start = Current;
1379 unsigned ColStart = Column;
1380 skip(1);
1381 while(true) {
1382 if ( *Current == '[' || *Current == ']'
1383 || *Current == '{' || *Current == '}'
1384 || *Current == ','
1385 || *Current == ':')
1386 break;
1387 StringRef::iterator i = skip_ns_char(Current);
1388 if (i == Current)
1389 break;
1390 Current = i;
1391 ++Column;
1392 }
1393
1394 if (Start == Current) {
1395 setError("Got empty alias or anchor", Start);
1396 return false;
1397 }
1398
1399 Token T;
1400 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
1401 T.Range = StringRef(Start, Current - Start);
1402 TokenQueue.push_back(T);
1403
1404 // Alias and anchors can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001405 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001406
1407 IsSimpleKeyAllowed = false;
1408
1409 return true;
1410}
1411
Alex Lorenza22b250c2015-05-13 23:10:51 +00001412char Scanner::scanBlockChompingIndicator() {
1413 char Indicator = ' ';
1414 if (Current != End && (*Current == '+' || *Current == '-')) {
1415 Indicator = *Current;
1416 skip(1);
1417 }
1418 return Indicator;
1419}
1420
1421/// Get the number of line breaks after chomping.
1422///
1423/// Return the number of trailing line breaks to emit, depending on
1424/// \p ChompingIndicator.
1425static unsigned getChompedLineBreaks(char ChompingIndicator,
1426 unsigned LineBreaks, StringRef Str) {
1427 if (ChompingIndicator == '-') // Strip all line breaks.
1428 return 0;
1429 if (ChompingIndicator == '+') // Keep all line breaks.
1430 return LineBreaks;
1431 // Clip trailing lines.
1432 return Str.empty() ? 0 : 1;
1433}
1434
1435unsigned Scanner::scanBlockIndentationIndicator() {
1436 unsigned Indent = 0;
1437 if (Current != End && (*Current >= '1' && *Current <= '9')) {
1438 Indent = unsigned(*Current - '0');
1439 skip(1);
1440 }
1441 return Indent;
1442}
1443
1444bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1445 unsigned &IndentIndicator, bool &IsDone) {
1446 auto Start = Current;
1447
1448 ChompingIndicator = scanBlockChompingIndicator();
1449 IndentIndicator = scanBlockIndentationIndicator();
1450 // Check for the chomping indicator once again.
1451 if (ChompingIndicator == ' ')
1452 ChompingIndicator = scanBlockChompingIndicator();
1453 Current = skip_while(&Scanner::skip_s_white, Current);
1454 skipComment();
1455
1456 if (Current == End) { // EOF, we have an empty scalar.
1457 Token T;
1458 T.Kind = Token::TK_BlockScalar;
1459 T.Range = StringRef(Start, Current - Start);
1460 TokenQueue.push_back(T);
1461 IsDone = true;
1462 return true;
1463 }
1464
1465 if (!consumeLineBreakIfPresent()) {
1466 setError("Expected a line break after block scalar header", Current);
1467 return false;
1468 }
1469 return true;
1470}
1471
1472bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1473 unsigned BlockExitIndent,
1474 unsigned &LineBreaks, bool &IsDone) {
1475 unsigned MaxAllSpaceLineCharacters = 0;
1476 StringRef::iterator LongestAllSpaceLine;
1477
1478 while (true) {
1479 advanceWhile(&Scanner::skip_s_space);
1480 if (skip_nb_char(Current) != Current) {
1481 // This line isn't empty, so try and find the indentation.
1482 if (Column <= BlockExitIndent) { // End of the block literal.
1483 IsDone = true;
1484 return true;
1485 }
1486 // We found the block's indentation.
1487 BlockIndent = Column;
1488 if (MaxAllSpaceLineCharacters > BlockIndent) {
1489 setError(
1490 "Leading all-spaces line must be smaller than the block indent",
1491 LongestAllSpaceLine);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001492 return false;
1493 }
Alex Lorenza22b250c2015-05-13 23:10:51 +00001494 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001495 }
Alex Lorenza22b250c2015-05-13 23:10:51 +00001496 if (skip_b_break(Current) != Current &&
1497 Column > MaxAllSpaceLineCharacters) {
1498 // Record the longest all-space line in case it's longer than the
1499 // discovered block indent.
1500 MaxAllSpaceLineCharacters = Column;
1501 LongestAllSpaceLine = Current;
1502 }
1503
1504 // Check for EOF.
1505 if (Current == End) {
1506 IsDone = true;
1507 return true;
1508 }
1509
1510 if (!consumeLineBreakIfPresent()) {
1511 IsDone = true;
1512 return true;
1513 }
1514 ++LineBreaks;
1515 }
1516 return true;
1517}
1518
1519bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1520 unsigned BlockExitIndent, bool &IsDone) {
1521 // Skip the indentation.
1522 while (Column < BlockIndent) {
1523 auto I = skip_s_space(Current);
1524 if (I == Current)
1525 break;
1526 Current = I;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001527 ++Column;
1528 }
1529
Alex Lorenza22b250c2015-05-13 23:10:51 +00001530 if (skip_nb_char(Current) == Current)
1531 return true;
1532
1533 if (Column <= BlockExitIndent) { // End of the block literal.
1534 IsDone = true;
1535 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001536 }
1537
Alex Lorenza22b250c2015-05-13 23:10:51 +00001538 if (Column < BlockIndent) {
1539 if (Current != End && *Current == '#') { // Trailing comment.
1540 IsDone = true;
1541 return true;
1542 }
1543 setError("A text line is less indented than the block scalar", Current);
1544 return false;
1545 }
1546 return true; // A normal text line.
1547}
1548
1549bool Scanner::scanBlockScalar(bool IsLiteral) {
1550 // Eat '|' or '>'
1551 assert(*Current == '|' || *Current == '>');
1552 skip(1);
1553
1554 char ChompingIndicator;
1555 unsigned BlockIndent;
1556 bool IsDone = false;
1557 if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1558 return false;
1559 if (IsDone)
1560 return true;
1561
1562 auto Start = Current;
1563 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1564 unsigned LineBreaks = 0;
1565 if (BlockIndent == 0) {
1566 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1567 IsDone))
1568 return false;
1569 }
1570
1571 // Scan the block's scalars body.
1572 SmallString<256> Str;
1573 while (!IsDone) {
1574 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1575 return false;
1576 if (IsDone)
1577 break;
1578
1579 // Parse the current line.
1580 auto LineStart = Current;
1581 advanceWhile(&Scanner::skip_nb_char);
1582 if (LineStart != Current) {
1583 Str.append(LineBreaks, '\n');
1584 Str.append(StringRef(LineStart, Current - LineStart));
1585 LineBreaks = 0;
1586 }
1587
1588 // Check for EOF.
1589 if (Current == End)
1590 break;
1591
1592 if (!consumeLineBreakIfPresent())
1593 break;
1594 ++LineBreaks;
1595 }
1596
1597 if (Current == End && !LineBreaks)
1598 // Ensure that there is at least one line break before the end of file.
1599 LineBreaks = 1;
1600 Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1601
1602 // New lines may start a simple key.
1603 if (!FlowLevel)
1604 IsSimpleKeyAllowed = true;
1605
Michael J. Spencer22120c42012-04-03 23:09:22 +00001606 Token T;
Alex Lorenza22b250c2015-05-13 23:10:51 +00001607 T.Kind = Token::TK_BlockScalar;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001608 T.Range = StringRef(Start, Current - Start);
Alex Lorenza22b250c2015-05-13 23:10:51 +00001609 T.Value = Str.str().str();
Michael J. Spencer22120c42012-04-03 23:09:22 +00001610 TokenQueue.push_back(T);
1611 return true;
1612}
1613
1614bool Scanner::scanTag() {
1615 StringRef::iterator Start = Current;
1616 unsigned ColStart = Column;
1617 skip(1); // Eat !.
1618 if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1619 else if (*Current == '<') {
1620 skip(1);
1621 scan_ns_uri_char();
1622 if (!consume('>'))
1623 return false;
1624 } else {
1625 // FIXME: Actually parse the c-ns-shorthand-tag rule.
1626 Current = skip_while(&Scanner::skip_ns_char, Current);
1627 }
1628
1629 Token T;
1630 T.Kind = Token::TK_Tag;
1631 T.Range = StringRef(Start, Current - Start);
1632 TokenQueue.push_back(T);
1633
1634 // Tags can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001635 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001636
1637 IsSimpleKeyAllowed = false;
1638
1639 return true;
1640}
1641
1642bool Scanner::fetchMoreTokens() {
1643 if (IsStartOfStream)
1644 return scanStreamStart();
1645
1646 scanToNextToken();
1647
1648 if (Current == End)
1649 return scanStreamEnd();
1650
1651 removeStaleSimpleKeyCandidates();
1652
1653 unrollIndent(Column);
1654
1655 if (Column == 0 && *Current == '%')
1656 return scanDirective();
1657
1658 if (Column == 0 && Current + 4 <= End
1659 && *Current == '-'
1660 && *(Current + 1) == '-'
1661 && *(Current + 2) == '-'
1662 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1663 return scanDocumentIndicator(true);
1664
1665 if (Column == 0 && Current + 4 <= End
1666 && *Current == '.'
1667 && *(Current + 1) == '.'
1668 && *(Current + 2) == '.'
1669 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1670 return scanDocumentIndicator(false);
1671
1672 if (*Current == '[')
1673 return scanFlowCollectionStart(true);
1674
1675 if (*Current == '{')
1676 return scanFlowCollectionStart(false);
1677
1678 if (*Current == ']')
1679 return scanFlowCollectionEnd(true);
1680
1681 if (*Current == '}')
1682 return scanFlowCollectionEnd(false);
1683
1684 if (*Current == ',')
1685 return scanFlowEntry();
1686
1687 if (*Current == '-' && isBlankOrBreak(Current + 1))
1688 return scanBlockEntry();
1689
1690 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1691 return scanKey();
1692
1693 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1694 return scanValue();
1695
1696 if (*Current == '*')
1697 return scanAliasOrAnchor(true);
1698
1699 if (*Current == '&')
1700 return scanAliasOrAnchor(false);
1701
1702 if (*Current == '!')
1703 return scanTag();
1704
1705 if (*Current == '|' && !FlowLevel)
1706 return scanBlockScalar(true);
1707
1708 if (*Current == '>' && !FlowLevel)
1709 return scanBlockScalar(false);
1710
1711 if (*Current == '\'')
1712 return scanFlowScalar(false);
1713
1714 if (*Current == '"')
1715 return scanFlowScalar(true);
1716
1717 // Get a plain scalar.
1718 StringRef FirstChar(Current, 1);
1719 if (!(isBlankOrBreak(Current)
1720 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1721 || (*Current == '-' && !isBlankOrBreak(Current + 1))
1722 || (!FlowLevel && (*Current == '?' || *Current == ':')
1723 && isBlankOrBreak(Current + 1))
1724 || (!FlowLevel && *Current == ':'
1725 && Current + 2 < End
1726 && *(Current + 1) == ':'
1727 && !isBlankOrBreak(Current + 2)))
1728 return scanPlainScalar();
1729
1730 setError("Unrecognized character while tokenizing.");
1731 return false;
1732}
1733
Alex Lorenze4bcfbf2015-05-07 18:08:46 +00001734Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors)
1735 : scanner(new Scanner(Input, SM, ShowColors)), CurrentDoc() {}
Michael J. Spencer22120c42012-04-03 23:09:22 +00001736
Alex Lorenze4bcfbf2015-05-07 18:08:46 +00001737Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors)
1738 : scanner(new Scanner(InputBuffer, SM, ShowColors)), CurrentDoc() {}
Sean Silvaaba82702012-11-19 23:21:47 +00001739
Benjamin Kramera1355d12012-04-04 08:53:34 +00001740Stream::~Stream() {}
1741
Michael J. Spencer22120c42012-04-03 23:09:22 +00001742bool Stream::failed() { return scanner->failed(); }
1743
1744void Stream::printError(Node *N, const Twine &Msg) {
Michael J. Spencer22120c42012-04-03 23:09:22 +00001745 scanner->printError( N->getSourceRange().Start
1746 , SourceMgr::DK_Error
1747 , Msg
Benjamin Kramerea68a942015-02-19 15:26:17 +00001748 , N->getSourceRange());
Michael J. Spencer22120c42012-04-03 23:09:22 +00001749}
1750
Michael J. Spencer22120c42012-04-03 23:09:22 +00001751document_iterator Stream::begin() {
1752 if (CurrentDoc)
1753 report_fatal_error("Can only iterate over the stream once");
1754
1755 // Skip Stream-Start.
1756 scanner->getNext();
1757
1758 CurrentDoc.reset(new Document(*this));
1759 return document_iterator(CurrentDoc);
1760}
1761
1762document_iterator Stream::end() {
1763 return document_iterator();
1764}
1765
1766void Stream::skip() {
1767 for (document_iterator i = begin(), e = end(); i != e; ++i)
1768 i->skip();
1769}
1770
Ahmed Charles56440fd2014-03-06 05:51:42 +00001771Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1772 StringRef T)
1773 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
Michael J. Spencer22120c42012-04-03 23:09:22 +00001774 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1775 SourceRange = SMRange(Start, Start);
1776}
1777
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001778std::string Node::getVerbatimTag() const {
1779 StringRef Raw = getRawTag();
1780 if (!Raw.empty() && Raw != "!") {
1781 std::string Ret;
1782 if (Raw.find_last_of('!') == 0) {
1783 Ret = Doc->getTagMap().find("!")->second;
1784 Ret += Raw.substr(1);
Richard Trieu73d06522015-01-17 00:46:44 +00001785 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001786 } else if (Raw.startswith("!!")) {
1787 Ret = Doc->getTagMap().find("!!")->second;
1788 Ret += Raw.substr(2);
Richard Trieu73d06522015-01-17 00:46:44 +00001789 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001790 } else {
1791 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1792 std::map<StringRef, StringRef>::const_iterator It =
1793 Doc->getTagMap().find(TagHandle);
1794 if (It != Doc->getTagMap().end())
1795 Ret = It->second;
1796 else {
1797 Token T;
1798 T.Kind = Token::TK_Tag;
1799 T.Range = TagHandle;
1800 setError(Twine("Unknown tag handle ") + TagHandle, T);
1801 }
1802 Ret += Raw.substr(Raw.find_last_of('!') + 1);
Richard Trieu73d06522015-01-17 00:46:44 +00001803 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001804 }
1805 }
1806
1807 switch (getType()) {
1808 case NK_Null:
1809 return "tag:yaml.org,2002:null";
1810 case NK_Scalar:
Alex Lorenza22b250c2015-05-13 23:10:51 +00001811 case NK_BlockScalar:
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001812 // TODO: Tag resolution.
1813 return "tag:yaml.org,2002:str";
1814 case NK_Mapping:
1815 return "tag:yaml.org,2002:map";
1816 case NK_Sequence:
1817 return "tag:yaml.org,2002:seq";
1818 }
1819
1820 return "";
1821}
1822
Michael J. Spencer22120c42012-04-03 23:09:22 +00001823Token &Node::peekNext() {
1824 return Doc->peekNext();
1825}
1826
1827Token Node::getNext() {
1828 return Doc->getNext();
1829}
1830
1831Node *Node::parseBlockNode() {
1832 return Doc->parseBlockNode();
1833}
1834
1835BumpPtrAllocator &Node::getAllocator() {
1836 return Doc->NodeAllocator;
1837}
1838
1839void Node::setError(const Twine &Msg, Token &Tok) const {
1840 Doc->setError(Msg, Tok);
1841}
1842
1843bool Node::failed() const {
1844 return Doc->failed();
1845}
1846
1847
1848
1849StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
1850 // TODO: Handle newlines properly. We need to remove leading whitespace.
1851 if (Value[0] == '"') { // Double quoted.
1852 // Pull off the leading and trailing "s.
1853 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1854 // Search for characters that would require unescaping the value.
1855 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1856 if (i != StringRef::npos)
1857 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1858 return UnquotedValue;
1859 } else if (Value[0] == '\'') { // Single quoted.
1860 // Pull off the leading and trailing 's.
1861 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1862 StringRef::size_type i = UnquotedValue.find('\'');
1863 if (i != StringRef::npos) {
1864 // We're going to need Storage.
1865 Storage.clear();
1866 Storage.reserve(UnquotedValue.size());
1867 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
1868 StringRef Valid(UnquotedValue.begin(), i);
1869 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1870 Storage.push_back('\'');
1871 UnquotedValue = UnquotedValue.substr(i + 2);
1872 }
1873 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1874 return StringRef(Storage.begin(), Storage.size());
1875 }
1876 return UnquotedValue;
1877 }
1878 // Plain or block.
Vedant Kumar98372e32016-02-16 02:06:01 +00001879 return Value.rtrim(' ');
Michael J. Spencer22120c42012-04-03 23:09:22 +00001880}
1881
1882StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1883 , StringRef::size_type i
1884 , SmallVectorImpl<char> &Storage)
1885 const {
1886 // Use Storage to build proper value.
1887 Storage.clear();
1888 Storage.reserve(UnquotedValue.size());
1889 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
1890 // Insert all previous chars into Storage.
1891 StringRef Valid(UnquotedValue.begin(), i);
1892 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1893 // Chop off inserted chars.
1894 UnquotedValue = UnquotedValue.substr(i);
1895
1896 assert(!UnquotedValue.empty() && "Can't be empty!");
1897
1898 // Parse escape or line break.
1899 switch (UnquotedValue[0]) {
1900 case '\r':
1901 case '\n':
1902 Storage.push_back('\n');
1903 if ( UnquotedValue.size() > 1
1904 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1905 UnquotedValue = UnquotedValue.substr(1);
1906 UnquotedValue = UnquotedValue.substr(1);
1907 break;
1908 default:
1909 if (UnquotedValue.size() == 1)
1910 // TODO: Report error.
1911 break;
1912 UnquotedValue = UnquotedValue.substr(1);
1913 switch (UnquotedValue[0]) {
1914 default: {
1915 Token T;
1916 T.Range = StringRef(UnquotedValue.begin(), 1);
1917 setError("Unrecognized escape code!", T);
1918 return "";
1919 }
1920 case '\r':
1921 case '\n':
1922 // Remove the new line.
1923 if ( UnquotedValue.size() > 1
1924 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1925 UnquotedValue = UnquotedValue.substr(1);
1926 // If this was just a single byte newline, it will get skipped
1927 // below.
1928 break;
1929 case '0':
1930 Storage.push_back(0x00);
1931 break;
1932 case 'a':
1933 Storage.push_back(0x07);
1934 break;
1935 case 'b':
1936 Storage.push_back(0x08);
1937 break;
1938 case 't':
1939 case 0x09:
1940 Storage.push_back(0x09);
1941 break;
1942 case 'n':
1943 Storage.push_back(0x0A);
1944 break;
1945 case 'v':
1946 Storage.push_back(0x0B);
1947 break;
1948 case 'f':
1949 Storage.push_back(0x0C);
1950 break;
1951 case 'r':
1952 Storage.push_back(0x0D);
1953 break;
1954 case 'e':
1955 Storage.push_back(0x1B);
1956 break;
1957 case ' ':
1958 Storage.push_back(0x20);
1959 break;
1960 case '"':
1961 Storage.push_back(0x22);
1962 break;
1963 case '/':
1964 Storage.push_back(0x2F);
1965 break;
1966 case '\\':
1967 Storage.push_back(0x5C);
1968 break;
1969 case 'N':
1970 encodeUTF8(0x85, Storage);
1971 break;
1972 case '_':
1973 encodeUTF8(0xA0, Storage);
1974 break;
1975 case 'L':
1976 encodeUTF8(0x2028, Storage);
1977 break;
1978 case 'P':
1979 encodeUTF8(0x2029, Storage);
1980 break;
1981 case 'x': {
1982 if (UnquotedValue.size() < 3)
1983 // TODO: Report error.
1984 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00001985 unsigned int UnicodeScalarValue;
1986 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
1987 // TODO: Report error.
1988 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001989 encodeUTF8(UnicodeScalarValue, Storage);
1990 UnquotedValue = UnquotedValue.substr(2);
1991 break;
1992 }
1993 case 'u': {
1994 if (UnquotedValue.size() < 5)
1995 // TODO: Report error.
1996 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00001997 unsigned int UnicodeScalarValue;
1998 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
1999 // TODO: Report error.
2000 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002001 encodeUTF8(UnicodeScalarValue, Storage);
2002 UnquotedValue = UnquotedValue.substr(4);
2003 break;
2004 }
2005 case 'U': {
2006 if (UnquotedValue.size() < 9)
2007 // TODO: Report error.
2008 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00002009 unsigned int UnicodeScalarValue;
2010 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2011 // TODO: Report error.
2012 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002013 encodeUTF8(UnicodeScalarValue, Storage);
2014 UnquotedValue = UnquotedValue.substr(8);
2015 break;
2016 }
2017 }
2018 UnquotedValue = UnquotedValue.substr(1);
2019 }
2020 }
2021 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
2022 return StringRef(Storage.begin(), Storage.size());
2023}
2024
2025Node *KeyValueNode::getKey() {
2026 if (Key)
2027 return Key;
2028 // Handle implicit null keys.
2029 {
2030 Token &t = peekNext();
2031 if ( t.Kind == Token::TK_BlockEnd
2032 || t.Kind == Token::TK_Value
2033 || t.Kind == Token::TK_Error) {
2034 return Key = new (getAllocator()) NullNode(Doc);
2035 }
2036 if (t.Kind == Token::TK_Key)
2037 getNext(); // skip TK_Key.
2038 }
2039
2040 // Handle explicit null keys.
2041 Token &t = peekNext();
2042 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2043 return Key = new (getAllocator()) NullNode(Doc);
2044 }
2045
2046 // We've got a normal key.
2047 return Key = parseBlockNode();
2048}
2049
2050Node *KeyValueNode::getValue() {
2051 if (Value)
2052 return Value;
2053 getKey()->skip();
2054 if (failed())
2055 return Value = new (getAllocator()) NullNode(Doc);
2056
2057 // Handle implicit null values.
2058 {
2059 Token &t = peekNext();
2060 if ( t.Kind == Token::TK_BlockEnd
2061 || t.Kind == Token::TK_FlowMappingEnd
2062 || t.Kind == Token::TK_Key
2063 || t.Kind == Token::TK_FlowEntry
2064 || t.Kind == Token::TK_Error) {
2065 return Value = new (getAllocator()) NullNode(Doc);
2066 }
2067
2068 if (t.Kind != Token::TK_Value) {
2069 setError("Unexpected token in Key Value.", t);
2070 return Value = new (getAllocator()) NullNode(Doc);
2071 }
2072 getNext(); // skip TK_Value.
2073 }
2074
2075 // Handle explicit null values.
2076 Token &t = peekNext();
2077 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2078 return Value = new (getAllocator()) NullNode(Doc);
2079 }
2080
2081 // We got a normal value.
2082 return Value = parseBlockNode();
2083}
2084
2085void MappingNode::increment() {
2086 if (failed()) {
2087 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002088 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002089 return;
2090 }
2091 if (CurrentEntry) {
2092 CurrentEntry->skip();
2093 if (Type == MT_Inline) {
2094 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002095 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002096 return;
2097 }
2098 }
2099 Token T = peekNext();
2100 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2101 // KeyValueNode eats the TK_Key. That way it can detect null keys.
2102 CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2103 } else if (Type == MT_Block) {
2104 switch (T.Kind) {
2105 case Token::TK_BlockEnd:
2106 getNext();
2107 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002108 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002109 break;
2110 default:
2111 setError("Unexpected token. Expected Key or Block End", T);
2112 case Token::TK_Error:
2113 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002114 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002115 }
2116 } else {
2117 switch (T.Kind) {
2118 case Token::TK_FlowEntry:
2119 // Eat the flow entry and recurse.
2120 getNext();
2121 return increment();
2122 case Token::TK_FlowMappingEnd:
2123 getNext();
2124 case Token::TK_Error:
2125 // Set this to end iterator.
2126 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002127 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002128 break;
2129 default:
2130 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2131 "Mapping End."
2132 , T);
2133 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002134 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002135 }
2136 }
2137}
2138
2139void SequenceNode::increment() {
2140 if (failed()) {
2141 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002142 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002143 return;
2144 }
2145 if (CurrentEntry)
2146 CurrentEntry->skip();
2147 Token T = peekNext();
2148 if (SeqType == ST_Block) {
2149 switch (T.Kind) {
2150 case Token::TK_BlockEntry:
2151 getNext();
2152 CurrentEntry = parseBlockNode();
Craig Topper8d399f82014-04-09 04:20:00 +00002153 if (!CurrentEntry) { // An error occurred.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002154 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002155 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002156 }
2157 break;
2158 case Token::TK_BlockEnd:
2159 getNext();
2160 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002161 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002162 break;
2163 default:
2164 setError( "Unexpected token. Expected Block Entry or Block End."
2165 , T);
2166 case Token::TK_Error:
2167 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002168 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002169 }
2170 } else if (SeqType == ST_Indentless) {
2171 switch (T.Kind) {
2172 case Token::TK_BlockEntry:
2173 getNext();
2174 CurrentEntry = parseBlockNode();
Craig Topper8d399f82014-04-09 04:20:00 +00002175 if (!CurrentEntry) { // An error occurred.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002176 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002177 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002178 }
2179 break;
2180 default:
2181 case Token::TK_Error:
2182 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002183 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002184 }
2185 } else if (SeqType == ST_Flow) {
2186 switch (T.Kind) {
2187 case Token::TK_FlowEntry:
2188 // Eat the flow entry and recurse.
2189 getNext();
2190 WasPreviousTokenFlowEntry = true;
2191 return increment();
2192 case Token::TK_FlowSequenceEnd:
2193 getNext();
2194 case Token::TK_Error:
2195 // Set this to end iterator.
2196 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002197 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002198 break;
2199 case Token::TK_StreamEnd:
2200 case Token::TK_DocumentEnd:
2201 case Token::TK_DocumentStart:
2202 setError("Could not find closing ]!", T);
2203 // Set this to end iterator.
2204 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002205 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002206 break;
2207 default:
2208 if (!WasPreviousTokenFlowEntry) {
2209 setError("Expected , between entries!", T);
2210 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002211 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002212 break;
2213 }
2214 // Otherwise it must be a flow entry.
2215 CurrentEntry = parseBlockNode();
2216 if (!CurrentEntry) {
2217 IsAtEnd = true;
2218 }
2219 WasPreviousTokenFlowEntry = false;
2220 break;
2221 }
2222 }
2223}
2224
Craig Topperc10719f2014-04-07 04:17:22 +00002225Document::Document(Stream &S) : stream(S), Root(nullptr) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002226 // Tag maps starts with two default mappings.
2227 TagMap["!"] = "!";
2228 TagMap["!!"] = "tag:yaml.org,2002:";
2229
Michael J. Spencer22120c42012-04-03 23:09:22 +00002230 if (parseDirectives())
2231 expectToken(Token::TK_DocumentStart);
2232 Token &T = peekNext();
2233 if (T.Kind == Token::TK_DocumentStart)
2234 getNext();
2235}
2236
2237bool Document::skip() {
2238 if (stream.scanner->failed())
2239 return false;
2240 if (!Root)
2241 getRoot();
2242 Root->skip();
2243 Token &T = peekNext();
2244 if (T.Kind == Token::TK_StreamEnd)
2245 return false;
2246 if (T.Kind == Token::TK_DocumentEnd) {
2247 getNext();
2248 return skip();
2249 }
2250 return true;
2251}
2252
2253Token &Document::peekNext() {
2254 return stream.scanner->peekNext();
2255}
2256
2257Token Document::getNext() {
2258 return stream.scanner->getNext();
2259}
2260
2261void Document::setError(const Twine &Message, Token &Location) const {
2262 stream.scanner->setError(Message, Location.Range.begin());
2263}
2264
2265bool Document::failed() const {
2266 return stream.scanner->failed();
2267}
2268
2269Node *Document::parseBlockNode() {
2270 Token T = peekNext();
2271 // Handle properties.
2272 Token AnchorInfo;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002273 Token TagInfo;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002274parse_property:
2275 switch (T.Kind) {
2276 case Token::TK_Alias:
2277 getNext();
2278 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2279 case Token::TK_Anchor:
2280 if (AnchorInfo.Kind == Token::TK_Anchor) {
2281 setError("Already encountered an anchor for this node!", T);
Craig Topperc10719f2014-04-07 04:17:22 +00002282 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002283 }
2284 AnchorInfo = getNext(); // Consume TK_Anchor.
2285 T = peekNext();
2286 goto parse_property;
2287 case Token::TK_Tag:
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002288 if (TagInfo.Kind == Token::TK_Tag) {
2289 setError("Already encountered a tag for this node!", T);
Craig Topperc10719f2014-04-07 04:17:22 +00002290 return nullptr;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002291 }
2292 TagInfo = getNext(); // Consume TK_Tag.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002293 T = peekNext();
2294 goto parse_property;
2295 default:
2296 break;
2297 }
2298
2299 switch (T.Kind) {
2300 case Token::TK_BlockEntry:
2301 // We got an unindented BlockEntry sequence. This is not terminated with
2302 // a BlockEnd.
2303 // Don't eat the TK_BlockEntry, SequenceNode needs it.
2304 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2305 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002306 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002307 , SequenceNode::ST_Indentless);
2308 case Token::TK_BlockSequenceStart:
2309 getNext();
2310 return new (NodeAllocator)
2311 SequenceNode( stream.CurrentDoc
2312 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002313 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002314 , SequenceNode::ST_Block);
2315 case Token::TK_BlockMappingStart:
2316 getNext();
2317 return new (NodeAllocator)
2318 MappingNode( stream.CurrentDoc
2319 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002320 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002321 , MappingNode::MT_Block);
2322 case Token::TK_FlowSequenceStart:
2323 getNext();
2324 return new (NodeAllocator)
2325 SequenceNode( stream.CurrentDoc
2326 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002327 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002328 , SequenceNode::ST_Flow);
2329 case Token::TK_FlowMappingStart:
2330 getNext();
2331 return new (NodeAllocator)
2332 MappingNode( stream.CurrentDoc
2333 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002334 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002335 , MappingNode::MT_Flow);
2336 case Token::TK_Scalar:
2337 getNext();
2338 return new (NodeAllocator)
2339 ScalarNode( stream.CurrentDoc
2340 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002341 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002342 , T.Range);
Benjamin Kramer72367332015-05-18 21:11:27 +00002343 case Token::TK_BlockScalar: {
Alex Lorenza22b250c2015-05-13 23:10:51 +00002344 getNext();
Alex Lorenz481dca22015-05-21 19:45:02 +00002345 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2346 StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
Alex Lorenza22b250c2015-05-13 23:10:51 +00002347 return new (NodeAllocator)
2348 BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
Benjamin Kramer72367332015-05-18 21:11:27 +00002349 TagInfo.Range, StrCopy, T.Range);
2350 }
Michael J. Spencer22120c42012-04-03 23:09:22 +00002351 case Token::TK_Key:
2352 // Don't eat the TK_Key, KeyValueNode expects it.
2353 return new (NodeAllocator)
2354 MappingNode( stream.CurrentDoc
2355 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002356 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002357 , MappingNode::MT_Inline);
2358 case Token::TK_DocumentStart:
2359 case Token::TK_DocumentEnd:
2360 case Token::TK_StreamEnd:
2361 default:
2362 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2363 // !!null null.
2364 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2365 case Token::TK_Error:
Craig Topperc10719f2014-04-07 04:17:22 +00002366 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002367 }
2368 llvm_unreachable("Control flow shouldn't reach here.");
Craig Topperc10719f2014-04-07 04:17:22 +00002369 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002370}
2371
2372bool Document::parseDirectives() {
2373 bool isDirective = false;
2374 while (true) {
2375 Token T = peekNext();
2376 if (T.Kind == Token::TK_TagDirective) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002377 parseTAGDirective();
Michael J. Spencer22120c42012-04-03 23:09:22 +00002378 isDirective = true;
2379 } else if (T.Kind == Token::TK_VersionDirective) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002380 parseYAMLDirective();
Michael J. Spencer22120c42012-04-03 23:09:22 +00002381 isDirective = true;
2382 } else
2383 break;
2384 }
2385 return isDirective;
2386}
2387
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002388void Document::parseYAMLDirective() {
2389 getNext(); // Eat %YAML <version>
2390}
2391
2392void Document::parseTAGDirective() {
2393 Token Tag = getNext(); // %TAG <handle> <prefix>
2394 StringRef T = Tag.Range;
2395 // Strip %TAG
2396 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2397 std::size_t HandleEnd = T.find_first_of(" \t");
2398 StringRef TagHandle = T.substr(0, HandleEnd);
2399 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2400 TagMap[TagHandle] = TagPrefix;
2401}
2402
Michael J. Spencer22120c42012-04-03 23:09:22 +00002403bool Document::expectToken(int TK) {
2404 Token T = getNext();
2405 if (T.Kind != TK) {
2406 setError("Unexpected token", T);
2407 return false;
2408 }
2409 return true;
2410}