blob: e2f21a56a810a006bd4f48ebb3df94a32c816cba [file] [log] [blame]
Eugene Zelenko72208a82017-06-21 23:19:47 +00001//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
Michael J. Spencer22120c42012-04-03 23:09:22 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a YAML parser.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/YAMLParser.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000015#include "llvm/ADT/AllocatorList.h"
Eugene Zelenko72208a82017-06-21 23:19:47 +000016#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/None.h"
David Majnemer0d955d02016-08-11 22:21:41 +000018#include "llvm/ADT/STLExtras.h"
Benjamin Kramer16132e62015-03-23 18:07:13 +000019#include "llvm/ADT/SmallString.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000020#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/StringExtras.h"
Eugene Zelenko72208a82017-06-21 23:19:47 +000022#include "llvm/ADT/StringRef.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000023#include "llvm/ADT/Twine.h"
Eugene Zelenko72208a82017-06-21 23:19:47 +000024#include "llvm/Support/Compiler.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000025#include "llvm/Support/ErrorHandling.h"
26#include "llvm/Support/MemoryBuffer.h"
Eugene Zelenko72208a82017-06-21 23:19:47 +000027#include "llvm/Support/SMLoc.h"
Michael J. Spencer22120c42012-04-03 23:09:22 +000028#include "llvm/Support/SourceMgr.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000029#include "llvm/Support/raw_ostream.h"
Eugene Zelenko72208a82017-06-21 23:19:47 +000030#include <algorithm>
31#include <cassert>
32#include <cstddef>
33#include <cstdint>
34#include <map>
35#include <memory>
36#include <string>
37#include <system_error>
38#include <utility>
Michael J. Spencer22120c42012-04-03 23:09:22 +000039
40using namespace llvm;
41using namespace yaml;
42
/// The Unicode encoding forms that getUnicodeEncoding() can report for an
/// input buffer.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32 Little Endian
  UEF_UTF32_BE, ///< UTF-32 Big Endian
  UEF_UTF16_LE, ///< UTF-16 Little Endian
  UEF_UTF16_BE, ///< UTF-16 Big Endian
  UEF_UTF8,     ///< UTF-8 or ascii.
  UEF_Unknown   ///< Not a valid Unicode encoding.
};
51
52/// EncodingInfo - Holds the encoding type and length of the byte order mark if
53/// it exists. Length is in {0, 2, 3, 4}.
Eugene Zelenko72208a82017-06-21 23:19:47 +000054using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
Michael J. Spencer22120c42012-04-03 23:09:22 +000055
56/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
57/// encoding form of \a Input.
58///
59/// @param Input A string of length 0 or more.
60/// @returns An EncodingInfo indicating the Unicode encoding form of the input
61/// and how long the byte order mark is if one exists.
62static EncodingInfo getUnicodeEncoding(StringRef Input) {
Eugene Zelenko72208a82017-06-21 23:19:47 +000063 if (Input.empty())
Michael J. Spencer22120c42012-04-03 23:09:22 +000064 return std::make_pair(UEF_Unknown, 0);
65
66 switch (uint8_t(Input[0])) {
67 case 0x00:
68 if (Input.size() >= 4) {
69 if ( Input[1] == 0
70 && uint8_t(Input[2]) == 0xFE
71 && uint8_t(Input[3]) == 0xFF)
72 return std::make_pair(UEF_UTF32_BE, 4);
73 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
74 return std::make_pair(UEF_UTF32_BE, 0);
75 }
76
77 if (Input.size() >= 2 && Input[1] != 0)
78 return std::make_pair(UEF_UTF16_BE, 0);
79 return std::make_pair(UEF_Unknown, 0);
80 case 0xFF:
81 if ( Input.size() >= 4
82 && uint8_t(Input[1]) == 0xFE
83 && Input[2] == 0
84 && Input[3] == 0)
85 return std::make_pair(UEF_UTF32_LE, 4);
86
87 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
88 return std::make_pair(UEF_UTF16_LE, 2);
89 return std::make_pair(UEF_Unknown, 0);
90 case 0xFE:
91 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
92 return std::make_pair(UEF_UTF16_BE, 2);
93 return std::make_pair(UEF_Unknown, 0);
94 case 0xEF:
95 if ( Input.size() >= 3
96 && uint8_t(Input[1]) == 0xBB
97 && uint8_t(Input[2]) == 0xBF)
98 return std::make_pair(UEF_UTF8, 3);
99 return std::make_pair(UEF_Unknown, 0);
100 }
101
102 // It could still be utf-32 or utf-16.
103 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104 return std::make_pair(UEF_UTF32_LE, 0);
105
106 if (Input.size() >= 2 && Input[1] == 0)
107 return std::make_pair(UEF_UTF16_LE, 0);
108
109 return std::make_pair(UEF_UTF8, 0);
110}
111
/// Pin the vtables to this file.
///
/// Each anchor() below is a deliberately empty out-of-line definition; it has
/// no runtime effect.
// NOTE(review): presumably anchor() is declared virtual in YAMLParser.h so
// that these definitions act as the key functions for their classes -- confirm
// against the header.
void Node::anchor() {}
void NullNode::anchor() {}
void ScalarNode::anchor() {}
void BlockScalarNode::anchor() {}
void KeyValueNode::anchor() {}
void MappingNode::anchor() {}
void SequenceNode::anchor() {}
void AliasNode::anchor() {}
121
Eugene Zelenko72208a82017-06-21 23:19:47 +0000122namespace llvm {
123namespace yaml {
124
Michael J. Spencer22120c42012-04-03 23:09:22 +0000125/// Token - A single YAML token.
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +0000126struct Token {
Michael J. Spencer22120c42012-04-03 23:09:22 +0000127 enum TokenKind {
128 TK_Error, // Uninitialized token.
129 TK_StreamStart,
130 TK_StreamEnd,
131 TK_VersionDirective,
132 TK_TagDirective,
133 TK_DocumentStart,
134 TK_DocumentEnd,
135 TK_BlockEntry,
136 TK_BlockEnd,
137 TK_BlockSequenceStart,
138 TK_BlockMappingStart,
139 TK_FlowEntry,
140 TK_FlowSequenceStart,
141 TK_FlowSequenceEnd,
142 TK_FlowMappingStart,
143 TK_FlowMappingEnd,
144 TK_Key,
145 TK_Value,
146 TK_Scalar,
Alex Lorenza22b250c2015-05-13 23:10:51 +0000147 TK_BlockScalar,
Michael J. Spencer22120c42012-04-03 23:09:22 +0000148 TK_Alias,
149 TK_Anchor,
150 TK_Tag
Eugene Zelenko72208a82017-06-21 23:19:47 +0000151 } Kind = TK_Error;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000152
153 /// A string of length 0 or more whose begin() points to the logical location
154 /// of the token in the input.
155 StringRef Range;
156
Alex Lorenza22b250c2015-05-13 23:10:51 +0000157 /// The value of a block scalar node.
158 std::string Value;
159
Eugene Zelenko72208a82017-06-21 23:19:47 +0000160 Token() = default;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000161};
Michael J. Spencer22120c42012-04-03 23:09:22 +0000162
Eugene Zelenko72208a82017-06-21 23:19:47 +0000163} // end namespace yaml
164} // end namespace llvm
165
/// The container type used for the Scanner's queue of pending Tokens.
/// SimpleKey keeps iterators into a queue of this type.
using TokenQueueT = BumpPtrList<Token>;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000167
168namespace {
Eugene Zelenko72208a82017-06-21 23:19:47 +0000169
Michael J. Spencer22120c42012-04-03 23:09:22 +0000170/// @brief This struct is used to track simple keys.
171///
172/// Simple keys are handled by creating an entry in SimpleKeys for each Token
173/// which could legally be the start of a simple key. When peekNext is called,
174/// if the Token To be returned is referenced by a SimpleKey, we continue
175/// tokenizing until that potential simple key has either been found to not be
176/// a simple key (we moved on to the next line or went further than 1024 chars).
177/// Or when we run into a Value, and then insert a Key token (and possibly
178/// others) before the SimpleKey's Tok.
179struct SimpleKey {
180 TokenQueueT::iterator Tok;
181 unsigned Column;
182 unsigned Line;
183 unsigned FlowLevel;
184 bool IsRequired;
185
186 bool operator ==(const SimpleKey &Other) {
187 return Tok == Other.Tok;
188 }
189};
Eugene Zelenko72208a82017-06-21 23:19:47 +0000190
191} // end anonymous namespace
Michael J. Spencer22120c42012-04-03 23:09:22 +0000192
193/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
194/// subsequence and the subsequence's length in code units (uint8_t).
195/// A length of 0 represents an error.
Eugene Zelenko72208a82017-06-21 23:19:47 +0000196using UTF8Decoded = std::pair<uint32_t, unsigned>;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000197
198static UTF8Decoded decodeUTF8(StringRef Range) {
199 StringRef::iterator Position= Range.begin();
200 StringRef::iterator End = Range.end();
201 // 1 byte: [0x00, 0x7f]
202 // Bit pattern: 0xxxxxxx
203 if ((*Position & 0x80) == 0) {
204 return std::make_pair(*Position, 1);
205 }
206 // 2 bytes: [0x80, 0x7ff]
207 // Bit pattern: 110xxxxx 10xxxxxx
208 if (Position + 1 != End &&
209 ((*Position & 0xE0) == 0xC0) &&
210 ((*(Position + 1) & 0xC0) == 0x80)) {
211 uint32_t codepoint = ((*Position & 0x1F) << 6) |
212 (*(Position + 1) & 0x3F);
213 if (codepoint >= 0x80)
214 return std::make_pair(codepoint, 2);
215 }
216 // 3 bytes: [0x8000, 0xffff]
217 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
218 if (Position + 2 != End &&
219 ((*Position & 0xF0) == 0xE0) &&
220 ((*(Position + 1) & 0xC0) == 0x80) &&
221 ((*(Position + 2) & 0xC0) == 0x80)) {
222 uint32_t codepoint = ((*Position & 0x0F) << 12) |
223 ((*(Position + 1) & 0x3F) << 6) |
224 (*(Position + 2) & 0x3F);
225 // Codepoints between 0xD800 and 0xDFFF are invalid, as
226 // they are high / low surrogate halves used by UTF-16.
227 if (codepoint >= 0x800 &&
228 (codepoint < 0xD800 || codepoint > 0xDFFF))
229 return std::make_pair(codepoint, 3);
230 }
231 // 4 bytes: [0x10000, 0x10FFFF]
232 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
233 if (Position + 3 != End &&
234 ((*Position & 0xF8) == 0xF0) &&
235 ((*(Position + 1) & 0xC0) == 0x80) &&
236 ((*(Position + 2) & 0xC0) == 0x80) &&
237 ((*(Position + 3) & 0xC0) == 0x80)) {
238 uint32_t codepoint = ((*Position & 0x07) << 18) |
239 ((*(Position + 1) & 0x3F) << 12) |
240 ((*(Position + 2) & 0x3F) << 6) |
241 (*(Position + 3) & 0x3F);
242 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
243 return std::make_pair(codepoint, 4);
244 }
245 return std::make_pair(0, 0);
246}
247
248namespace llvm {
249namespace yaml {
Eugene Zelenko72208a82017-06-21 23:19:47 +0000250
Michael J. Spencer22120c42012-04-03 23:09:22 +0000251/// @brief Scans YAML tokens from a MemoryBuffer.
252class Scanner {
253public:
Mehdi Amini3ab3fef2016-11-28 21:38:52 +0000254 Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
255 std::error_code *EC = nullptr);
256 Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
257 std::error_code *EC = nullptr);
Michael J. Spencer22120c42012-04-03 23:09:22 +0000258
259 /// @brief Parse the next token and return it without popping it.
260 Token &peekNext();
261
262 /// @brief Parse the next token and pop it from the queue.
263 Token getNext();
264
265 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
Dmitri Gribenko3238fb72013-05-05 00:40:33 +0000266 ArrayRef<SMRange> Ranges = None) {
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000267 SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
Michael J. Spencer22120c42012-04-03 23:09:22 +0000268 }
269
270 void setError(const Twine &Message, StringRef::iterator Position) {
271 if (Current >= End)
272 Current = End - 1;
273
Mehdi Amini3ab3fef2016-11-28 21:38:52 +0000274 // propagate the error if possible
275 if (EC)
276 *EC = make_error_code(std::errc::invalid_argument);
277
Michael J. Spencer22120c42012-04-03 23:09:22 +0000278 // Don't print out more errors after the first one we encounter. The rest
279 // are just the result of the first, and have no meaning.
280 if (!Failed)
281 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
282 Failed = true;
283 }
284
285 void setError(const Twine &Message) {
286 setError(Message, Current);
287 }
288
289 /// @brief Returns true if an error occurred while parsing.
290 bool failed() {
291 return Failed;
292 }
293
294private:
Rafael Espindola68669e32014-08-27 19:03:22 +0000295 void init(MemoryBufferRef Buffer);
296
Michael J. Spencer22120c42012-04-03 23:09:22 +0000297 StringRef currentInput() {
298 return StringRef(Current, End - Current);
299 }
300
301 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
302 /// at \a Position.
303 ///
304 /// If the UTF-8 code units starting at Position do not form a well-formed
305 /// code unit subsequence, then the Unicode scalar value is 0, and the length
306 /// is 0.
307 UTF8Decoded decodeUTF8(StringRef::iterator Position) {
308 return ::decodeUTF8(StringRef(Position, End - Position));
309 }
310
311 // The following functions are based on the gramar rules in the YAML spec. The
312 // style of the function names it meant to closely match how they are written
313 // in the spec. The number within the [] is the number of the grammar rule in
314 // the spec.
315 //
316 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
317 //
318 // c-
319 // A production starting and ending with a special character.
320 // b-
321 // A production matching a single line break.
322 // nb-
323 // A production starting and ending with a non-break character.
324 // s-
325 // A production starting and ending with a white space character.
326 // ns-
327 // A production starting and ending with a non-space character.
328 // l-
329 // A production matching complete line(s).
330
331 /// @brief Skip a single nb-char[27] starting at Position.
332 ///
333 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
334 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
335 ///
336 /// @returns The code unit after the nb-char, or Position if it's not an
337 /// nb-char.
338 StringRef::iterator skip_nb_char(StringRef::iterator Position);
339
340 /// @brief Skip a single b-break[28] starting at Position.
341 ///
342 /// A b-break is 0xD 0xA | 0xD | 0xA
343 ///
344 /// @returns The code unit after the b-break, or Position if it's not a
345 /// b-break.
346 StringRef::iterator skip_b_break(StringRef::iterator Position);
347
Alex Lorenza22b250c2015-05-13 23:10:51 +0000348 /// Skip a single s-space[31] starting at Position.
349 ///
350 /// An s-space is 0x20
351 ///
352 /// @returns The code unit after the s-space, or Position if it's not a
353 /// s-space.
354 StringRef::iterator skip_s_space(StringRef::iterator Position);
355
Michael J. Spencer22120c42012-04-03 23:09:22 +0000356 /// @brief Skip a single s-white[33] starting at Position.
357 ///
358 /// A s-white is 0x20 | 0x9
359 ///
360 /// @returns The code unit after the s-white, or Position if it's not a
361 /// s-white.
362 StringRef::iterator skip_s_white(StringRef::iterator Position);
363
364 /// @brief Skip a single ns-char[34] starting at Position.
365 ///
366 /// A ns-char is nb-char - s-white
367 ///
368 /// @returns The code unit after the ns-char, or Position if it's not a
369 /// ns-char.
370 StringRef::iterator skip_ns_char(StringRef::iterator Position);
371
Eugene Zelenko72208a82017-06-21 23:19:47 +0000372 using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
373
Michael J. Spencer22120c42012-04-03 23:09:22 +0000374 /// @brief Skip minimal well-formed code unit subsequences until Func
375 /// returns its input.
376 ///
377 /// @returns The code unit after the last minimal well-formed code unit
378 /// subsequence that Func accepted.
379 StringRef::iterator skip_while( SkipWhileFunc Func
380 , StringRef::iterator Position);
381
Alex Lorenza22b250c2015-05-13 23:10:51 +0000382 /// Skip minimal well-formed code unit subsequences until Func returns its
383 /// input.
384 void advanceWhile(SkipWhileFunc Func);
385
Michael J. Spencer22120c42012-04-03 23:09:22 +0000386 /// @brief Scan ns-uri-char[39]s starting at Cur.
387 ///
388 /// This updates Cur and Column while scanning.
Justin Bogner16742612016-10-16 22:01:22 +0000389 void scan_ns_uri_char();
Michael J. Spencer22120c42012-04-03 23:09:22 +0000390
Michael J. Spencer22120c42012-04-03 23:09:22 +0000391 /// @brief Consume a minimal well-formed code unit subsequence starting at
392 /// \a Cur. Return false if it is not the same Unicode scalar value as
393 /// \a Expected. This updates \a Column.
394 bool consume(uint32_t Expected);
395
396 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
397 void skip(uint32_t Distance);
398
399 /// @brief Return true if the minimal well-formed code unit subsequence at
400 /// Pos is whitespace or a new line
401 bool isBlankOrBreak(StringRef::iterator Position);
402
Alex Lorenza22b250c2015-05-13 23:10:51 +0000403 /// Consume a single b-break[28] if it's present at the current position.
404 ///
405 /// Return false if the code unit at the current position isn't a line break.
406 bool consumeLineBreakIfPresent();
407
Michael J. Spencer22120c42012-04-03 23:09:22 +0000408 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
409 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
410 , unsigned AtColumn
411 , bool IsRequired);
412
413 /// @brief Remove simple keys that can no longer be valid simple keys.
414 ///
415 /// Invalid simple keys are not on the current line or are further than 1024
416 /// columns back.
417 void removeStaleSimpleKeyCandidates();
418
419 /// @brief Remove all simple keys on FlowLevel \a Level.
420 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
421
422 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
423 /// tokens if needed.
424 bool unrollIndent(int ToColumn);
425
426 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
427 /// if needed.
428 bool rollIndent( int ToColumn
429 , Token::TokenKind Kind
430 , TokenQueueT::iterator InsertPoint);
431
Alex Lorenzfe6f1862015-05-06 23:00:45 +0000432 /// @brief Skip a single-line comment when the comment starts at the current
433 /// position of the scanner.
434 void skipComment();
435
Michael J. Spencer22120c42012-04-03 23:09:22 +0000436 /// @brief Skip whitespace and comments until the start of the next token.
437 void scanToNextToken();
438
439 /// @brief Must be the first token generated.
440 bool scanStreamStart();
441
442 /// @brief Generate tokens needed to close out the stream.
443 bool scanStreamEnd();
444
445 /// @brief Scan a %BLAH directive.
446 bool scanDirective();
447
448 /// @brief Scan a ... or ---.
449 bool scanDocumentIndicator(bool IsStart);
450
451 /// @brief Scan a [ or { and generate the proper flow collection start token.
452 bool scanFlowCollectionStart(bool IsSequence);
453
454 /// @brief Scan a ] or } and generate the proper flow collection end token.
455 bool scanFlowCollectionEnd(bool IsSequence);
456
457 /// @brief Scan the , that separates entries in a flow collection.
458 bool scanFlowEntry();
459
460 /// @brief Scan the - that starts block sequence entries.
461 bool scanBlockEntry();
462
463 /// @brief Scan an explicit ? indicating a key.
464 bool scanKey();
465
466 /// @brief Scan an explicit : indicating a value.
467 bool scanValue();
468
469 /// @brief Scan a quoted scalar.
470 bool scanFlowScalar(bool IsDoubleQuoted);
471
472 /// @brief Scan an unquoted scalar.
473 bool scanPlainScalar();
474
475 /// @brief Scan an Alias or Anchor starting with * or &.
476 bool scanAliasOrAnchor(bool IsAlias);
477
478 /// @brief Scan a block scalar starting with | or >.
479 bool scanBlockScalar(bool IsLiteral);
480
Alex Lorenza22b250c2015-05-13 23:10:51 +0000481 /// Scan a chomping indicator in a block scalar header.
482 char scanBlockChompingIndicator();
483
484 /// Scan an indentation indicator in a block scalar header.
485 unsigned scanBlockIndentationIndicator();
486
487 /// Scan a block scalar header.
488 ///
489 /// Return false if an error occurred.
490 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
491 bool &IsDone);
492
493 /// Look for the indentation level of a block scalar.
494 ///
495 /// Return false if an error occurred.
496 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
497 unsigned &LineBreaks, bool &IsDone);
498
499 /// Scan the indentation of a text line in a block scalar.
500 ///
501 /// Return false if an error occurred.
502 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
503 bool &IsDone);
504
Michael J. Spencer22120c42012-04-03 23:09:22 +0000505 /// @brief Scan a tag of the form !stuff.
506 bool scanTag();
507
508 /// @brief Dispatch to the next scanning function based on \a *Cur.
509 bool fetchMoreTokens();
510
511 /// @brief The SourceMgr used for diagnostics and buffer management.
512 SourceMgr &SM;
513
514 /// @brief The original input.
Rafael Espindola68669e32014-08-27 19:03:22 +0000515 MemoryBufferRef InputBuffer;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000516
517 /// @brief The current position of the scanner.
518 StringRef::iterator Current;
519
520 /// @brief The end of the input (one past the last character).
521 StringRef::iterator End;
522
523 /// @brief Current YAML indentation level in spaces.
524 int Indent;
525
526 /// @brief Current column number in Unicode code points.
527 unsigned Column;
528
529 /// @brief Current line number.
530 unsigned Line;
531
532 /// @brief How deep we are in flow style containers. 0 Means at block level.
533 unsigned FlowLevel;
534
535 /// @brief Are we at the start of the stream?
536 bool IsStartOfStream;
537
538 /// @brief Can the next token be the start of a simple key?
539 bool IsSimpleKeyAllowed;
540
Michael J. Spencer22120c42012-04-03 23:09:22 +0000541 /// @brief True if an error has occurred.
542 bool Failed;
543
Alex Lorenze4bcfbf2015-05-07 18:08:46 +0000544 /// @brief Should colors be used when printing out the diagnostic messages?
545 bool ShowColors;
546
Michael J. Spencer22120c42012-04-03 23:09:22 +0000547 /// @brief Queue of tokens. This is required to queue up tokens while looking
548 /// for the end of a simple key. And for cases where a single character
549 /// can produce multiple tokens (e.g. BlockEnd).
550 TokenQueueT TokenQueue;
551
552 /// @brief Indentation levels.
553 SmallVector<int, 4> Indents;
554
555 /// @brief Potential simple keys.
556 SmallVector<SimpleKey, 4> SimpleKeys;
Mehdi Amini3ab3fef2016-11-28 21:38:52 +0000557
558 std::error_code *EC;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000559};
560
561} // end namespace yaml
562} // end namespace llvm
563
564/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
565static void encodeUTF8( uint32_t UnicodeScalarValue
566 , SmallVectorImpl<char> &Result) {
567 if (UnicodeScalarValue <= 0x7F) {
568 Result.push_back(UnicodeScalarValue & 0x7F);
569 } else if (UnicodeScalarValue <= 0x7FF) {
570 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
571 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
572 Result.push_back(FirstByte);
573 Result.push_back(SecondByte);
574 } else if (UnicodeScalarValue <= 0xFFFF) {
575 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
576 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
577 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
578 Result.push_back(FirstByte);
579 Result.push_back(SecondByte);
580 Result.push_back(ThirdByte);
581 } else if (UnicodeScalarValue <= 0x10FFFF) {
582 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
583 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
584 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
585 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
586 Result.push_back(FirstByte);
587 Result.push_back(SecondByte);
588 Result.push_back(ThirdByte);
589 Result.push_back(FourthByte);
590 }
591}
592
593bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
594 SourceMgr SM;
595 Scanner scanner(Input, SM);
596 while (true) {
597 Token T = scanner.getNext();
598 switch (T.Kind) {
599 case Token::TK_StreamStart:
600 OS << "Stream-Start: ";
601 break;
602 case Token::TK_StreamEnd:
603 OS << "Stream-End: ";
604 break;
605 case Token::TK_VersionDirective:
606 OS << "Version-Directive: ";
607 break;
608 case Token::TK_TagDirective:
609 OS << "Tag-Directive: ";
610 break;
611 case Token::TK_DocumentStart:
612 OS << "Document-Start: ";
613 break;
614 case Token::TK_DocumentEnd:
615 OS << "Document-End: ";
616 break;
617 case Token::TK_BlockEntry:
618 OS << "Block-Entry: ";
619 break;
620 case Token::TK_BlockEnd:
621 OS << "Block-End: ";
622 break;
623 case Token::TK_BlockSequenceStart:
624 OS << "Block-Sequence-Start: ";
625 break;
626 case Token::TK_BlockMappingStart:
627 OS << "Block-Mapping-Start: ";
628 break;
629 case Token::TK_FlowEntry:
630 OS << "Flow-Entry: ";
631 break;
632 case Token::TK_FlowSequenceStart:
633 OS << "Flow-Sequence-Start: ";
634 break;
635 case Token::TK_FlowSequenceEnd:
636 OS << "Flow-Sequence-End: ";
637 break;
638 case Token::TK_FlowMappingStart:
639 OS << "Flow-Mapping-Start: ";
640 break;
641 case Token::TK_FlowMappingEnd:
642 OS << "Flow-Mapping-End: ";
643 break;
644 case Token::TK_Key:
645 OS << "Key: ";
646 break;
647 case Token::TK_Value:
648 OS << "Value: ";
649 break;
650 case Token::TK_Scalar:
651 OS << "Scalar: ";
652 break;
Alex Lorenza22b250c2015-05-13 23:10:51 +0000653 case Token::TK_BlockScalar:
654 OS << "Block Scalar: ";
655 break;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000656 case Token::TK_Alias:
657 OS << "Alias: ";
658 break;
659 case Token::TK_Anchor:
660 OS << "Anchor: ";
661 break;
662 case Token::TK_Tag:
663 OS << "Tag: ";
664 break;
665 case Token::TK_Error:
666 break;
667 }
668 OS << T.Range << "\n";
669 if (T.Kind == Token::TK_StreamEnd)
670 break;
671 else if (T.Kind == Token::TK_Error)
672 return false;
673 }
674 return true;
675}
676
677bool yaml::scanTokens(StringRef Input) {
Eugene Zelenko72208a82017-06-21 23:19:47 +0000678 SourceMgr SM;
679 Scanner scanner(Input, SM);
680 while (true) {
681 Token T = scanner.getNext();
Michael J. Spencer22120c42012-04-03 23:09:22 +0000682 if (T.Kind == Token::TK_StreamEnd)
683 break;
684 else if (T.Kind == Token::TK_Error)
685 return false;
686 }
687 return true;
688}
689
690std::string yaml::escape(StringRef Input) {
691 std::string EscapedInput;
692 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
693 if (*i == '\\')
694 EscapedInput += "\\\\";
695 else if (*i == '"')
696 EscapedInput += "\\\"";
697 else if (*i == 0)
698 EscapedInput += "\\0";
699 else if (*i == 0x07)
700 EscapedInput += "\\a";
701 else if (*i == 0x08)
702 EscapedInput += "\\b";
703 else if (*i == 0x09)
704 EscapedInput += "\\t";
705 else if (*i == 0x0A)
706 EscapedInput += "\\n";
707 else if (*i == 0x0B)
708 EscapedInput += "\\v";
709 else if (*i == 0x0C)
710 EscapedInput += "\\f";
711 else if (*i == 0x0D)
712 EscapedInput += "\\r";
713 else if (*i == 0x1B)
714 EscapedInput += "\\e";
Benjamin Kramer0aa0d3d2012-04-21 10:51:42 +0000715 else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
Michael J. Spencer22120c42012-04-03 23:09:22 +0000716 std::string HexStr = utohexstr(*i);
717 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
718 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
719 UTF8Decoded UnicodeScalarValue
720 = decodeUTF8(StringRef(i, Input.end() - i));
721 if (UnicodeScalarValue.second == 0) {
722 // Found invalid char.
723 SmallString<4> Val;
724 encodeUTF8(0xFFFD, Val);
725 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
726 // FIXME: Error reporting.
727 return EscapedInput;
728 }
729 if (UnicodeScalarValue.first == 0x85)
730 EscapedInput += "\\N";
731 else if (UnicodeScalarValue.first == 0xA0)
732 EscapedInput += "\\_";
733 else if (UnicodeScalarValue.first == 0x2028)
734 EscapedInput += "\\L";
735 else if (UnicodeScalarValue.first == 0x2029)
736 EscapedInput += "\\P";
737 else {
738 std::string HexStr = utohexstr(UnicodeScalarValue.first);
739 if (HexStr.size() <= 2)
740 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
741 else if (HexStr.size() <= 4)
742 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
743 else if (HexStr.size() <= 8)
744 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
745 }
746 i += UnicodeScalarValue.second - 1;
747 } else
748 EscapedInput.push_back(*i);
749 }
750 return EscapedInput;
751}
752
/// Construct a Scanner over the text in \a Input.
///
/// The text is wrapped in a MemoryBufferRef created with the string "YAML" and
/// passed to init(), which sets up the scanning state.
///
/// \param Input The text to scan.
/// \param sm The SourceMgr stored for diagnostics and buffer management.
/// \param ShowColors Stored and later forwarded when printing diagnostics.
/// \param EC Stored as the optional destination for an error code.
Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
                 std::error_code *EC)
    : SM(sm), ShowColors(ShowColors), EC(EC) {
  init(MemoryBufferRef(Input, "YAML"));
}
758
/// Construct a Scanner over an existing buffer reference.
///
/// \a Buffer is passed unchanged to init(), which sets up the scanning state.
///
/// \param Buffer The buffer to scan.
/// \param SM_ The SourceMgr stored for diagnostics and buffer management.
/// \param ShowColors Stored and later forwarded when printing diagnostics.
/// \param EC Stored as the optional destination for an error code.
Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
                 std::error_code *EC)
    : SM(SM_), ShowColors(ShowColors), EC(EC) {
  init(Buffer);
}
764
765void Scanner::init(MemoryBufferRef Buffer) {
766 InputBuffer = Buffer;
767 Current = InputBuffer.getBufferStart();
768 End = InputBuffer.getBufferEnd();
769 Indent = -1;
770 Column = 0;
771 Line = 0;
772 FlowLevel = 0;
773 IsStartOfStream = true;
774 IsSimpleKeyAllowed = true;
775 Failed = false;
776 std::unique_ptr<MemoryBuffer> InputBufferOwner =
777 MemoryBuffer::getMemBuffer(Buffer);
778 SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
Sean Silvaaba82702012-11-19 23:21:47 +0000779}
780
Michael J. Spencer22120c42012-04-03 23:09:22 +0000781Token &Scanner::peekNext() {
782 // If the current token is a possible simple key, keep parsing until we
783 // can confirm.
784 bool NeedMore = false;
785 while (true) {
786 if (TokenQueue.empty() || NeedMore) {
787 if (!fetchMoreTokens()) {
788 TokenQueue.clear();
789 TokenQueue.push_back(Token());
790 return TokenQueue.front();
791 }
792 }
793 assert(!TokenQueue.empty() &&
794 "fetchMoreTokens lied about getting tokens!");
795
796 removeStaleSimpleKeyCandidates();
797 SimpleKey SK;
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +0000798 SK.Tok = TokenQueue.begin();
David Majnemer0d955d02016-08-11 22:21:41 +0000799 if (!is_contained(SimpleKeys, SK))
Michael J. Spencer22120c42012-04-03 23:09:22 +0000800 break;
801 else
802 NeedMore = true;
803 }
804 return TokenQueue.front();
805}
806
807Token Scanner::getNext() {
808 Token Ret = peekNext();
809 // TokenQueue can be empty if there was an error getting the next token.
810 if (!TokenQueue.empty())
811 TokenQueue.pop_front();
812
813 // There cannot be any referenced Token's if the TokenQueue is empty. So do a
814 // quick deallocation of them all.
Duncan P. N. Exon Smith23d83062016-09-11 22:40:40 +0000815 if (TokenQueue.empty())
816 TokenQueue.resetAlloc();
Michael J. Spencer22120c42012-04-03 23:09:22 +0000817
818 return Ret;
819}
820
821StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
Michael J. Spencer60331132012-04-27 21:12:20 +0000822 if (Position == End)
823 return Position;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000824 // Check 7 bit c-printable - b-char.
825 if ( *Position == 0x09
826 || (*Position >= 0x20 && *Position <= 0x7E))
827 return Position + 1;
828
829 // Check for valid UTF-8.
830 if (uint8_t(*Position) & 0x80) {
831 UTF8Decoded u8d = decodeUTF8(Position);
832 if ( u8d.second != 0
833 && u8d.first != 0xFEFF
834 && ( u8d.first == 0x85
835 || ( u8d.first >= 0xA0
836 && u8d.first <= 0xD7FF)
837 || ( u8d.first >= 0xE000
838 && u8d.first <= 0xFFFD)
839 || ( u8d.first >= 0x10000
840 && u8d.first <= 0x10FFFF)))
841 return Position + u8d.second;
842 }
843 return Position;
844}
845
846StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
Michael J. Spencer60331132012-04-27 21:12:20 +0000847 if (Position == End)
848 return Position;
Michael J. Spencer22120c42012-04-03 23:09:22 +0000849 if (*Position == 0x0D) {
850 if (Position + 1 != End && *(Position + 1) == 0x0A)
851 return Position + 2;
852 return Position + 1;
853 }
854
855 if (*Position == 0x0A)
856 return Position + 1;
857 return Position;
858}
859
Alex Lorenza22b250c2015-05-13 23:10:51 +0000860StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
861 if (Position == End)
862 return Position;
863 if (*Position == ' ')
864 return Position + 1;
865 return Position;
866}
Michael J. Spencer22120c42012-04-03 23:09:22 +0000867
868StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
869 if (Position == End)
870 return Position;
871 if (*Position == ' ' || *Position == '\t')
872 return Position + 1;
873 return Position;
874}
875
876StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
877 if (Position == End)
878 return Position;
879 if (*Position == ' ' || *Position == '\t')
880 return Position;
881 return skip_nb_char(Position);
882}
883
884StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
885 , StringRef::iterator Position) {
886 while (true) {
887 StringRef::iterator i = (this->*Func)(Position);
888 if (i == Position)
889 break;
890 Position = i;
891 }
892 return Position;
893}
894
Alex Lorenza22b250c2015-05-13 23:10:51 +0000895void Scanner::advanceWhile(SkipWhileFunc Func) {
896 auto Final = skip_while(Func, Current);
897 Column += Final - Current;
898 Current = Final;
899}
900
/// Return true if \p C is a hexadecimal digit (YAML ns-hex-digit): a decimal
/// digit or a letter in the ranges a-f / A-F.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'f')
      || (C >= 'A' && C <= 'F');
}
906
/// Return true if \p C is a YAML ns-word-char: a decimal digit, an ASCII
/// letter, or '-'.
static bool is_ns_word_char(const char C) {
  return C == '-'
      || (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'z')
      || (C >= 'A' && C <= 'Z');
}
912
Justin Bogner16742612016-10-16 22:01:22 +0000913void Scanner::scan_ns_uri_char() {
Michael J. Spencer22120c42012-04-03 23:09:22 +0000914 while (true) {
915 if (Current == End)
916 break;
917 if (( *Current == '%'
918 && Current + 2 < End
919 && is_ns_hex_digit(*(Current + 1))
920 && is_ns_hex_digit(*(Current + 2)))
921 || is_ns_word_char(*Current)
922 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
923 != StringRef::npos) {
924 ++Current;
925 ++Column;
926 } else
927 break;
928 }
Michael J. Spencer22120c42012-04-03 23:09:22 +0000929}
930
Michael J. Spencer22120c42012-04-03 23:09:22 +0000931bool Scanner::consume(uint32_t Expected) {
932 if (Expected >= 0x80)
933 report_fatal_error("Not dealing with this yet");
934 if (Current == End)
935 return false;
936 if (uint8_t(*Current) >= 0x80)
937 report_fatal_error("Not dealing with this yet");
938 if (uint8_t(*Current) == Expected) {
939 ++Current;
940 ++Column;
941 return true;
942 }
943 return false;
944}
945
946void Scanner::skip(uint32_t Distance) {
947 Current += Distance;
948 Column += Distance;
Benjamin Kramer8fb58f62012-09-26 15:52:15 +0000949 assert(Current <= End && "Skipped past the end");
Michael J. Spencer22120c42012-04-03 23:09:22 +0000950}
951
952bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
953 if (Position == End)
954 return false;
Alexander Kornienko66da20a2015-12-28 15:46:15 +0000955 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
956 *Position == '\n';
Michael J. Spencer22120c42012-04-03 23:09:22 +0000957}
958
Alex Lorenza22b250c2015-05-13 23:10:51 +0000959bool Scanner::consumeLineBreakIfPresent() {
960 auto Next = skip_b_break(Current);
961 if (Next == Current)
962 return false;
963 Column = 0;
964 ++Line;
965 Current = Next;
966 return true;
967}
968
Michael J. Spencer22120c42012-04-03 23:09:22 +0000969void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
970 , unsigned AtColumn
971 , bool IsRequired) {
972 if (IsSimpleKeyAllowed) {
973 SimpleKey SK;
974 SK.Tok = Tok;
975 SK.Line = Line;
976 SK.Column = AtColumn;
977 SK.IsRequired = IsRequired;
978 SK.FlowLevel = FlowLevel;
979 SimpleKeys.push_back(SK);
980 }
981}
982
983void Scanner::removeStaleSimpleKeyCandidates() {
984 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
985 i != SimpleKeys.end();) {
986 if (i->Line != Line || i->Column + 1024 < Column) {
987 if (i->IsRequired)
988 setError( "Could not find expected : for simple key"
989 , i->Tok->Range.begin());
990 i = SimpleKeys.erase(i);
991 } else
992 ++i;
993 }
994}
995
996void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
997 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
998 SimpleKeys.pop_back();
999}
1000
1001bool Scanner::unrollIndent(int ToColumn) {
1002 Token T;
1003 // Indentation is ignored in flow.
1004 if (FlowLevel != 0)
1005 return true;
1006
1007 while (Indent > ToColumn) {
1008 T.Kind = Token::TK_BlockEnd;
1009 T.Range = StringRef(Current, 1);
1010 TokenQueue.push_back(T);
1011 Indent = Indents.pop_back_val();
1012 }
1013
1014 return true;
1015}
1016
1017bool Scanner::rollIndent( int ToColumn
1018 , Token::TokenKind Kind
1019 , TokenQueueT::iterator InsertPoint) {
1020 if (FlowLevel)
1021 return true;
1022 if (Indent < ToColumn) {
1023 Indents.push_back(Indent);
1024 Indent = ToColumn;
1025
1026 Token T;
1027 T.Kind = Kind;
1028 T.Range = StringRef(Current, 0);
1029 TokenQueue.insert(InsertPoint, T);
1030 }
1031 return true;
1032}
1033
Alex Lorenzfe6f1862015-05-06 23:00:45 +00001034void Scanner::skipComment() {
1035 if (*Current != '#')
1036 return;
1037 while (true) {
1038 // This may skip more than one byte, thus Column is only incremented
1039 // for code points.
1040 StringRef::iterator I = skip_nb_char(Current);
1041 if (I == Current)
1042 break;
1043 Current = I;
1044 ++Column;
1045 }
1046}
1047
Michael J. Spencer22120c42012-04-03 23:09:22 +00001048void Scanner::scanToNextToken() {
1049 while (true) {
1050 while (*Current == ' ' || *Current == '\t') {
1051 skip(1);
1052 }
1053
Alex Lorenzfe6f1862015-05-06 23:00:45 +00001054 skipComment();
Michael J. Spencer22120c42012-04-03 23:09:22 +00001055
1056 // Skip EOL.
1057 StringRef::iterator i = skip_b_break(Current);
1058 if (i == Current)
1059 break;
1060 Current = i;
1061 ++Line;
1062 Column = 0;
1063 // New lines may start a simple key.
1064 if (!FlowLevel)
1065 IsSimpleKeyAllowed = true;
1066 }
1067}
1068
1069bool Scanner::scanStreamStart() {
1070 IsStartOfStream = false;
1071
1072 EncodingInfo EI = getUnicodeEncoding(currentInput());
1073
1074 Token T;
1075 T.Kind = Token::TK_StreamStart;
1076 T.Range = StringRef(Current, EI.second);
1077 TokenQueue.push_back(T);
1078 Current += EI.second;
1079 return true;
1080}
1081
1082bool Scanner::scanStreamEnd() {
1083 // Force an ending new line if one isn't present.
1084 if (Column != 0) {
1085 Column = 0;
1086 ++Line;
1087 }
1088
1089 unrollIndent(-1);
1090 SimpleKeys.clear();
1091 IsSimpleKeyAllowed = false;
1092
1093 Token T;
1094 T.Kind = Token::TK_StreamEnd;
1095 T.Range = StringRef(Current, 0);
1096 TokenQueue.push_back(T);
1097 return true;
1098}
1099
1100bool Scanner::scanDirective() {
1101 // Reset the indentation level.
1102 unrollIndent(-1);
1103 SimpleKeys.clear();
1104 IsSimpleKeyAllowed = false;
1105
1106 StringRef::iterator Start = Current;
1107 consume('%');
1108 StringRef::iterator NameStart = Current;
1109 Current = skip_while(&Scanner::skip_ns_char, Current);
1110 StringRef Name(NameStart, Current - NameStart);
1111 Current = skip_while(&Scanner::skip_s_white, Current);
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001112
1113 Token T;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001114 if (Name == "YAML") {
1115 Current = skip_while(&Scanner::skip_ns_char, Current);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001116 T.Kind = Token::TK_VersionDirective;
1117 T.Range = StringRef(Start, Current - Start);
1118 TokenQueue.push_back(T);
1119 return true;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001120 } else if(Name == "TAG") {
1121 Current = skip_while(&Scanner::skip_ns_char, Current);
1122 Current = skip_while(&Scanner::skip_s_white, Current);
1123 Current = skip_while(&Scanner::skip_ns_char, Current);
1124 T.Kind = Token::TK_TagDirective;
1125 T.Range = StringRef(Start, Current - Start);
1126 TokenQueue.push_back(T);
1127 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001128 }
1129 return false;
1130}
1131
1132bool Scanner::scanDocumentIndicator(bool IsStart) {
1133 unrollIndent(-1);
1134 SimpleKeys.clear();
1135 IsSimpleKeyAllowed = false;
1136
1137 Token T;
1138 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1139 T.Range = StringRef(Current, 3);
1140 skip(3);
1141 TokenQueue.push_back(T);
1142 return true;
1143}
1144
1145bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1146 Token T;
1147 T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1148 : Token::TK_FlowMappingStart;
1149 T.Range = StringRef(Current, 1);
1150 skip(1);
1151 TokenQueue.push_back(T);
1152
1153 // [ and { may begin a simple key.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001154 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001155
1156 // And may also be followed by a simple key.
1157 IsSimpleKeyAllowed = true;
1158 ++FlowLevel;
1159 return true;
1160}
1161
1162bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1163 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1164 IsSimpleKeyAllowed = false;
1165 Token T;
1166 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1167 : Token::TK_FlowMappingEnd;
1168 T.Range = StringRef(Current, 1);
1169 skip(1);
1170 TokenQueue.push_back(T);
1171 if (FlowLevel)
1172 --FlowLevel;
1173 return true;
1174}
1175
1176bool Scanner::scanFlowEntry() {
1177 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1178 IsSimpleKeyAllowed = true;
1179 Token T;
1180 T.Kind = Token::TK_FlowEntry;
1181 T.Range = StringRef(Current, 1);
1182 skip(1);
1183 TokenQueue.push_back(T);
1184 return true;
1185}
1186
1187bool Scanner::scanBlockEntry() {
1188 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1189 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1190 IsSimpleKeyAllowed = true;
1191 Token T;
1192 T.Kind = Token::TK_BlockEntry;
1193 T.Range = StringRef(Current, 1);
1194 skip(1);
1195 TokenQueue.push_back(T);
1196 return true;
1197}
1198
1199bool Scanner::scanKey() {
1200 if (!FlowLevel)
1201 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1202
1203 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1204 IsSimpleKeyAllowed = !FlowLevel;
1205
1206 Token T;
1207 T.Kind = Token::TK_Key;
1208 T.Range = StringRef(Current, 1);
1209 skip(1);
1210 TokenQueue.push_back(T);
1211 return true;
1212}
1213
1214bool Scanner::scanValue() {
1215 // If the previous token could have been a simple key, insert the key token
1216 // into the token queue.
1217 if (!SimpleKeys.empty()) {
1218 SimpleKey SK = SimpleKeys.pop_back_val();
1219 Token T;
1220 T.Kind = Token::TK_Key;
1221 T.Range = SK.Tok->Range;
1222 TokenQueueT::iterator i, e;
1223 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1224 if (i == SK.Tok)
1225 break;
1226 }
1227 assert(i != e && "SimpleKey not in token queue!");
1228 i = TokenQueue.insert(i, T);
1229
1230 // We may also need to add a Block-Mapping-Start token.
1231 rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1232
1233 IsSimpleKeyAllowed = false;
1234 } else {
1235 if (!FlowLevel)
1236 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1237 IsSimpleKeyAllowed = !FlowLevel;
1238 }
1239
1240 Token T;
1241 T.Kind = Token::TK_Value;
1242 T.Range = StringRef(Current, 1);
1243 skip(1);
1244 TokenQueue.push_back(T);
1245 return true;
1246}
1247
1248// Forbidding inlining improves performance by roughly 20%.
1249// FIXME: Remove once llvm optimizes this to the faster version without hints.
1250LLVM_ATTRIBUTE_NOINLINE static bool
1251wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1252
1253// Returns whether a character at 'Position' was escaped with a leading '\'.
1254// 'First' specifies the position of the first character in the string.
1255static bool wasEscaped(StringRef::iterator First,
1256 StringRef::iterator Position) {
1257 assert(Position - 1 >= First);
1258 StringRef::iterator I = Position - 1;
1259 // We calculate the number of consecutive '\'s before the current position
1260 // by iterating backwards through our string.
1261 while (I >= First && *I == '\\') --I;
1262 // (Position - 1 - I) now contains the number of '\'s before the current
1263 // position. If it is odd, the character at 'Position' was escaped.
1264 return (Position - 1 - I) % 2 == 1;
1265}
1266
1267bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1268 StringRef::iterator Start = Current;
1269 unsigned ColStart = Column;
1270 if (IsDoubleQuoted) {
1271 do {
1272 ++Current;
1273 while (Current != End && *Current != '"')
1274 ++Current;
1275 // Repeat until the previous character was not a '\' or was an escaped
1276 // backslash.
Michael J. Spencer60331132012-04-27 21:12:20 +00001277 } while ( Current != End
1278 && *(Current - 1) == '\\'
1279 && wasEscaped(Start + 1, Current));
Michael J. Spencer22120c42012-04-03 23:09:22 +00001280 } else {
1281 skip(1);
1282 while (true) {
1283 // Skip a ' followed by another '.
1284 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1285 skip(2);
1286 continue;
1287 } else if (*Current == '\'')
1288 break;
1289 StringRef::iterator i = skip_nb_char(Current);
1290 if (i == Current) {
1291 i = skip_b_break(Current);
1292 if (i == Current)
1293 break;
1294 Current = i;
1295 Column = 0;
1296 ++Line;
1297 } else {
1298 if (i == End)
1299 break;
1300 Current = i;
1301 ++Column;
1302 }
1303 }
1304 }
Benjamin Kramer8fb58f62012-09-26 15:52:15 +00001305
1306 if (Current == End) {
1307 setError("Expected quote at end of scalar", Current);
1308 return false;
1309 }
1310
Michael J. Spencer22120c42012-04-03 23:09:22 +00001311 skip(1); // Skip ending quote.
1312 Token T;
1313 T.Kind = Token::TK_Scalar;
1314 T.Range = StringRef(Start, Current - Start);
1315 TokenQueue.push_back(T);
1316
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001317 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001318
1319 IsSimpleKeyAllowed = false;
1320
1321 return true;
1322}
1323
1324bool Scanner::scanPlainScalar() {
1325 StringRef::iterator Start = Current;
1326 unsigned ColStart = Column;
1327 unsigned LeadingBlanks = 0;
1328 assert(Indent >= -1 && "Indent must be >= -1 !");
1329 unsigned indent = static_cast<unsigned>(Indent + 1);
1330 while (true) {
1331 if (*Current == '#')
1332 break;
1333
1334 while (!isBlankOrBreak(Current)) {
1335 if ( FlowLevel && *Current == ':'
1336 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
1337 setError("Found unexpected ':' while scanning a plain scalar", Current);
1338 return false;
1339 }
1340
1341 // Check for the end of the plain scalar.
1342 if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1343 || ( FlowLevel
1344 && (StringRef(Current, 1).find_first_of(",:?[]{}")
1345 != StringRef::npos)))
1346 break;
1347
1348 StringRef::iterator i = skip_nb_char(Current);
1349 if (i == Current)
1350 break;
1351 Current = i;
1352 ++Column;
1353 }
1354
1355 // Are we at the end?
1356 if (!isBlankOrBreak(Current))
1357 break;
1358
1359 // Eat blanks.
1360 StringRef::iterator Tmp = Current;
1361 while (isBlankOrBreak(Tmp)) {
1362 StringRef::iterator i = skip_s_white(Tmp);
1363 if (i != Tmp) {
1364 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1365 setError("Found invalid tab character in indentation", Tmp);
1366 return false;
1367 }
1368 Tmp = i;
1369 ++Column;
1370 } else {
1371 i = skip_b_break(Tmp);
1372 if (!LeadingBlanks)
1373 LeadingBlanks = 1;
1374 Tmp = i;
1375 Column = 0;
1376 ++Line;
1377 }
1378 }
1379
1380 if (!FlowLevel && Column < indent)
1381 break;
1382
1383 Current = Tmp;
1384 }
1385 if (Start == Current) {
1386 setError("Got empty plain scalar", Start);
1387 return false;
1388 }
1389 Token T;
1390 T.Kind = Token::TK_Scalar;
1391 T.Range = StringRef(Start, Current - Start);
1392 TokenQueue.push_back(T);
1393
1394 // Plain scalars can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001395 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001396
1397 IsSimpleKeyAllowed = false;
1398
1399 return true;
1400}
1401
1402bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1403 StringRef::iterator Start = Current;
1404 unsigned ColStart = Column;
1405 skip(1);
1406 while(true) {
1407 if ( *Current == '[' || *Current == ']'
1408 || *Current == '{' || *Current == '}'
1409 || *Current == ','
1410 || *Current == ':')
1411 break;
1412 StringRef::iterator i = skip_ns_char(Current);
1413 if (i == Current)
1414 break;
1415 Current = i;
1416 ++Column;
1417 }
1418
1419 if (Start == Current) {
1420 setError("Got empty alias or anchor", Start);
1421 return false;
1422 }
1423
1424 Token T;
1425 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
1426 T.Range = StringRef(Start, Current - Start);
1427 TokenQueue.push_back(T);
1428
1429 // Alias and anchors can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001430 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001431
1432 IsSimpleKeyAllowed = false;
1433
1434 return true;
1435}
1436
Alex Lorenza22b250c2015-05-13 23:10:51 +00001437char Scanner::scanBlockChompingIndicator() {
1438 char Indicator = ' ';
1439 if (Current != End && (*Current == '+' || *Current == '-')) {
1440 Indicator = *Current;
1441 skip(1);
1442 }
1443 return Indicator;
1444}
1445
1446/// Get the number of line breaks after chomping.
1447///
1448/// Return the number of trailing line breaks to emit, depending on
1449/// \p ChompingIndicator.
1450static unsigned getChompedLineBreaks(char ChompingIndicator,
1451 unsigned LineBreaks, StringRef Str) {
1452 if (ChompingIndicator == '-') // Strip all line breaks.
1453 return 0;
1454 if (ChompingIndicator == '+') // Keep all line breaks.
1455 return LineBreaks;
1456 // Clip trailing lines.
1457 return Str.empty() ? 0 : 1;
1458}
1459
1460unsigned Scanner::scanBlockIndentationIndicator() {
1461 unsigned Indent = 0;
1462 if (Current != End && (*Current >= '1' && *Current <= '9')) {
1463 Indent = unsigned(*Current - '0');
1464 skip(1);
1465 }
1466 return Indent;
1467}
1468
1469bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1470 unsigned &IndentIndicator, bool &IsDone) {
1471 auto Start = Current;
1472
1473 ChompingIndicator = scanBlockChompingIndicator();
1474 IndentIndicator = scanBlockIndentationIndicator();
1475 // Check for the chomping indicator once again.
1476 if (ChompingIndicator == ' ')
1477 ChompingIndicator = scanBlockChompingIndicator();
1478 Current = skip_while(&Scanner::skip_s_white, Current);
1479 skipComment();
1480
1481 if (Current == End) { // EOF, we have an empty scalar.
1482 Token T;
1483 T.Kind = Token::TK_BlockScalar;
1484 T.Range = StringRef(Start, Current - Start);
1485 TokenQueue.push_back(T);
1486 IsDone = true;
1487 return true;
1488 }
1489
1490 if (!consumeLineBreakIfPresent()) {
1491 setError("Expected a line break after block scalar header", Current);
1492 return false;
1493 }
1494 return true;
1495}
1496
1497bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1498 unsigned BlockExitIndent,
1499 unsigned &LineBreaks, bool &IsDone) {
1500 unsigned MaxAllSpaceLineCharacters = 0;
1501 StringRef::iterator LongestAllSpaceLine;
1502
1503 while (true) {
1504 advanceWhile(&Scanner::skip_s_space);
1505 if (skip_nb_char(Current) != Current) {
1506 // This line isn't empty, so try and find the indentation.
1507 if (Column <= BlockExitIndent) { // End of the block literal.
1508 IsDone = true;
1509 return true;
1510 }
1511 // We found the block's indentation.
1512 BlockIndent = Column;
1513 if (MaxAllSpaceLineCharacters > BlockIndent) {
1514 setError(
1515 "Leading all-spaces line must be smaller than the block indent",
1516 LongestAllSpaceLine);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001517 return false;
1518 }
Alex Lorenza22b250c2015-05-13 23:10:51 +00001519 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001520 }
Alex Lorenza22b250c2015-05-13 23:10:51 +00001521 if (skip_b_break(Current) != Current &&
1522 Column > MaxAllSpaceLineCharacters) {
1523 // Record the longest all-space line in case it's longer than the
1524 // discovered block indent.
1525 MaxAllSpaceLineCharacters = Column;
1526 LongestAllSpaceLine = Current;
1527 }
1528
1529 // Check for EOF.
1530 if (Current == End) {
1531 IsDone = true;
1532 return true;
1533 }
1534
1535 if (!consumeLineBreakIfPresent()) {
1536 IsDone = true;
1537 return true;
1538 }
1539 ++LineBreaks;
1540 }
1541 return true;
1542}
1543
1544bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1545 unsigned BlockExitIndent, bool &IsDone) {
1546 // Skip the indentation.
1547 while (Column < BlockIndent) {
1548 auto I = skip_s_space(Current);
1549 if (I == Current)
1550 break;
1551 Current = I;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001552 ++Column;
1553 }
1554
Alex Lorenza22b250c2015-05-13 23:10:51 +00001555 if (skip_nb_char(Current) == Current)
1556 return true;
1557
1558 if (Column <= BlockExitIndent) { // End of the block literal.
1559 IsDone = true;
1560 return true;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001561 }
1562
Alex Lorenza22b250c2015-05-13 23:10:51 +00001563 if (Column < BlockIndent) {
1564 if (Current != End && *Current == '#') { // Trailing comment.
1565 IsDone = true;
1566 return true;
1567 }
1568 setError("A text line is less indented than the block scalar", Current);
1569 return false;
1570 }
1571 return true; // A normal text line.
1572}
1573
1574bool Scanner::scanBlockScalar(bool IsLiteral) {
1575 // Eat '|' or '>'
1576 assert(*Current == '|' || *Current == '>');
1577 skip(1);
1578
1579 char ChompingIndicator;
1580 unsigned BlockIndent;
1581 bool IsDone = false;
1582 if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1583 return false;
1584 if (IsDone)
1585 return true;
1586
1587 auto Start = Current;
1588 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1589 unsigned LineBreaks = 0;
1590 if (BlockIndent == 0) {
1591 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1592 IsDone))
1593 return false;
1594 }
1595
1596 // Scan the block's scalars body.
1597 SmallString<256> Str;
1598 while (!IsDone) {
1599 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1600 return false;
1601 if (IsDone)
1602 break;
1603
1604 // Parse the current line.
1605 auto LineStart = Current;
1606 advanceWhile(&Scanner::skip_nb_char);
1607 if (LineStart != Current) {
1608 Str.append(LineBreaks, '\n');
1609 Str.append(StringRef(LineStart, Current - LineStart));
1610 LineBreaks = 0;
1611 }
1612
1613 // Check for EOF.
1614 if (Current == End)
1615 break;
1616
1617 if (!consumeLineBreakIfPresent())
1618 break;
1619 ++LineBreaks;
1620 }
1621
1622 if (Current == End && !LineBreaks)
1623 // Ensure that there is at least one line break before the end of file.
1624 LineBreaks = 1;
1625 Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1626
1627 // New lines may start a simple key.
1628 if (!FlowLevel)
1629 IsSimpleKeyAllowed = true;
1630
Michael J. Spencer22120c42012-04-03 23:09:22 +00001631 Token T;
Alex Lorenza22b250c2015-05-13 23:10:51 +00001632 T.Kind = Token::TK_BlockScalar;
Michael J. Spencer22120c42012-04-03 23:09:22 +00001633 T.Range = StringRef(Start, Current - Start);
Alex Lorenza22b250c2015-05-13 23:10:51 +00001634 T.Value = Str.str().str();
Michael J. Spencer22120c42012-04-03 23:09:22 +00001635 TokenQueue.push_back(T);
1636 return true;
1637}
1638
1639bool Scanner::scanTag() {
1640 StringRef::iterator Start = Current;
1641 unsigned ColStart = Column;
1642 skip(1); // Eat !.
1643 if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1644 else if (*Current == '<') {
1645 skip(1);
1646 scan_ns_uri_char();
1647 if (!consume('>'))
1648 return false;
1649 } else {
1650 // FIXME: Actually parse the c-ns-shorthand-tag rule.
1651 Current = skip_while(&Scanner::skip_ns_char, Current);
1652 }
1653
1654 Token T;
1655 T.Kind = Token::TK_Tag;
1656 T.Range = StringRef(Start, Current - Start);
1657 TokenQueue.push_back(T);
1658
1659 // Tags can be simple keys.
Duncan P. N. Exon Smith6eeaff12015-10-08 22:47:55 +00001660 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
Michael J. Spencer22120c42012-04-03 23:09:22 +00001661
1662 IsSimpleKeyAllowed = false;
1663
1664 return true;
1665}
1666
1667bool Scanner::fetchMoreTokens() {
1668 if (IsStartOfStream)
1669 return scanStreamStart();
1670
1671 scanToNextToken();
1672
1673 if (Current == End)
1674 return scanStreamEnd();
1675
1676 removeStaleSimpleKeyCandidates();
1677
1678 unrollIndent(Column);
1679
1680 if (Column == 0 && *Current == '%')
1681 return scanDirective();
1682
1683 if (Column == 0 && Current + 4 <= End
1684 && *Current == '-'
1685 && *(Current + 1) == '-'
1686 && *(Current + 2) == '-'
1687 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1688 return scanDocumentIndicator(true);
1689
1690 if (Column == 0 && Current + 4 <= End
1691 && *Current == '.'
1692 && *(Current + 1) == '.'
1693 && *(Current + 2) == '.'
1694 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1695 return scanDocumentIndicator(false);
1696
1697 if (*Current == '[')
1698 return scanFlowCollectionStart(true);
1699
1700 if (*Current == '{')
1701 return scanFlowCollectionStart(false);
1702
1703 if (*Current == ']')
1704 return scanFlowCollectionEnd(true);
1705
1706 if (*Current == '}')
1707 return scanFlowCollectionEnd(false);
1708
1709 if (*Current == ',')
1710 return scanFlowEntry();
1711
1712 if (*Current == '-' && isBlankOrBreak(Current + 1))
1713 return scanBlockEntry();
1714
1715 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1716 return scanKey();
1717
1718 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1719 return scanValue();
1720
1721 if (*Current == '*')
1722 return scanAliasOrAnchor(true);
1723
1724 if (*Current == '&')
1725 return scanAliasOrAnchor(false);
1726
1727 if (*Current == '!')
1728 return scanTag();
1729
1730 if (*Current == '|' && !FlowLevel)
1731 return scanBlockScalar(true);
1732
1733 if (*Current == '>' && !FlowLevel)
1734 return scanBlockScalar(false);
1735
1736 if (*Current == '\'')
1737 return scanFlowScalar(false);
1738
1739 if (*Current == '"')
1740 return scanFlowScalar(true);
1741
1742 // Get a plain scalar.
1743 StringRef FirstChar(Current, 1);
1744 if (!(isBlankOrBreak(Current)
1745 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1746 || (*Current == '-' && !isBlankOrBreak(Current + 1))
1747 || (!FlowLevel && (*Current == '?' || *Current == ':')
1748 && isBlankOrBreak(Current + 1))
1749 || (!FlowLevel && *Current == ':'
1750 && Current + 2 < End
1751 && *(Current + 1) == ':'
1752 && !isBlankOrBreak(Current + 2)))
1753 return scanPlainScalar();
1754
1755 setError("Unrecognized character while tokenizing.");
1756 return false;
1757}
1758
Mehdi Amini3ab3fef2016-11-28 21:38:52 +00001759Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1760 std::error_code *EC)
1761 : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
Michael J. Spencer22120c42012-04-03 23:09:22 +00001762
Mehdi Amini3ab3fef2016-11-28 21:38:52 +00001763Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1764 std::error_code *EC)
1765 : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
Sean Silvaaba82702012-11-19 23:21:47 +00001766
Eugene Zelenko72208a82017-06-21 23:19:47 +00001767Stream::~Stream() = default;
Benjamin Kramera1355d12012-04-04 08:53:34 +00001768
Michael J. Spencer22120c42012-04-03 23:09:22 +00001769bool Stream::failed() { return scanner->failed(); }
1770
1771void Stream::printError(Node *N, const Twine &Msg) {
Michael J. Spencer22120c42012-04-03 23:09:22 +00001772 scanner->printError( N->getSourceRange().Start
1773 , SourceMgr::DK_Error
1774 , Msg
Benjamin Kramerea68a942015-02-19 15:26:17 +00001775 , N->getSourceRange());
Michael J. Spencer22120c42012-04-03 23:09:22 +00001776}
1777
Michael J. Spencer22120c42012-04-03 23:09:22 +00001778document_iterator Stream::begin() {
1779 if (CurrentDoc)
1780 report_fatal_error("Can only iterate over the stream once");
1781
1782 // Skip Stream-Start.
1783 scanner->getNext();
1784
1785 CurrentDoc.reset(new Document(*this));
1786 return document_iterator(CurrentDoc);
1787}
1788
1789document_iterator Stream::end() {
1790 return document_iterator();
1791}
1792
1793void Stream::skip() {
1794 for (document_iterator i = begin(), e = end(); i != e; ++i)
1795 i->skip();
1796}
1797
Ahmed Charles56440fd2014-03-06 05:51:42 +00001798Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1799 StringRef T)
1800 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
Michael J. Spencer22120c42012-04-03 23:09:22 +00001801 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1802 SourceRange = SMRange(Start, Start);
1803}
1804
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001805std::string Node::getVerbatimTag() const {
1806 StringRef Raw = getRawTag();
1807 if (!Raw.empty() && Raw != "!") {
1808 std::string Ret;
1809 if (Raw.find_last_of('!') == 0) {
1810 Ret = Doc->getTagMap().find("!")->second;
1811 Ret += Raw.substr(1);
Richard Trieu73d06522015-01-17 00:46:44 +00001812 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001813 } else if (Raw.startswith("!!")) {
1814 Ret = Doc->getTagMap().find("!!")->second;
1815 Ret += Raw.substr(2);
Richard Trieu73d06522015-01-17 00:46:44 +00001816 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001817 } else {
1818 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1819 std::map<StringRef, StringRef>::const_iterator It =
1820 Doc->getTagMap().find(TagHandle);
1821 if (It != Doc->getTagMap().end())
1822 Ret = It->second;
1823 else {
1824 Token T;
1825 T.Kind = Token::TK_Tag;
1826 T.Range = TagHandle;
1827 setError(Twine("Unknown tag handle ") + TagHandle, T);
1828 }
1829 Ret += Raw.substr(Raw.find_last_of('!') + 1);
Richard Trieu73d06522015-01-17 00:46:44 +00001830 return Ret;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001831 }
1832 }
1833
1834 switch (getType()) {
1835 case NK_Null:
1836 return "tag:yaml.org,2002:null";
1837 case NK_Scalar:
Alex Lorenza22b250c2015-05-13 23:10:51 +00001838 case NK_BlockScalar:
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00001839 // TODO: Tag resolution.
1840 return "tag:yaml.org,2002:str";
1841 case NK_Mapping:
1842 return "tag:yaml.org,2002:map";
1843 case NK_Sequence:
1844 return "tag:yaml.org,2002:seq";
1845 }
1846
1847 return "";
1848}
1849
Michael J. Spencer22120c42012-04-03 23:09:22 +00001850Token &Node::peekNext() {
1851 return Doc->peekNext();
1852}
1853
1854Token Node::getNext() {
1855 return Doc->getNext();
1856}
1857
1858Node *Node::parseBlockNode() {
1859 return Doc->parseBlockNode();
1860}
1861
1862BumpPtrAllocator &Node::getAllocator() {
1863 return Doc->NodeAllocator;
1864}
1865
1866void Node::setError(const Twine &Msg, Token &Tok) const {
1867 Doc->setError(Msg, Tok);
1868}
1869
1870bool Node::failed() const {
1871 return Doc->failed();
1872}
1873
Michael J. Spencer22120c42012-04-03 23:09:22 +00001874StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
1875 // TODO: Handle newlines properly. We need to remove leading whitespace.
1876 if (Value[0] == '"') { // Double quoted.
1877 // Pull off the leading and trailing "s.
1878 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1879 // Search for characters that would require unescaping the value.
1880 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1881 if (i != StringRef::npos)
1882 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1883 return UnquotedValue;
1884 } else if (Value[0] == '\'') { // Single quoted.
1885 // Pull off the leading and trailing 's.
1886 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1887 StringRef::size_type i = UnquotedValue.find('\'');
1888 if (i != StringRef::npos) {
1889 // We're going to need Storage.
1890 Storage.clear();
1891 Storage.reserve(UnquotedValue.size());
1892 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
1893 StringRef Valid(UnquotedValue.begin(), i);
1894 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1895 Storage.push_back('\'');
1896 UnquotedValue = UnquotedValue.substr(i + 2);
1897 }
1898 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1899 return StringRef(Storage.begin(), Storage.size());
1900 }
1901 return UnquotedValue;
1902 }
1903 // Plain or block.
Vedant Kumar98372e32016-02-16 02:06:01 +00001904 return Value.rtrim(' ');
Michael J. Spencer22120c42012-04-03 23:09:22 +00001905}
1906
1907StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1908 , StringRef::size_type i
1909 , SmallVectorImpl<char> &Storage)
1910 const {
1911 // Use Storage to build proper value.
1912 Storage.clear();
1913 Storage.reserve(UnquotedValue.size());
1914 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
1915 // Insert all previous chars into Storage.
1916 StringRef Valid(UnquotedValue.begin(), i);
1917 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1918 // Chop off inserted chars.
1919 UnquotedValue = UnquotedValue.substr(i);
1920
1921 assert(!UnquotedValue.empty() && "Can't be empty!");
1922
1923 // Parse escape or line break.
1924 switch (UnquotedValue[0]) {
1925 case '\r':
1926 case '\n':
1927 Storage.push_back('\n');
1928 if ( UnquotedValue.size() > 1
1929 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1930 UnquotedValue = UnquotedValue.substr(1);
1931 UnquotedValue = UnquotedValue.substr(1);
1932 break;
1933 default:
1934 if (UnquotedValue.size() == 1)
1935 // TODO: Report error.
1936 break;
1937 UnquotedValue = UnquotedValue.substr(1);
1938 switch (UnquotedValue[0]) {
1939 default: {
1940 Token T;
1941 T.Range = StringRef(UnquotedValue.begin(), 1);
1942 setError("Unrecognized escape code!", T);
1943 return "";
1944 }
1945 case '\r':
1946 case '\n':
1947 // Remove the new line.
1948 if ( UnquotedValue.size() > 1
1949 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1950 UnquotedValue = UnquotedValue.substr(1);
1951 // If this was just a single byte newline, it will get skipped
1952 // below.
1953 break;
1954 case '0':
1955 Storage.push_back(0x00);
1956 break;
1957 case 'a':
1958 Storage.push_back(0x07);
1959 break;
1960 case 'b':
1961 Storage.push_back(0x08);
1962 break;
1963 case 't':
1964 case 0x09:
1965 Storage.push_back(0x09);
1966 break;
1967 case 'n':
1968 Storage.push_back(0x0A);
1969 break;
1970 case 'v':
1971 Storage.push_back(0x0B);
1972 break;
1973 case 'f':
1974 Storage.push_back(0x0C);
1975 break;
1976 case 'r':
1977 Storage.push_back(0x0D);
1978 break;
1979 case 'e':
1980 Storage.push_back(0x1B);
1981 break;
1982 case ' ':
1983 Storage.push_back(0x20);
1984 break;
1985 case '"':
1986 Storage.push_back(0x22);
1987 break;
1988 case '/':
1989 Storage.push_back(0x2F);
1990 break;
1991 case '\\':
1992 Storage.push_back(0x5C);
1993 break;
1994 case 'N':
1995 encodeUTF8(0x85, Storage);
1996 break;
1997 case '_':
1998 encodeUTF8(0xA0, Storage);
1999 break;
2000 case 'L':
2001 encodeUTF8(0x2028, Storage);
2002 break;
2003 case 'P':
2004 encodeUTF8(0x2029, Storage);
2005 break;
2006 case 'x': {
2007 if (UnquotedValue.size() < 3)
2008 // TODO: Report error.
2009 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00002010 unsigned int UnicodeScalarValue;
2011 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2012 // TODO: Report error.
2013 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002014 encodeUTF8(UnicodeScalarValue, Storage);
2015 UnquotedValue = UnquotedValue.substr(2);
2016 break;
2017 }
2018 case 'u': {
2019 if (UnquotedValue.size() < 5)
2020 // TODO: Report error.
2021 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00002022 unsigned int UnicodeScalarValue;
2023 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2024 // TODO: Report error.
2025 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002026 encodeUTF8(UnicodeScalarValue, Storage);
2027 UnquotedValue = UnquotedValue.substr(4);
2028 break;
2029 }
2030 case 'U': {
2031 if (UnquotedValue.size() < 9)
2032 // TODO: Report error.
2033 break;
Michael J. Spencera6c2c292012-04-26 19:27:11 +00002034 unsigned int UnicodeScalarValue;
2035 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2036 // TODO: Report error.
2037 UnicodeScalarValue = 0xFFFD;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002038 encodeUTF8(UnicodeScalarValue, Storage);
2039 UnquotedValue = UnquotedValue.substr(8);
2040 break;
2041 }
2042 }
2043 UnquotedValue = UnquotedValue.substr(1);
2044 }
2045 }
2046 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
2047 return StringRef(Storage.begin(), Storage.size());
2048}
2049
2050Node *KeyValueNode::getKey() {
2051 if (Key)
2052 return Key;
2053 // Handle implicit null keys.
2054 {
2055 Token &t = peekNext();
2056 if ( t.Kind == Token::TK_BlockEnd
2057 || t.Kind == Token::TK_Value
2058 || t.Kind == Token::TK_Error) {
2059 return Key = new (getAllocator()) NullNode(Doc);
2060 }
2061 if (t.Kind == Token::TK_Key)
2062 getNext(); // skip TK_Key.
2063 }
2064
2065 // Handle explicit null keys.
2066 Token &t = peekNext();
2067 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2068 return Key = new (getAllocator()) NullNode(Doc);
2069 }
2070
2071 // We've got a normal key.
2072 return Key = parseBlockNode();
2073}
2074
2075Node *KeyValueNode::getValue() {
2076 if (Value)
2077 return Value;
2078 getKey()->skip();
2079 if (failed())
2080 return Value = new (getAllocator()) NullNode(Doc);
2081
2082 // Handle implicit null values.
2083 {
2084 Token &t = peekNext();
2085 if ( t.Kind == Token::TK_BlockEnd
2086 || t.Kind == Token::TK_FlowMappingEnd
2087 || t.Kind == Token::TK_Key
2088 || t.Kind == Token::TK_FlowEntry
2089 || t.Kind == Token::TK_Error) {
2090 return Value = new (getAllocator()) NullNode(Doc);
2091 }
2092
2093 if (t.Kind != Token::TK_Value) {
2094 setError("Unexpected token in Key Value.", t);
2095 return Value = new (getAllocator()) NullNode(Doc);
2096 }
2097 getNext(); // skip TK_Value.
2098 }
2099
2100 // Handle explicit null values.
2101 Token &t = peekNext();
2102 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2103 return Value = new (getAllocator()) NullNode(Doc);
2104 }
2105
2106 // We got a normal value.
2107 return Value = parseBlockNode();
2108}
2109
2110void MappingNode::increment() {
2111 if (failed()) {
2112 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002113 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002114 return;
2115 }
2116 if (CurrentEntry) {
2117 CurrentEntry->skip();
2118 if (Type == MT_Inline) {
2119 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002120 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002121 return;
2122 }
2123 }
2124 Token T = peekNext();
2125 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2126 // KeyValueNode eats the TK_Key. That way it can detect null keys.
2127 CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2128 } else if (Type == MT_Block) {
2129 switch (T.Kind) {
2130 case Token::TK_BlockEnd:
2131 getNext();
2132 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002133 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002134 break;
2135 default:
2136 setError("Unexpected token. Expected Key or Block End", T);
Galina Kistanova5e6c5422017-05-23 01:20:52 +00002137 LLVM_FALLTHROUGH;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002138 case Token::TK_Error:
2139 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002140 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002141 }
2142 } else {
2143 switch (T.Kind) {
2144 case Token::TK_FlowEntry:
2145 // Eat the flow entry and recurse.
2146 getNext();
2147 return increment();
2148 case Token::TK_FlowMappingEnd:
2149 getNext();
Galina Kistanova5e6c5422017-05-23 01:20:52 +00002150 LLVM_FALLTHROUGH;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002151 case Token::TK_Error:
2152 // Set this to end iterator.
2153 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002154 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002155 break;
2156 default:
2157 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2158 "Mapping End."
2159 , T);
2160 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002161 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002162 }
2163 }
2164}
2165
2166void SequenceNode::increment() {
2167 if (failed()) {
2168 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002169 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002170 return;
2171 }
2172 if (CurrentEntry)
2173 CurrentEntry->skip();
2174 Token T = peekNext();
2175 if (SeqType == ST_Block) {
2176 switch (T.Kind) {
2177 case Token::TK_BlockEntry:
2178 getNext();
2179 CurrentEntry = parseBlockNode();
Craig Topper8d399f82014-04-09 04:20:00 +00002180 if (!CurrentEntry) { // An error occurred.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002181 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002182 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002183 }
2184 break;
2185 case Token::TK_BlockEnd:
2186 getNext();
2187 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002188 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002189 break;
2190 default:
2191 setError( "Unexpected token. Expected Block Entry or Block End."
2192 , T);
Galina Kistanova5e6c5422017-05-23 01:20:52 +00002193 LLVM_FALLTHROUGH;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002194 case Token::TK_Error:
2195 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002196 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002197 }
2198 } else if (SeqType == ST_Indentless) {
2199 switch (T.Kind) {
2200 case Token::TK_BlockEntry:
2201 getNext();
2202 CurrentEntry = parseBlockNode();
Craig Topper8d399f82014-04-09 04:20:00 +00002203 if (!CurrentEntry) { // An error occurred.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002204 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002205 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002206 }
2207 break;
2208 default:
2209 case Token::TK_Error:
2210 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002211 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002212 }
2213 } else if (SeqType == ST_Flow) {
2214 switch (T.Kind) {
2215 case Token::TK_FlowEntry:
2216 // Eat the flow entry and recurse.
2217 getNext();
2218 WasPreviousTokenFlowEntry = true;
2219 return increment();
2220 case Token::TK_FlowSequenceEnd:
2221 getNext();
Galina Kistanova5e6c5422017-05-23 01:20:52 +00002222 LLVM_FALLTHROUGH;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002223 case Token::TK_Error:
2224 // Set this to end iterator.
2225 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002226 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002227 break;
2228 case Token::TK_StreamEnd:
2229 case Token::TK_DocumentEnd:
2230 case Token::TK_DocumentStart:
2231 setError("Could not find closing ]!", T);
2232 // Set this to end iterator.
2233 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002234 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002235 break;
2236 default:
2237 if (!WasPreviousTokenFlowEntry) {
2238 setError("Expected , between entries!", T);
2239 IsAtEnd = true;
Craig Topperc10719f2014-04-07 04:17:22 +00002240 CurrentEntry = nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002241 break;
2242 }
2243 // Otherwise it must be a flow entry.
2244 CurrentEntry = parseBlockNode();
2245 if (!CurrentEntry) {
2246 IsAtEnd = true;
2247 }
2248 WasPreviousTokenFlowEntry = false;
2249 break;
2250 }
2251 }
2252}
2253
Craig Topperc10719f2014-04-07 04:17:22 +00002254Document::Document(Stream &S) : stream(S), Root(nullptr) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002255 // Tag maps starts with two default mappings.
2256 TagMap["!"] = "!";
2257 TagMap["!!"] = "tag:yaml.org,2002:";
2258
Michael J. Spencer22120c42012-04-03 23:09:22 +00002259 if (parseDirectives())
2260 expectToken(Token::TK_DocumentStart);
2261 Token &T = peekNext();
2262 if (T.Kind == Token::TK_DocumentStart)
2263 getNext();
2264}
2265
2266bool Document::skip() {
2267 if (stream.scanner->failed())
2268 return false;
2269 if (!Root)
2270 getRoot();
2271 Root->skip();
2272 Token &T = peekNext();
2273 if (T.Kind == Token::TK_StreamEnd)
2274 return false;
2275 if (T.Kind == Token::TK_DocumentEnd) {
2276 getNext();
2277 return skip();
2278 }
2279 return true;
2280}
2281
2282Token &Document::peekNext() {
2283 return stream.scanner->peekNext();
2284}
2285
2286Token Document::getNext() {
2287 return stream.scanner->getNext();
2288}
2289
2290void Document::setError(const Twine &Message, Token &Location) const {
2291 stream.scanner->setError(Message, Location.Range.begin());
2292}
2293
2294bool Document::failed() const {
2295 return stream.scanner->failed();
2296}
2297
2298Node *Document::parseBlockNode() {
2299 Token T = peekNext();
2300 // Handle properties.
2301 Token AnchorInfo;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002302 Token TagInfo;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002303parse_property:
2304 switch (T.Kind) {
2305 case Token::TK_Alias:
2306 getNext();
2307 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2308 case Token::TK_Anchor:
2309 if (AnchorInfo.Kind == Token::TK_Anchor) {
2310 setError("Already encountered an anchor for this node!", T);
Craig Topperc10719f2014-04-07 04:17:22 +00002311 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002312 }
2313 AnchorInfo = getNext(); // Consume TK_Anchor.
2314 T = peekNext();
2315 goto parse_property;
2316 case Token::TK_Tag:
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002317 if (TagInfo.Kind == Token::TK_Tag) {
2318 setError("Already encountered a tag for this node!", T);
Craig Topperc10719f2014-04-07 04:17:22 +00002319 return nullptr;
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002320 }
2321 TagInfo = getNext(); // Consume TK_Tag.
Michael J. Spencer22120c42012-04-03 23:09:22 +00002322 T = peekNext();
2323 goto parse_property;
2324 default:
2325 break;
2326 }
2327
2328 switch (T.Kind) {
2329 case Token::TK_BlockEntry:
2330 // We got an unindented BlockEntry sequence. This is not terminated with
2331 // a BlockEnd.
2332 // Don't eat the TK_BlockEntry, SequenceNode needs it.
2333 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2334 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002335 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002336 , SequenceNode::ST_Indentless);
2337 case Token::TK_BlockSequenceStart:
2338 getNext();
2339 return new (NodeAllocator)
2340 SequenceNode( stream.CurrentDoc
2341 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002342 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002343 , SequenceNode::ST_Block);
2344 case Token::TK_BlockMappingStart:
2345 getNext();
2346 return new (NodeAllocator)
2347 MappingNode( stream.CurrentDoc
2348 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002349 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002350 , MappingNode::MT_Block);
2351 case Token::TK_FlowSequenceStart:
2352 getNext();
2353 return new (NodeAllocator)
2354 SequenceNode( stream.CurrentDoc
2355 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002356 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002357 , SequenceNode::ST_Flow);
2358 case Token::TK_FlowMappingStart:
2359 getNext();
2360 return new (NodeAllocator)
2361 MappingNode( stream.CurrentDoc
2362 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002363 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002364 , MappingNode::MT_Flow);
2365 case Token::TK_Scalar:
2366 getNext();
2367 return new (NodeAllocator)
2368 ScalarNode( stream.CurrentDoc
2369 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002370 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002371 , T.Range);
Benjamin Kramer72367332015-05-18 21:11:27 +00002372 case Token::TK_BlockScalar: {
Alex Lorenza22b250c2015-05-13 23:10:51 +00002373 getNext();
Alex Lorenz481dca22015-05-21 19:45:02 +00002374 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2375 StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
Alex Lorenza22b250c2015-05-13 23:10:51 +00002376 return new (NodeAllocator)
2377 BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
Benjamin Kramer72367332015-05-18 21:11:27 +00002378 TagInfo.Range, StrCopy, T.Range);
2379 }
Michael J. Spencer22120c42012-04-03 23:09:22 +00002380 case Token::TK_Key:
2381 // Don't eat the TK_Key, KeyValueNode expects it.
2382 return new (NodeAllocator)
2383 MappingNode( stream.CurrentDoc
2384 , AnchorInfo.Range.substr(1)
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002385 , TagInfo.Range
Michael J. Spencer22120c42012-04-03 23:09:22 +00002386 , MappingNode::MT_Inline);
2387 case Token::TK_DocumentStart:
2388 case Token::TK_DocumentEnd:
2389 case Token::TK_StreamEnd:
2390 default:
2391 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2392 // !!null null.
2393 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2394 case Token::TK_Error:
Craig Topperc10719f2014-04-07 04:17:22 +00002395 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002396 }
2397 llvm_unreachable("Control flow shouldn't reach here.");
Craig Topperc10719f2014-04-07 04:17:22 +00002398 return nullptr;
Michael J. Spencer22120c42012-04-03 23:09:22 +00002399}
2400
2401bool Document::parseDirectives() {
2402 bool isDirective = false;
2403 while (true) {
2404 Token T = peekNext();
2405 if (T.Kind == Token::TK_TagDirective) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002406 parseTAGDirective();
Michael J. Spencer22120c42012-04-03 23:09:22 +00002407 isDirective = true;
2408 } else if (T.Kind == Token::TK_VersionDirective) {
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002409 parseYAMLDirective();
Michael J. Spencer22120c42012-04-03 23:09:22 +00002410 isDirective = true;
2411 } else
2412 break;
2413 }
2414 return isDirective;
2415}
2416
Michael J. Spencerc064a9a2013-10-18 22:38:04 +00002417void Document::parseYAMLDirective() {
2418 getNext(); // Eat %YAML <version>
2419}
2420
2421void Document::parseTAGDirective() {
2422 Token Tag = getNext(); // %TAG <handle> <prefix>
2423 StringRef T = Tag.Range;
2424 // Strip %TAG
2425 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2426 std::size_t HandleEnd = T.find_first_of(" \t");
2427 StringRef TagHandle = T.substr(0, HandleEnd);
2428 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2429 TagMap[TagHandle] = TagPrefix;
2430}
2431
Michael J. Spencer22120c42012-04-03 23:09:22 +00002432bool Document::expectToken(int TK) {
2433 Token T = getNext();
2434 if (T.Kind != TK) {
2435 setError("Unexpected token", T);
2436 return false;
2437 }
2438 return true;
2439}