Blame - lib/Support/YAMLParser.cpp - fp2-dev/platform/external/llvm

blob: 3e302d0eb1b928dceb7ed777e058e56d1c58215d [file] [log] [blame]

Michael J. Spencer	93210e8	2012-04-03 23:09:22 +0000	[diff] [blame^]	1	//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements a YAML parser.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "llvm/Support/YAMLParser.h"
				15
				16	#include "llvm/ADT/ilist.h"
				17	#include "llvm/ADT/ilist_node.h"
				18	#include "llvm/ADT/SmallVector.h"
				19	#include "llvm/ADT/StringExtras.h"
				20	#include "llvm/ADT/Twine.h"
				21	#include "llvm/Support/ErrorHandling.h"
				22	#include "llvm/Support/MemoryBuffer.h"
				23	#include "llvm/Support/raw_ostream.h"
				24	#include "llvm/Support/SourceMgr.h"
				25
				26	using namespace llvm;
				27	using namespace yaml;
				28
				29	enum UnicodeEncodingForm {
				30	UEF_UTF32_LE, //< UTF-32 Little Endian
				31	UEF_UTF32_BE, //< UTF-32 Big Endian
				32	UEF_UTF16_LE, //< UTF-16 Little Endian
				33	UEF_UTF16_BE, //< UTF-16 Big Endian
				34	UEF_UTF8, //< UTF-8 or ascii.
				35	UEF_Unknown //< Not a valid Unicode encoding.
				36	};
				37
				38	/// EncodingInfo - Holds the encoding type and length of the byte order mark if
				39	/// it exists. Length is in {0, 2, 3, 4}.
				40	typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
				41
				42	/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
				43	/// encoding form of \a Input.
				44	///
				45	/// @param Input A string of length 0 or more.
				46	/// @returns An EncodingInfo indicating the Unicode encoding form of the input
				47	/// and how long the byte order mark is if one exists.
				48	static EncodingInfo getUnicodeEncoding(StringRef Input) {
				49	if (Input.size() == 0)
				50	return std::make_pair(UEF_Unknown, 0);
				51
				52	switch (uint8_t(Input[0])) {
				53	case 0x00:
				54	if (Input.size() >= 4) {
				55	if ( Input[1] == 0
				56	&& uint8_t(Input[2]) == 0xFE
				57	&& uint8_t(Input[3]) == 0xFF)
				58	return std::make_pair(UEF_UTF32_BE, 4);
				59	if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
				60	return std::make_pair(UEF_UTF32_BE, 0);
				61	}
				62
				63	if (Input.size() >= 2 && Input[1] != 0)
				64	return std::make_pair(UEF_UTF16_BE, 0);
				65	return std::make_pair(UEF_Unknown, 0);
				66	case 0xFF:
				67	if ( Input.size() >= 4
				68	&& uint8_t(Input[1]) == 0xFE
				69	&& Input[2] == 0
				70	&& Input[3] == 0)
				71	return std::make_pair(UEF_UTF32_LE, 4);
				72
				73	if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
				74	return std::make_pair(UEF_UTF16_LE, 2);
				75	return std::make_pair(UEF_Unknown, 0);
				76	case 0xFE:
				77	if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
				78	return std::make_pair(UEF_UTF16_BE, 2);
				79	return std::make_pair(UEF_Unknown, 0);
				80	case 0xEF:
				81	if ( Input.size() >= 3
				82	&& uint8_t(Input[1]) == 0xBB
				83	&& uint8_t(Input[2]) == 0xBF)
				84	return std::make_pair(UEF_UTF8, 3);
				85	return std::make_pair(UEF_Unknown, 0);
				86	}
				87
				88	// It could still be utf-32 or utf-16.
				89	if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
				90	return std::make_pair(UEF_UTF32_LE, 0);
				91
				92	if (Input.size() >= 2 && Input[1] == 0)
				93	return std::make_pair(UEF_UTF16_LE, 0);
				94
				95	return std::make_pair(UEF_UTF8, 0);
				96	}
				97
				98	namespace llvm {
				99	namespace yaml {
				100	/// Token - A single YAML token.
				101	struct Token : ilist_node<Token> {
				102	enum TokenKind {
				103	TK_Error, // Uninitialized token.
				104	TK_StreamStart,
				105	TK_StreamEnd,
				106	TK_VersionDirective,
				107	TK_TagDirective,
				108	TK_DocumentStart,
				109	TK_DocumentEnd,
				110	TK_BlockEntry,
				111	TK_BlockEnd,
				112	TK_BlockSequenceStart,
				113	TK_BlockMappingStart,
				114	TK_FlowEntry,
				115	TK_FlowSequenceStart,
				116	TK_FlowSequenceEnd,
				117	TK_FlowMappingStart,
				118	TK_FlowMappingEnd,
				119	TK_Key,
				120	TK_Value,
				121	TK_Scalar,
				122	TK_Alias,
				123	TK_Anchor,
				124	TK_Tag
				125	} Kind;
				126
				127	/// A string of length 0 or more whose begin() points to the logical location
				128	/// of the token in the input.
				129	StringRef Range;
				130
				131	Token() : Kind(TK_Error) {}
				132	};
				133	}
				134	}
				135
				136	template<>
				137	struct ilist_sentinel_traits<Token> {
				138	Token *createSentinel() const {
				139	return &Sentinel;
				140	}
				141	static void destroySentinel(Token*) {}
				142
				143	Token *provideInitialHead() const { return createSentinel(); }
				144	Token ensureHead(Token) const { return createSentinel(); }
				145	static void noteHead(Token, Token) {}
				146
				147	private:
				148	mutable Token Sentinel;
				149	};
				150
				151	template<>
				152	struct ilist_node_traits<Token> {
				153	Token *createNode(const Token &V) {
				154	return new (Alloc.Allocate<Token>()) Token(V);
				155	}
				156	static void deleteNode(Token *V) {}
				157
				158	void addNodeToList(Token *) {}
				159	void removeNodeFromList(Token *) {}
				160	void transferNodesFromList(ilist_node_traits & /SrcTraits/,
				161	ilist_iterator<Token> /first/,
				162	ilist_iterator<Token> /last/) {}
				163
				164	BumpPtrAllocator Alloc;
				165	};
				166
				167	typedef ilist<Token> TokenQueueT;
				168
				169	namespace {
				170	/// @brief This struct is used to track simple keys.
				171	///
				172	/// Simple keys are handled by creating an entry in SimpleKeys for each Token
				173	/// which could legally be the start of a simple key. When peekNext is called,
				174	/// if the Token To be returned is referenced by a SimpleKey, we continue
				175	/// tokenizing until that potential simple key has either been found to not be
				176	/// a simple key (we moved on to the next line or went further than 1024 chars).
				177	/// Or when we run into a Value, and then insert a Key token (and possibly
				178	/// others) before the SimpleKey's Tok.
				179	struct SimpleKey {
				180	TokenQueueT::iterator Tok;
				181	unsigned Column;
				182	unsigned Line;
				183	unsigned FlowLevel;
				184	bool IsRequired;
				185
				186	bool operator ==(const SimpleKey &Other) {
				187	return Tok == Other.Tok;
				188	}
				189	};
				190	}
				191
				192	/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
				193	/// subsequence and the subsequence's length in code units (uint8_t).
				194	/// A length of 0 represents an error.
				195	typedef std::pair<uint32_t, unsigned> UTF8Decoded;
				196
				197	static UTF8Decoded decodeUTF8(StringRef Range) {
				198	StringRef::iterator Position= Range.begin();
				199	StringRef::iterator End = Range.end();
				200	// 1 byte: [0x00, 0x7f]
				201	// Bit pattern: 0xxxxxxx
				202	if ((*Position & 0x80) == 0) {
				203	return std::make_pair(*Position, 1);
				204	}
				205	// 2 bytes: [0x80, 0x7ff]
				206	// Bit pattern: 110xxxxx 10xxxxxx
				207	if (Position + 1 != End &&
				208	((*Position & 0xE0) == 0xC0) &&
				209	((*(Position + 1) & 0xC0) == 0x80)) {
				210	uint32_t codepoint = ((*Position & 0x1F) << 6) \|
				211	(*(Position + 1) & 0x3F);
				212	if (codepoint >= 0x80)
				213	return std::make_pair(codepoint, 2);
				214	}
				215	// 3 bytes: [0x8000, 0xffff]
				216	// Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
				217	if (Position + 2 != End &&
				218	((*Position & 0xF0) == 0xE0) &&
				219	((*(Position + 1) & 0xC0) == 0x80) &&
				220	((*(Position + 2) & 0xC0) == 0x80)) {
				221	uint32_t codepoint = ((*Position & 0x0F) << 12) \|
				222	((*(Position + 1) & 0x3F) << 6) \|
				223	(*(Position + 2) & 0x3F);
				224	// Codepoints between 0xD800 and 0xDFFF are invalid, as
				225	// they are high / low surrogate halves used by UTF-16.
				226	if (codepoint >= 0x800 &&
				227	(codepoint < 0xD800 \|\| codepoint > 0xDFFF))
				228	return std::make_pair(codepoint, 3);
				229	}
				230	// 4 bytes: [0x10000, 0x10FFFF]
				231	// Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
				232	if (Position + 3 != End &&
				233	((*Position & 0xF8) == 0xF0) &&
				234	((*(Position + 1) & 0xC0) == 0x80) &&
				235	((*(Position + 2) & 0xC0) == 0x80) &&
				236	((*(Position + 3) & 0xC0) == 0x80)) {
				237	uint32_t codepoint = ((*Position & 0x07) << 18) \|
				238	((*(Position + 1) & 0x3F) << 12) \|
				239	((*(Position + 2) & 0x3F) << 6) \|
				240	(*(Position + 3) & 0x3F);
				241	if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
				242	return std::make_pair(codepoint, 4);
				243	}
				244	return std::make_pair(0, 0);
				245	}
				246
				247	namespace llvm {
				248	namespace yaml {
				249	/// @brief Scans YAML tokens from a MemoryBuffer.
				250	class Scanner {
				251	public:
				252	Scanner(const StringRef Input, SourceMgr &SM);
				253
				254	/// @brief Parse the next token and return it without popping it.
				255	Token &peekNext();
				256
				257	/// @brief Parse the next token and pop it from the queue.
				258	Token getNext();
				259
				260	void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
				261	ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
				262	SM.PrintMessage(Loc, Kind, Message, Ranges);
				263	}
				264
				265	void setError(const Twine &Message, StringRef::iterator Position) {
				266	if (Current >= End)
				267	Current = End - 1;
				268
				269	// Don't print out more errors after the first one we encounter. The rest
				270	// are just the result of the first, and have no meaning.
				271	if (!Failed)
				272	printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
				273	Failed = true;
				274	}
				275
				276	void setError(const Twine &Message) {
				277	setError(Message, Current);
				278	}
				279
				280	/// @brief Returns true if an error occurred while parsing.
				281	bool failed() {
				282	return Failed;
				283	}
				284
				285	private:
				286	StringRef currentInput() {
				287	return StringRef(Current, End - Current);
				288	}
				289
				290	/// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
				291	/// at \a Position.
				292	///
				293	/// If the UTF-8 code units starting at Position do not form a well-formed
				294	/// code unit subsequence, then the Unicode scalar value is 0, and the length
				295	/// is 0.
				296	UTF8Decoded decodeUTF8(StringRef::iterator Position) {
				297	return ::decodeUTF8(StringRef(Position, End - Position));
				298	}
				299
				300	// The following functions are based on the gramar rules in the YAML spec. The
				301	// style of the function names it meant to closely match how they are written
				302	// in the spec. The number within the [] is the number of the grammar rule in
				303	// the spec.
				304	//
				305	// See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
				306	//
				307	// c-
				308	// A production starting and ending with a special character.
				309	// b-
				310	// A production matching a single line break.
				311	// nb-
				312	// A production starting and ending with a non-break character.
				313	// s-
				314	// A production starting and ending with a white space character.
				315	// ns-
				316	// A production starting and ending with a non-space character.
				317	// l-
				318	// A production matching complete line(s).
				319
				320	/// @brief Skip a single nb-char[27] starting at Position.
				321	///
				322	/// A nb-char is 0x9 \| [0x20-0x7E] \| 0x85 \| [0xA0-0xD7FF] \| [0xE000-0xFEFE]
				323	/// \| [0xFF00-0xFFFD] \| [0x10000-0x10FFFF]
				324	///
				325	/// @returns The code unit after the nb-char, or Position if it's not an
				326	/// nb-char.
				327	StringRef::iterator skip_nb_char(StringRef::iterator Position);
				328
				329	/// @brief Skip a single b-break[28] starting at Position.
				330	///
				331	/// A b-break is 0xD 0xA \| 0xD \| 0xA
				332	///
				333	/// @returns The code unit after the b-break, or Position if it's not a
				334	/// b-break.
				335	StringRef::iterator skip_b_break(StringRef::iterator Position);
				336
				337	/// @brief Skip a single s-white[33] starting at Position.
				338	///
				339	/// A s-white is 0x20 \| 0x9
				340	///
				341	/// @returns The code unit after the s-white, or Position if it's not a
				342	/// s-white.
				343	StringRef::iterator skip_s_white(StringRef::iterator Position);
				344
				345	/// @brief Skip a single ns-char[34] starting at Position.
				346	///
				347	/// A ns-char is nb-char - s-white
				348	///
				349	/// @returns The code unit after the ns-char, or Position if it's not a
				350	/// ns-char.
				351	StringRef::iterator skip_ns_char(StringRef::iterator Position);
				352
				353	typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
				354	/// @brief Skip minimal well-formed code unit subsequences until Func
				355	/// returns its input.
				356	///
				357	/// @returns The code unit after the last minimal well-formed code unit
				358	/// subsequence that Func accepted.
				359	StringRef::iterator skip_while( SkipWhileFunc Func
				360	, StringRef::iterator Position);
				361
				362	/// @brief Scan ns-uri-char[39]s starting at Cur.
				363	///
				364	/// This updates Cur and Column while scanning.
				365	///
				366	/// @returns A StringRef starting at Cur which covers the longest contiguous
				367	/// sequence of ns-uri-char.
				368	StringRef scan_ns_uri_char();
				369
				370	/// @brief Scan ns-plain-one-line[133] starting at \a Cur.
				371	StringRef scan_ns_plain_one_line();
				372
				373	/// @brief Consume a minimal well-formed code unit subsequence starting at
				374	/// \a Cur. Return false if it is not the same Unicode scalar value as
				375	/// \a Expected. This updates \a Column.
				376	bool consume(uint32_t Expected);
				377
				378	/// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
				379	void skip(uint32_t Distance);
				380
				381	/// @brief Return true if the minimal well-formed code unit subsequence at
				382	/// Pos is whitespace or a new line
				383	bool isBlankOrBreak(StringRef::iterator Position);
				384
				385	/// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
				386	void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
				387	, unsigned AtColumn
				388	, bool IsRequired);
				389
				390	/// @brief Remove simple keys that can no longer be valid simple keys.
				391	///
				392	/// Invalid simple keys are not on the current line or are further than 1024
				393	/// columns back.
				394	void removeStaleSimpleKeyCandidates();
				395
				396	/// @brief Remove all simple keys on FlowLevel \a Level.
				397	void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
				398
				399	/// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
				400	/// tokens if needed.
				401	bool unrollIndent(int ToColumn);
				402
				403	/// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
				404	/// if needed.
				405	bool rollIndent( int ToColumn
				406	, Token::TokenKind Kind
				407	, TokenQueueT::iterator InsertPoint);
				408
				409	/// @brief Skip whitespace and comments until the start of the next token.
				410	void scanToNextToken();
				411
				412	/// @brief Must be the first token generated.
				413	bool scanStreamStart();
				414
				415	/// @brief Generate tokens needed to close out the stream.
				416	bool scanStreamEnd();
				417
				418	/// @brief Scan a %BLAH directive.
				419	bool scanDirective();
				420
				421	/// @brief Scan a ... or ---.
				422	bool scanDocumentIndicator(bool IsStart);
				423
				424	/// @brief Scan a [ or { and generate the proper flow collection start token.
				425	bool scanFlowCollectionStart(bool IsSequence);
				426
				427	/// @brief Scan a ] or } and generate the proper flow collection end token.
				428	bool scanFlowCollectionEnd(bool IsSequence);
				429
				430	/// @brief Scan the , that separates entries in a flow collection.
				431	bool scanFlowEntry();
				432
				433	/// @brief Scan the - that starts block sequence entries.
				434	bool scanBlockEntry();
				435
				436	/// @brief Scan an explicit ? indicating a key.
				437	bool scanKey();
				438
				439	/// @brief Scan an explicit : indicating a value.
				440	bool scanValue();
				441
				442	/// @brief Scan a quoted scalar.
				443	bool scanFlowScalar(bool IsDoubleQuoted);
				444
				445	/// @brief Scan an unquoted scalar.
				446	bool scanPlainScalar();
				447
				448	/// @brief Scan an Alias or Anchor starting with * or &.
				449	bool scanAliasOrAnchor(bool IsAlias);
				450
				451	/// @brief Scan a block scalar starting with \| or >.
				452	bool scanBlockScalar(bool IsLiteral);
				453
				454	/// @brief Scan a tag of the form !stuff.
				455	bool scanTag();
				456
				457	/// @brief Dispatch to the next scanning function based on \a *Cur.
				458	bool fetchMoreTokens();
				459
				460	/// @brief The SourceMgr used for diagnostics and buffer management.
				461	SourceMgr &SM;
				462
				463	/// @brief The original input.
				464	MemoryBuffer *InputBuffer;
				465
				466	/// @brief The current position of the scanner.
				467	StringRef::iterator Current;
				468
				469	/// @brief The end of the input (one past the last character).
				470	StringRef::iterator End;
				471
				472	/// @brief Current YAML indentation level in spaces.
				473	int Indent;
				474
				475	/// @brief Current column number in Unicode code points.
				476	unsigned Column;
				477
				478	/// @brief Current line number.
				479	unsigned Line;
				480
				481	/// @brief How deep we are in flow style containers. 0 Means at block level.
				482	unsigned FlowLevel;
				483
				484	/// @brief Are we at the start of the stream?
				485	bool IsStartOfStream;
				486
				487	/// @brief Can the next token be the start of a simple key?
				488	bool IsSimpleKeyAllowed;
				489
				490	/// @brief Is the next token required to start a simple key?
				491	bool IsSimpleKeyRequired;
				492
				493	/// @brief True if an error has occurred.
				494	bool Failed;
				495
				496	/// @brief Queue of tokens. This is required to queue up tokens while looking
				497	/// for the end of a simple key. And for cases where a single character
				498	/// can produce multiple tokens (e.g. BlockEnd).
				499	TokenQueueT TokenQueue;
				500
				501	/// @brief Indentation levels.
				502	SmallVector<int, 4> Indents;
				503
				504	/// @brief Potential simple keys.
				505	SmallVector<SimpleKey, 4> SimpleKeys;
				506	};
				507
				508	} // end namespace yaml
				509	} // end namespace llvm
				510
				511	/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
				512	static void encodeUTF8( uint32_t UnicodeScalarValue
				513	, SmallVectorImpl<char> &Result) {
				514	if (UnicodeScalarValue <= 0x7F) {
				515	Result.push_back(UnicodeScalarValue & 0x7F);
				516	} else if (UnicodeScalarValue <= 0x7FF) {
				517	uint8_t FirstByte = 0xC0 \| ((UnicodeScalarValue & 0x7C0) >> 6);
				518	uint8_t SecondByte = 0x80 \| (UnicodeScalarValue & 0x3F);
				519	Result.push_back(FirstByte);
				520	Result.push_back(SecondByte);
				521	} else if (UnicodeScalarValue <= 0xFFFF) {
				522	uint8_t FirstByte = 0xE0 \| ((UnicodeScalarValue & 0xF000) >> 12);
				523	uint8_t SecondByte = 0x80 \| ((UnicodeScalarValue & 0xFC0) >> 6);
				524	uint8_t ThirdByte = 0x80 \| (UnicodeScalarValue & 0x3F);
				525	Result.push_back(FirstByte);
				526	Result.push_back(SecondByte);
				527	Result.push_back(ThirdByte);
				528	} else if (UnicodeScalarValue <= 0x10FFFF) {
				529	uint8_t FirstByte = 0xF0 \| ((UnicodeScalarValue & 0x1F0000) >> 18);
				530	uint8_t SecondByte = 0x80 \| ((UnicodeScalarValue & 0x3F000) >> 12);
				531	uint8_t ThirdByte = 0x80 \| ((UnicodeScalarValue & 0xFC0) >> 6);
				532	uint8_t FourthByte = 0x80 \| (UnicodeScalarValue & 0x3F);
				533	Result.push_back(FirstByte);
				534	Result.push_back(SecondByte);
				535	Result.push_back(ThirdByte);
				536	Result.push_back(FourthByte);
				537	}
				538	}
				539
				540	bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
				541	SourceMgr SM;
				542	Scanner scanner(Input, SM);
				543	while (true) {
				544	Token T = scanner.getNext();
				545	switch (T.Kind) {
				546	case Token::TK_StreamStart:
				547	OS << "Stream-Start: ";
				548	break;
				549	case Token::TK_StreamEnd:
				550	OS << "Stream-End: ";
				551	break;
				552	case Token::TK_VersionDirective:
				553	OS << "Version-Directive: ";
				554	break;
				555	case Token::TK_TagDirective:
				556	OS << "Tag-Directive: ";
				557	break;
				558	case Token::TK_DocumentStart:
				559	OS << "Document-Start: ";
				560	break;
				561	case Token::TK_DocumentEnd:
				562	OS << "Document-End: ";
				563	break;
				564	case Token::TK_BlockEntry:
				565	OS << "Block-Entry: ";
				566	break;
				567	case Token::TK_BlockEnd:
				568	OS << "Block-End: ";
				569	break;
				570	case Token::TK_BlockSequenceStart:
				571	OS << "Block-Sequence-Start: ";
				572	break;
				573	case Token::TK_BlockMappingStart:
				574	OS << "Block-Mapping-Start: ";
				575	break;
				576	case Token::TK_FlowEntry:
				577	OS << "Flow-Entry: ";
				578	break;
				579	case Token::TK_FlowSequenceStart:
				580	OS << "Flow-Sequence-Start: ";
				581	break;
				582	case Token::TK_FlowSequenceEnd:
				583	OS << "Flow-Sequence-End: ";
				584	break;
				585	case Token::TK_FlowMappingStart:
				586	OS << "Flow-Mapping-Start: ";
				587	break;
				588	case Token::TK_FlowMappingEnd:
				589	OS << "Flow-Mapping-End: ";
				590	break;
				591	case Token::TK_Key:
				592	OS << "Key: ";
				593	break;
				594	case Token::TK_Value:
				595	OS << "Value: ";
				596	break;
				597	case Token::TK_Scalar:
				598	OS << "Scalar: ";
				599	break;
				600	case Token::TK_Alias:
				601	OS << "Alias: ";
				602	break;
				603	case Token::TK_Anchor:
				604	OS << "Anchor: ";
				605	break;
				606	case Token::TK_Tag:
				607	OS << "Tag: ";
				608	break;
				609	case Token::TK_Error:
				610	break;
				611	}
				612	OS << T.Range << "\n";
				613	if (T.Kind == Token::TK_StreamEnd)
				614	break;
				615	else if (T.Kind == Token::TK_Error)
				616	return false;
				617	}
				618	return true;
				619	}
				620
				621	bool yaml::scanTokens(StringRef Input) {
				622	llvm::SourceMgr SM;
				623	llvm::yaml::Scanner scanner(Input, SM);
				624	for (;;) {
				625	llvm::yaml::Token T = scanner.getNext();
				626	if (T.Kind == Token::TK_StreamEnd)
				627	break;
				628	else if (T.Kind == Token::TK_Error)
				629	return false;
				630	}
				631	return true;
				632	}
				633
				634	std::string yaml::escape(StringRef Input) {
				635	std::string EscapedInput;
				636	for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
				637	if (*i == '\\')
				638	EscapedInput += "\\\\";
				639	else if (*i == '"')
				640	EscapedInput += "\\\"";
				641	else if (*i == 0)
				642	EscapedInput += "\\0";
				643	else if (*i == 0x07)
				644	EscapedInput += "\\a";
				645	else if (*i == 0x08)
				646	EscapedInput += "\\b";
				647	else if (*i == 0x09)
				648	EscapedInput += "\\t";
				649	else if (*i == 0x0A)
				650	EscapedInput += "\\n";
				651	else if (*i == 0x0B)
				652	EscapedInput += "\\v";
				653	else if (*i == 0x0C)
				654	EscapedInput += "\\f";
				655	else if (*i == 0x0D)
				656	EscapedInput += "\\r";
				657	else if (*i == 0x1B)
				658	EscapedInput += "\\e";
				659	else if (i >= 0 && i < 0x20) { // Control characters not handled above.
				660	std::string HexStr = utohexstr(*i);
				661	EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
				662	} else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
				663	UTF8Decoded UnicodeScalarValue
				664	= decodeUTF8(StringRef(i, Input.end() - i));
				665	if (UnicodeScalarValue.second == 0) {
				666	// Found invalid char.
				667	SmallString<4> Val;
				668	encodeUTF8(0xFFFD, Val);
				669	EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
				670	// FIXME: Error reporting.
				671	return EscapedInput;
				672	}
				673	if (UnicodeScalarValue.first == 0x85)
				674	EscapedInput += "\\N";
				675	else if (UnicodeScalarValue.first == 0xA0)
				676	EscapedInput += "\\_";
				677	else if (UnicodeScalarValue.first == 0x2028)
				678	EscapedInput += "\\L";
				679	else if (UnicodeScalarValue.first == 0x2029)
				680	EscapedInput += "\\P";
				681	else {
				682	std::string HexStr = utohexstr(UnicodeScalarValue.first);
				683	if (HexStr.size() <= 2)
				684	EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
				685	else if (HexStr.size() <= 4)
				686	EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
				687	else if (HexStr.size() <= 8)
				688	EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
				689	}
				690	i += UnicodeScalarValue.second - 1;
				691	} else
				692	EscapedInput.push_back(*i);
				693	}
				694	return EscapedInput;
				695	}
				696
				697	Scanner::Scanner(StringRef Input, SourceMgr &sm)
				698	: SM(sm)
				699	, Indent(-1)
				700	, Column(0)
				701	, Line(0)
				702	, FlowLevel(0)
				703	, IsStartOfStream(true)
				704	, IsSimpleKeyAllowed(true)
				705	, IsSimpleKeyRequired(false)
				706	, Failed(false) {
				707	InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
				708	SM.AddNewSourceBuffer(InputBuffer, SMLoc());
				709	Current = InputBuffer->getBufferStart();
				710	End = InputBuffer->getBufferEnd();
				711	}
				712
				713	Token &Scanner::peekNext() {
				714	// If the current token is a possible simple key, keep parsing until we
				715	// can confirm.
				716	bool NeedMore = false;
				717	while (true) {
				718	if (TokenQueue.empty() \|\| NeedMore) {
				719	if (!fetchMoreTokens()) {
				720	TokenQueue.clear();
				721	TokenQueue.push_back(Token());
				722	return TokenQueue.front();
				723	}
				724	}
				725	assert(!TokenQueue.empty() &&
				726	"fetchMoreTokens lied about getting tokens!");
				727
				728	removeStaleSimpleKeyCandidates();
				729	SimpleKey SK;
				730	SK.Tok = TokenQueue.front();
				731	if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
				732	== SimpleKeys.end())
				733	break;
				734	else
				735	NeedMore = true;
				736	}
				737	return TokenQueue.front();
				738	}
				739
				740	Token Scanner::getNext() {
				741	Token Ret = peekNext();
				742	// TokenQueue can be empty if there was an error getting the next token.
				743	if (!TokenQueue.empty())
				744	TokenQueue.pop_front();
				745
				746	// There cannot be any referenced Token's if the TokenQueue is empty. So do a
				747	// quick deallocation of them all.
				748	if (TokenQueue.empty()) {
				749	TokenQueue.Alloc.Reset();
				750	}
				751
				752	return Ret;
				753	}
				754
				755	StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
				756	// Check 7 bit c-printable - b-char.
				757	if ( *Position == 0x09
				758	\|\| (Position >= 0x20 && Position <= 0x7E))
				759	return Position + 1;
				760
				761	// Check for valid UTF-8.
				762	if (uint8_t(*Position) & 0x80) {
				763	UTF8Decoded u8d = decodeUTF8(Position);
				764	if ( u8d.second != 0
				765	&& u8d.first != 0xFEFF
				766	&& ( u8d.first == 0x85
				767	\|\| ( u8d.first >= 0xA0
				768	&& u8d.first <= 0xD7FF)
				769	\|\| ( u8d.first >= 0xE000
				770	&& u8d.first <= 0xFFFD)
				771	\|\| ( u8d.first >= 0x10000
				772	&& u8d.first <= 0x10FFFF)))
				773	return Position + u8d.second;
				774	}
				775	return Position;
				776	}
				777
				778	StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
				779	if (*Position == 0x0D) {
				780	if (Position + 1 != End && *(Position + 1) == 0x0A)
				781	return Position + 2;
				782	return Position + 1;
				783	}
				784
				785	if (*Position == 0x0A)
				786	return Position + 1;
				787	return Position;
				788	}
				789
				790
				791	StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
				792	if (Position == End)
				793	return Position;
				794	if (Position == ' ' \|\| Position == '\t')
				795	return Position + 1;
				796	return Position;
				797	}
				798
				799	StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
				800	if (Position == End)
				801	return Position;
				802	if (Position == ' ' \|\| Position == '\t')
				803	return Position;
				804	return skip_nb_char(Position);
				805	}
				806
				807	StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
				808	, StringRef::iterator Position) {
				809	while (true) {
				810	StringRef::iterator i = (this->*Func)(Position);
				811	if (i == Position)
				812	break;
				813	Position = i;
				814	}
				815	return Position;
				816	}
				817
				818	static bool is_ns_hex_digit(const char C) {
				819	return (C >= '0' && C <= '9')
				820	\|\| (C >= 'a' && C <= 'z')
				821	\|\| (C >= 'A' && C <= 'Z');
				822	}
				823
				824	static bool is_ns_word_char(const char C) {
				825	return C == '-'
				826	\|\| (C >= 'a' && C <= 'z')
				827	\|\| (C >= 'A' && C <= 'Z');
				828	}
				829
				830	StringRef Scanner::scan_ns_uri_char() {
				831	StringRef::iterator Start = Current;
				832	while (true) {
				833	if (Current == End)
				834	break;
				835	if (( *Current == '%'
				836	&& Current + 2 < End
				837	&& is_ns_hex_digit(*(Current + 1))
				838	&& is_ns_hex_digit(*(Current + 2)))
				839	\|\| is_ns_word_char(*Current)
				840	\|\| StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
				841	!= StringRef::npos) {
				842	++Current;
				843	++Column;
				844	} else
				845	break;
				846	}
				847	return StringRef(Start, Current - Start);
				848	}
				849
				850	StringRef Scanner::scan_ns_plain_one_line() {
				851	StringRef::iterator start = Current;
				852	// The first character must already be verified.
				853	++Current;
				854	while (true) {
				855	if (Current == End) {
				856	break;
				857	} else if (*Current == ':') {
				858	// Check if the next character is a ns-char.
				859	if (Current + 1 == End)
				860	break;
				861	StringRef::iterator i = skip_ns_char(Current + 1);
				862	if (Current + 1 != i) {
				863	Current = i;
				864	Column += 2; // Consume both the ':' and ns-char.
				865	} else
				866	break;
				867	} else if (*Current == '#') {
				868	// Check if the previous character was a ns-char.
				869	// The & 0x80 check is to check for the trailing byte of a utf-8
				870	if (*(Current - 1) & 0x80 \|\| skip_ns_char(Current - 1) == Current) {
				871	++Current;
				872	++Column;
				873	} else
				874	break;
				875	} else {
				876	StringRef::iterator i = skip_nb_char(Current);
				877	if (i == Current)
				878	break;
				879	Current = i;
				880	++Column;
				881	}
				882	}
				883	return StringRef(start, Current - start);
				884	}
				885
				886	bool Scanner::consume(uint32_t Expected) {
				887	if (Expected >= 0x80)
				888	report_fatal_error("Not dealing with this yet");
				889	if (Current == End)
				890	return false;
				891	if (uint8_t(*Current) >= 0x80)
				892	report_fatal_error("Not dealing with this yet");
				893	if (uint8_t(*Current) == Expected) {
				894	++Current;
				895	++Column;
				896	return true;
				897	}
				898	return false;
				899	}
				900
				901	void Scanner::skip(uint32_t Distance) {
				902	Current += Distance;
				903	Column += Distance;
				904	}
				905
				906	bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
				907	if (Position == End)
				908	return false;
				909	if ( Position == ' ' \|\| Position == '\t'
				910	\|\| Position == '\r' \|\| Position == '\n')
				911	return true;
				912	return false;
				913	}
				914
				915	void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
				916	, unsigned AtColumn
				917	, bool IsRequired) {
				918	if (IsSimpleKeyAllowed) {
				919	SimpleKey SK;
				920	SK.Tok = Tok;
				921	SK.Line = Line;
				922	SK.Column = AtColumn;
				923	SK.IsRequired = IsRequired;
				924	SK.FlowLevel = FlowLevel;
				925	SimpleKeys.push_back(SK);
				926	}
				927	}
				928
				929	void Scanner::removeStaleSimpleKeyCandidates() {
				930	for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
				931	i != SimpleKeys.end();) {
				932	if (i->Line != Line \|\| i->Column + 1024 < Column) {
				933	if (i->IsRequired)
				934	setError( "Could not find expected : for simple key"
				935	, i->Tok->Range.begin());
				936	i = SimpleKeys.erase(i);
				937	} else
				938	++i;
				939	}
				940	}
				941
				942	void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
				943	if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
				944	SimpleKeys.pop_back();
				945	}
				946
				947	bool Scanner::unrollIndent(int ToColumn) {
				948	Token T;
				949	// Indentation is ignored in flow.
				950	if (FlowLevel != 0)
				951	return true;
				952
				953	while (Indent > ToColumn) {
				954	T.Kind = Token::TK_BlockEnd;
				955	T.Range = StringRef(Current, 1);
				956	TokenQueue.push_back(T);
				957	Indent = Indents.pop_back_val();
				958	}
				959
				960	return true;
				961	}
				962
				963	bool Scanner::rollIndent( int ToColumn
				964	, Token::TokenKind Kind
				965	, TokenQueueT::iterator InsertPoint) {
				966	if (FlowLevel)
				967	return true;
				968	if (Indent < ToColumn) {
				969	Indents.push_back(Indent);
				970	Indent = ToColumn;
				971
				972	Token T;
				973	T.Kind = Kind;
				974	T.Range = StringRef(Current, 0);
				975	TokenQueue.insert(InsertPoint, T);
				976	}
				977	return true;
				978	}
				979
				980	void Scanner::scanToNextToken() {
				981	while (true) {
				982	while (Current == ' ' \|\| Current == '\t') {
				983	skip(1);
				984	}
				985
				986	// Skip comment.
				987	if (*Current == '#') {
				988	while (true) {
				989	// This may skip more than one byte, thus Column is only incremented
				990	// for code points.
				991	StringRef::iterator i = skip_nb_char(Current);
				992	if (i == Current)
				993	break;
				994	Current = i;
				995	++Column;
				996	}
				997	}
				998
				999	// Skip EOL.
				1000	StringRef::iterator i = skip_b_break(Current);
				1001	if (i == Current)
				1002	break;
				1003	Current = i;
				1004	++Line;
				1005	Column = 0;
				1006	// New lines may start a simple key.
				1007	if (!FlowLevel)
				1008	IsSimpleKeyAllowed = true;
				1009	}
				1010	}
				1011
				1012	bool Scanner::scanStreamStart() {
				1013	IsStartOfStream = false;
				1014
				1015	EncodingInfo EI = getUnicodeEncoding(currentInput());
				1016
				1017	Token T;
				1018	T.Kind = Token::TK_StreamStart;
				1019	T.Range = StringRef(Current, EI.second);
				1020	TokenQueue.push_back(T);
				1021	Current += EI.second;
				1022	return true;
				1023	}
				1024
				1025	bool Scanner::scanStreamEnd() {
				1026	// Force an ending new line if one isn't present.
				1027	if (Column != 0) {
				1028	Column = 0;
				1029	++Line;
				1030	}
				1031
				1032	unrollIndent(-1);
				1033	SimpleKeys.clear();
				1034	IsSimpleKeyAllowed = false;
				1035
				1036	Token T;
				1037	T.Kind = Token::TK_StreamEnd;
				1038	T.Range = StringRef(Current, 0);
				1039	TokenQueue.push_back(T);
				1040	return true;
				1041	}
				1042
				1043	bool Scanner::scanDirective() {
				1044	// Reset the indentation level.
				1045	unrollIndent(-1);
				1046	SimpleKeys.clear();
				1047	IsSimpleKeyAllowed = false;
				1048
				1049	StringRef::iterator Start = Current;
				1050	consume('%');
				1051	StringRef::iterator NameStart = Current;
				1052	Current = skip_while(&Scanner::skip_ns_char, Current);
				1053	StringRef Name(NameStart, Current - NameStart);
				1054	Current = skip_while(&Scanner::skip_s_white, Current);
				1055
				1056	if (Name == "YAML") {
				1057	Current = skip_while(&Scanner::skip_ns_char, Current);
				1058	Token T;
				1059	T.Kind = Token::TK_VersionDirective;
				1060	T.Range = StringRef(Start, Current - Start);
				1061	TokenQueue.push_back(T);
				1062	return true;
				1063	}
				1064	return false;
				1065	}
				1066
				1067	bool Scanner::scanDocumentIndicator(bool IsStart) {
				1068	unrollIndent(-1);
				1069	SimpleKeys.clear();
				1070	IsSimpleKeyAllowed = false;
				1071
				1072	Token T;
				1073	T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
				1074	T.Range = StringRef(Current, 3);
				1075	skip(3);
				1076	TokenQueue.push_back(T);
				1077	return true;
				1078	}
				1079
				1080	bool Scanner::scanFlowCollectionStart(bool IsSequence) {
				1081	Token T;
				1082	T.Kind = IsSequence ? Token::TK_FlowSequenceStart
				1083	: Token::TK_FlowMappingStart;
				1084	T.Range = StringRef(Current, 1);
				1085	skip(1);
				1086	TokenQueue.push_back(T);
				1087
				1088	// [ and { may begin a simple key.
				1089	saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
				1090
				1091	// And may also be followed by a simple key.
				1092	IsSimpleKeyAllowed = true;
				1093	++FlowLevel;
				1094	return true;
				1095	}
				1096
				1097	bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
				1098	removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
				1099	IsSimpleKeyAllowed = false;
				1100	Token T;
				1101	T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
				1102	: Token::TK_FlowMappingEnd;
				1103	T.Range = StringRef(Current, 1);
				1104	skip(1);
				1105	TokenQueue.push_back(T);
				1106	if (FlowLevel)
				1107	--FlowLevel;
				1108	return true;
				1109	}
				1110
				1111	bool Scanner::scanFlowEntry() {
				1112	removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
				1113	IsSimpleKeyAllowed = true;
				1114	Token T;
				1115	T.Kind = Token::TK_FlowEntry;
				1116	T.Range = StringRef(Current, 1);
				1117	skip(1);
				1118	TokenQueue.push_back(T);
				1119	return true;
				1120	}
				1121
				1122	bool Scanner::scanBlockEntry() {
				1123	rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
				1124	removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
				1125	IsSimpleKeyAllowed = true;
				1126	Token T;
				1127	T.Kind = Token::TK_BlockEntry;
				1128	T.Range = StringRef(Current, 1);
				1129	skip(1);
				1130	TokenQueue.push_back(T);
				1131	return true;
				1132	}
				1133
				1134	bool Scanner::scanKey() {
				1135	if (!FlowLevel)
				1136	rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
				1137
				1138	removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
				1139	IsSimpleKeyAllowed = !FlowLevel;
				1140
				1141	Token T;
				1142	T.Kind = Token::TK_Key;
				1143	T.Range = StringRef(Current, 1);
				1144	skip(1);
				1145	TokenQueue.push_back(T);
				1146	return true;
				1147	}
				1148
				1149	bool Scanner::scanValue() {
				1150	// If the previous token could have been a simple key, insert the key token
				1151	// into the token queue.
				1152	if (!SimpleKeys.empty()) {
				1153	SimpleKey SK = SimpleKeys.pop_back_val();
				1154	Token T;
				1155	T.Kind = Token::TK_Key;
				1156	T.Range = SK.Tok->Range;
				1157	TokenQueueT::iterator i, e;
				1158	for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
				1159	if (i == SK.Tok)
				1160	break;
				1161	}
				1162	assert(i != e && "SimpleKey not in token queue!");
				1163	i = TokenQueue.insert(i, T);
				1164
				1165	// We may also need to add a Block-Mapping-Start token.
				1166	rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
				1167
				1168	IsSimpleKeyAllowed = false;
				1169	} else {
				1170	if (!FlowLevel)
				1171	rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
				1172	IsSimpleKeyAllowed = !FlowLevel;
				1173	}
				1174
				1175	Token T;
				1176	T.Kind = Token::TK_Value;
				1177	T.Range = StringRef(Current, 1);
				1178	skip(1);
				1179	TokenQueue.push_back(T);
				1180	return true;
				1181	}
				1182
				1183	// Forbidding inlining improves performance by roughly 20%.
				1184	// FIXME: Remove once llvm optimizes this to the faster version without hints.
				1185	LLVM_ATTRIBUTE_NOINLINE static bool
				1186	wasEscaped(StringRef::iterator First, StringRef::iterator Position);
				1187
				1188	// Returns whether a character at 'Position' was escaped with a leading '\'.
				1189	// 'First' specifies the position of the first character in the string.
				1190	static bool wasEscaped(StringRef::iterator First,
				1191	StringRef::iterator Position) {
				1192	assert(Position - 1 >= First);
				1193	StringRef::iterator I = Position - 1;
				1194	// We calculate the number of consecutive '\'s before the current position
				1195	// by iterating backwards through our string.
				1196	while (I >= First && *I == '\\') --I;
				1197	// (Position - 1 - I) now contains the number of '\'s before the current
				1198	// position. If it is odd, the character at 'Position' was escaped.
				1199	return (Position - 1 - I) % 2 == 1;
				1200	}
				1201
				1202	bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
				1203	StringRef::iterator Start = Current;
				1204	unsigned ColStart = Column;
				1205	if (IsDoubleQuoted) {
				1206	do {
				1207	++Current;
				1208	while (Current != End && *Current != '"')
				1209	++Current;
				1210	// Repeat until the previous character was not a '\' or was an escaped
				1211	// backslash.
				1212	} while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
				1213	} else {
				1214	skip(1);
				1215	while (true) {
				1216	// Skip a ' followed by another '.
				1217	if (Current + 1 < End && Current == '\'' && (Current + 1) == '\'') {
				1218	skip(2);
				1219	continue;
				1220	} else if (*Current == '\'')
				1221	break;
				1222	StringRef::iterator i = skip_nb_char(Current);
				1223	if (i == Current) {
				1224	i = skip_b_break(Current);
				1225	if (i == Current)
				1226	break;
				1227	Current = i;
				1228	Column = 0;
				1229	++Line;
				1230	} else {
				1231	if (i == End)
				1232	break;
				1233	Current = i;
				1234	++Column;
				1235	}
				1236	}
				1237	}
				1238	skip(1); // Skip ending quote.
				1239	Token T;
				1240	T.Kind = Token::TK_Scalar;
				1241	T.Range = StringRef(Start, Current - Start);
				1242	TokenQueue.push_back(T);
				1243
				1244	saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
				1245
				1246	IsSimpleKeyAllowed = false;
				1247
				1248	return true;
				1249	}
				1250
				1251	bool Scanner::scanPlainScalar() {
				1252	StringRef::iterator Start = Current;
				1253	unsigned ColStart = Column;
				1254	unsigned LeadingBlanks = 0;
				1255	assert(Indent >= -1 && "Indent must be >= -1 !");
				1256	unsigned indent = static_cast<unsigned>(Indent + 1);
				1257	while (true) {
				1258	if (*Current == '#')
				1259	break;
				1260
				1261	while (!isBlankOrBreak(Current)) {
				1262	if ( FlowLevel && *Current == ':'
				1263	&& !(isBlankOrBreak(Current + 1) \|\| *(Current + 1) == ',')) {
				1264	setError("Found unexpected ':' while scanning a plain scalar", Current);
				1265	return false;
				1266	}
				1267
				1268	// Check for the end of the plain scalar.
				1269	if ( (*Current == ':' && isBlankOrBreak(Current + 1))
				1270	\|\| ( FlowLevel
				1271	&& (StringRef(Current, 1).find_first_of(",:?[]{}")
				1272	!= StringRef::npos)))
				1273	break;
				1274
				1275	StringRef::iterator i = skip_nb_char(Current);
				1276	if (i == Current)
				1277	break;
				1278	Current = i;
				1279	++Column;
				1280	}
				1281
				1282	// Are we at the end?
				1283	if (!isBlankOrBreak(Current))
				1284	break;
				1285
				1286	// Eat blanks.
				1287	StringRef::iterator Tmp = Current;
				1288	while (isBlankOrBreak(Tmp)) {
				1289	StringRef::iterator i = skip_s_white(Tmp);
				1290	if (i != Tmp) {
				1291	if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
				1292	setError("Found invalid tab character in indentation", Tmp);
				1293	return false;
				1294	}
				1295	Tmp = i;
				1296	++Column;
				1297	} else {
				1298	i = skip_b_break(Tmp);
				1299	if (!LeadingBlanks)
				1300	LeadingBlanks = 1;
				1301	Tmp = i;
				1302	Column = 0;
				1303	++Line;
				1304	}
				1305	}
				1306
				1307	if (!FlowLevel && Column < indent)
				1308	break;
				1309
				1310	Current = Tmp;
				1311	}
				1312	if (Start == Current) {
				1313	setError("Got empty plain scalar", Start);
				1314	return false;
				1315	}
				1316	Token T;
				1317	T.Kind = Token::TK_Scalar;
				1318	T.Range = StringRef(Start, Current - Start);
				1319	TokenQueue.push_back(T);
				1320
				1321	// Plain scalars can be simple keys.
				1322	saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
				1323
				1324	IsSimpleKeyAllowed = false;
				1325
				1326	return true;
				1327	}
				1328
				1329	bool Scanner::scanAliasOrAnchor(bool IsAlias) {
				1330	StringRef::iterator Start = Current;
				1331	unsigned ColStart = Column;
				1332	skip(1);
				1333	while(true) {
				1334	if ( Current == '[' \|\| Current == ']'
				1335	\|\| Current == '{' \|\| Current == '}'
				1336	\|\| *Current == ','
				1337	\|\| *Current == ':')
				1338	break;
				1339	StringRef::iterator i = skip_ns_char(Current);
				1340	if (i == Current)
				1341	break;
				1342	Current = i;
				1343	++Column;
				1344	}
				1345
				1346	if (Start == Current) {
				1347	setError("Got empty alias or anchor", Start);
				1348	return false;
				1349	}
				1350
				1351	Token T;
				1352	T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
				1353	T.Range = StringRef(Start, Current - Start);
				1354	TokenQueue.push_back(T);
				1355
				1356	// Alias and anchors can be simple keys.
				1357	saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
				1358
				1359	IsSimpleKeyAllowed = false;
				1360
				1361	return true;
				1362	}
				1363
				1364	bool Scanner::scanBlockScalar(bool IsLiteral) {
				1365	StringRef::iterator Start = Current;
				1366	skip(1); // Eat \| or >
				1367	while(true) {
				1368	StringRef::iterator i = skip_nb_char(Current);
				1369	if (i == Current) {
				1370	if (Column == 0)
				1371	break;
				1372	i = skip_b_break(Current);
				1373	if (i != Current) {
				1374	// We got a line break.
				1375	Column = 0;
				1376	++Line;
				1377	Current = i;
				1378	continue;
				1379	} else {
				1380	// There was an error, which should already have been printed out.
				1381	return false;
				1382	}
				1383	}
				1384	Current = i;
				1385	++Column;
				1386	}
				1387
				1388	if (Start == Current) {
				1389	setError("Got empty block scalar", Start);
				1390	return false;
				1391	}
				1392
				1393	Token T;
				1394	T.Kind = Token::TK_Scalar;
				1395	T.Range = StringRef(Start, Current - Start);
				1396	TokenQueue.push_back(T);
				1397	return true;
				1398	}
				1399
				1400	bool Scanner::scanTag() {
				1401	StringRef::iterator Start = Current;
				1402	unsigned ColStart = Column;
				1403	skip(1); // Eat !.
				1404	if (Current == End \|\| isBlankOrBreak(Current)); // An empty tag.
				1405	else if (*Current == '<') {
				1406	skip(1);
				1407	scan_ns_uri_char();
				1408	if (!consume('>'))
				1409	return false;
				1410	} else {
				1411	// FIXME: Actually parse the c-ns-shorthand-tag rule.
				1412	Current = skip_while(&Scanner::skip_ns_char, Current);
				1413	}
				1414
				1415	Token T;
				1416	T.Kind = Token::TK_Tag;
				1417	T.Range = StringRef(Start, Current - Start);
				1418	TokenQueue.push_back(T);
				1419
				1420	// Tags can be simple keys.
				1421	saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
				1422
				1423	IsSimpleKeyAllowed = false;
				1424
				1425	return true;
				1426	}
				1427
				1428	bool Scanner::fetchMoreTokens() {
				1429	if (IsStartOfStream)
				1430	return scanStreamStart();
				1431
				1432	scanToNextToken();
				1433
				1434	if (Current == End)
				1435	return scanStreamEnd();
				1436
				1437	removeStaleSimpleKeyCandidates();
				1438
				1439	unrollIndent(Column);
				1440
				1441	if (Column == 0 && *Current == '%')
				1442	return scanDirective();
				1443
				1444	if (Column == 0 && Current + 4 <= End
				1445	&& *Current == '-'
				1446	&& *(Current + 1) == '-'
				1447	&& *(Current + 2) == '-'
				1448	&& (Current + 3 == End \|\| isBlankOrBreak(Current + 3)))
				1449	return scanDocumentIndicator(true);
				1450
				1451	if (Column == 0 && Current + 4 <= End
				1452	&& *Current == '.'
				1453	&& *(Current + 1) == '.'
				1454	&& *(Current + 2) == '.'
				1455	&& (Current + 3 == End \|\| isBlankOrBreak(Current + 3)))
				1456	return scanDocumentIndicator(false);
				1457
				1458	if (*Current == '[')
				1459	return scanFlowCollectionStart(true);
				1460
				1461	if (*Current == '{')
				1462	return scanFlowCollectionStart(false);
				1463
				1464	if (*Current == ']')
				1465	return scanFlowCollectionEnd(true);
				1466
				1467	if (*Current == '}')
				1468	return scanFlowCollectionEnd(false);
				1469
				1470	if (*Current == ',')
				1471	return scanFlowEntry();
				1472
				1473	if (*Current == '-' && isBlankOrBreak(Current + 1))
				1474	return scanBlockEntry();
				1475
				1476	if (*Current == '?' && (FlowLevel \|\| isBlankOrBreak(Current + 1)))
				1477	return scanKey();
				1478
				1479	if (*Current == ':' && (FlowLevel \|\| isBlankOrBreak(Current + 1)))
				1480	return scanValue();
				1481
				1482	if (Current == '')
				1483	return scanAliasOrAnchor(true);
				1484
				1485	if (*Current == '&')
				1486	return scanAliasOrAnchor(false);
				1487
				1488	if (*Current == '!')
				1489	return scanTag();
				1490
				1491	if (*Current == '\|' && !FlowLevel)
				1492	return scanBlockScalar(true);
				1493
				1494	if (*Current == '>' && !FlowLevel)
				1495	return scanBlockScalar(false);
				1496
				1497	if (*Current == '\'')
				1498	return scanFlowScalar(false);
				1499
				1500	if (*Current == '"')
				1501	return scanFlowScalar(true);
				1502
				1503	// Get a plain scalar.
				1504	StringRef FirstChar(Current, 1);
				1505	if (!(isBlankOrBreak(Current)
				1506	\|\| FirstChar.find_first_of("-?:,[]{}#&*!\|>'\"%@`") != StringRef::npos)
				1507	\|\| (*Current == '-' && !isBlankOrBreak(Current + 1))
				1508	\|\| (!FlowLevel && (Current == '?' \|\| Current == ':')
				1509	&& isBlankOrBreak(Current + 1))
				1510	\|\| (!FlowLevel && *Current == ':'
				1511	&& Current + 2 < End
				1512	&& *(Current + 1) == ':'
				1513	&& !isBlankOrBreak(Current + 2)))
				1514	return scanPlainScalar();
				1515
				1516	setError("Unrecognized character while tokenizing.");
				1517	return false;
				1518	}
				1519
				1520	Stream::Stream(StringRef Input, SourceMgr &SM)
				1521	: scanner(new Scanner(Input, SM))
				1522	, CurrentDoc(0) {}
				1523
				1524	bool Stream::failed() { return scanner->failed(); }
				1525
				1526	void Stream::printError(Node *N, const Twine &Msg) {
				1527	SmallVector<SMRange, 1> Ranges;
				1528	Ranges.push_back(N->getSourceRange());
				1529	scanner->printError( N->getSourceRange().Start
				1530	, SourceMgr::DK_Error
				1531	, Msg
				1532	, Ranges);
				1533	}
				1534
				1535	void Stream::handleYAMLDirective(const Token &t) {
				1536	// TODO: Ensure version is 1.x.
				1537	}
				1538
				1539	document_iterator Stream::begin() {
				1540	if (CurrentDoc)
				1541	report_fatal_error("Can only iterate over the stream once");
				1542
				1543	// Skip Stream-Start.
				1544	scanner->getNext();
				1545
				1546	CurrentDoc.reset(new Document(*this));
				1547	return document_iterator(CurrentDoc);
				1548	}
				1549
				1550	document_iterator Stream::end() {
				1551	return document_iterator();
				1552	}
				1553
				1554	void Stream::skip() {
				1555	for (document_iterator i = begin(), e = end(); i != e; ++i)
				1556	i->skip();
				1557	}
				1558
				1559	Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
				1560	: Doc(D)
				1561	, TypeID(Type)
				1562	, Anchor(A) {
				1563	SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
				1564	SourceRange = SMRange(Start, Start);
				1565	}
				1566
				1567	Node::~Node() {}
				1568
				1569	Token &Node::peekNext() {
				1570	return Doc->peekNext();
				1571	}
				1572
				1573	Token Node::getNext() {
				1574	return Doc->getNext();
				1575	}
				1576
				1577	Node *Node::parseBlockNode() {
				1578	return Doc->parseBlockNode();
				1579	}
				1580
				1581	BumpPtrAllocator &Node::getAllocator() {
				1582	return Doc->NodeAllocator;
				1583	}
				1584
				1585	void Node::setError(const Twine &Msg, Token &Tok) const {
				1586	Doc->setError(Msg, Tok);
				1587	}
				1588
				1589	bool Node::failed() const {
				1590	return Doc->failed();
				1591	}
				1592
				1593
				1594
				1595	StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
				1596	// TODO: Handle newlines properly. We need to remove leading whitespace.
				1597	if (Value[0] == '"') { // Double quoted.
				1598	// Pull off the leading and trailing "s.
				1599	StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
				1600	// Search for characters that would require unescaping the value.
				1601	StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
				1602	if (i != StringRef::npos)
				1603	return unescapeDoubleQuoted(UnquotedValue, i, Storage);
				1604	return UnquotedValue;
				1605	} else if (Value[0] == '\'') { // Single quoted.
				1606	// Pull off the leading and trailing 's.
				1607	StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
				1608	StringRef::size_type i = UnquotedValue.find('\'');
				1609	if (i != StringRef::npos) {
				1610	// We're going to need Storage.
				1611	Storage.clear();
				1612	Storage.reserve(UnquotedValue.size());
				1613	for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
				1614	StringRef Valid(UnquotedValue.begin(), i);
				1615	Storage.insert(Storage.end(), Valid.begin(), Valid.end());
				1616	Storage.push_back('\'');
				1617	UnquotedValue = UnquotedValue.substr(i + 2);
				1618	}
				1619	Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
				1620	return StringRef(Storage.begin(), Storage.size());
				1621	}
				1622	return UnquotedValue;
				1623	}
				1624	// Plain or block.
				1625	size_t trimtrail = Value.rfind(' ');
				1626	return Value.drop_back(
				1627	trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
				1628	}
				1629
				1630	StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
				1631	, StringRef::size_type i
				1632	, SmallVectorImpl<char> &Storage)
				1633	const {
				1634	// Use Storage to build proper value.
				1635	Storage.clear();
				1636	Storage.reserve(UnquotedValue.size());
				1637	for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
				1638	// Insert all previous chars into Storage.
				1639	StringRef Valid(UnquotedValue.begin(), i);
				1640	Storage.insert(Storage.end(), Valid.begin(), Valid.end());
				1641	// Chop off inserted chars.
				1642	UnquotedValue = UnquotedValue.substr(i);
				1643
				1644	assert(!UnquotedValue.empty() && "Can't be empty!");
				1645
				1646	// Parse escape or line break.
				1647	switch (UnquotedValue[0]) {
				1648	case '\r':
				1649	case '\n':
				1650	Storage.push_back('\n');
				1651	if ( UnquotedValue.size() > 1
				1652	&& (UnquotedValue[1] == '\r' \|\| UnquotedValue[1] == '\n'))
				1653	UnquotedValue = UnquotedValue.substr(1);
				1654	UnquotedValue = UnquotedValue.substr(1);
				1655	break;
				1656	default:
				1657	if (UnquotedValue.size() == 1)
				1658	// TODO: Report error.
				1659	break;
				1660	UnquotedValue = UnquotedValue.substr(1);
				1661	switch (UnquotedValue[0]) {
				1662	default: {
				1663	Token T;
				1664	T.Range = StringRef(UnquotedValue.begin(), 1);
				1665	setError("Unrecognized escape code!", T);
				1666	return "";
				1667	}
				1668	case '\r':
				1669	case '\n':
				1670	// Remove the new line.
				1671	if ( UnquotedValue.size() > 1
				1672	&& (UnquotedValue[1] == '\r' \|\| UnquotedValue[1] == '\n'))
				1673	UnquotedValue = UnquotedValue.substr(1);
				1674	// If this was just a single byte newline, it will get skipped
				1675	// below.
				1676	break;
				1677	case '0':
				1678	Storage.push_back(0x00);
				1679	break;
				1680	case 'a':
				1681	Storage.push_back(0x07);
				1682	break;
				1683	case 'b':
				1684	Storage.push_back(0x08);
				1685	break;
				1686	case 't':
				1687	case 0x09:
				1688	Storage.push_back(0x09);
				1689	break;
				1690	case 'n':
				1691	Storage.push_back(0x0A);
				1692	break;
				1693	case 'v':
				1694	Storage.push_back(0x0B);
				1695	break;
				1696	case 'f':
				1697	Storage.push_back(0x0C);
				1698	break;
				1699	case 'r':
				1700	Storage.push_back(0x0D);
				1701	break;
				1702	case 'e':
				1703	Storage.push_back(0x1B);
				1704	break;
				1705	case ' ':
				1706	Storage.push_back(0x20);
				1707	break;
				1708	case '"':
				1709	Storage.push_back(0x22);
				1710	break;
				1711	case '/':
				1712	Storage.push_back(0x2F);
				1713	break;
				1714	case '\\':
				1715	Storage.push_back(0x5C);
				1716	break;
				1717	case 'N':
				1718	encodeUTF8(0x85, Storage);
				1719	break;
				1720	case '_':
				1721	encodeUTF8(0xA0, Storage);
				1722	break;
				1723	case 'L':
				1724	encodeUTF8(0x2028, Storage);
				1725	break;
				1726	case 'P':
				1727	encodeUTF8(0x2029, Storage);
				1728	break;
				1729	case 'x': {
				1730	if (UnquotedValue.size() < 3)
				1731	// TODO: Report error.
				1732	break;
				1733	unsigned int UnicodeScalarValue;
				1734	UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
				1735	encodeUTF8(UnicodeScalarValue, Storage);
				1736	UnquotedValue = UnquotedValue.substr(2);
				1737	break;
				1738	}
				1739	case 'u': {
				1740	if (UnquotedValue.size() < 5)
				1741	// TODO: Report error.
				1742	break;
				1743	unsigned int UnicodeScalarValue;
				1744	UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
				1745	encodeUTF8(UnicodeScalarValue, Storage);
				1746	UnquotedValue = UnquotedValue.substr(4);
				1747	break;
				1748	}
				1749	case 'U': {
				1750	if (UnquotedValue.size() < 9)
				1751	// TODO: Report error.
				1752	break;
				1753	unsigned int UnicodeScalarValue;
				1754	UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
				1755	encodeUTF8(UnicodeScalarValue, Storage);
				1756	UnquotedValue = UnquotedValue.substr(8);
				1757	break;
				1758	}
				1759	}
				1760	UnquotedValue = UnquotedValue.substr(1);
				1761	}
				1762	}
				1763	Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
				1764	return StringRef(Storage.begin(), Storage.size());
				1765	}
				1766
				1767	Node *KeyValueNode::getKey() {
				1768	if (Key)
				1769	return Key;
				1770	// Handle implicit null keys.
				1771	{
				1772	Token &t = peekNext();
				1773	if ( t.Kind == Token::TK_BlockEnd
				1774	\|\| t.Kind == Token::TK_Value
				1775	\|\| t.Kind == Token::TK_Error) {
				1776	return Key = new (getAllocator()) NullNode(Doc);
				1777	}
				1778	if (t.Kind == Token::TK_Key)
				1779	getNext(); // skip TK_Key.
				1780	}
				1781
				1782	// Handle explicit null keys.
				1783	Token &t = peekNext();
				1784	if (t.Kind == Token::TK_BlockEnd \|\| t.Kind == Token::TK_Value) {
				1785	return Key = new (getAllocator()) NullNode(Doc);
				1786	}
				1787
				1788	// We've got a normal key.
				1789	return Key = parseBlockNode();
				1790	}
				1791
				1792	Node *KeyValueNode::getValue() {
				1793	if (Value)
				1794	return Value;
				1795	getKey()->skip();
				1796	if (failed())
				1797	return Value = new (getAllocator()) NullNode(Doc);
				1798
				1799	// Handle implicit null values.
				1800	{
				1801	Token &t = peekNext();
				1802	if ( t.Kind == Token::TK_BlockEnd
				1803	\|\| t.Kind == Token::TK_FlowMappingEnd
				1804	\|\| t.Kind == Token::TK_Key
				1805	\|\| t.Kind == Token::TK_FlowEntry
				1806	\|\| t.Kind == Token::TK_Error) {
				1807	return Value = new (getAllocator()) NullNode(Doc);
				1808	}
				1809
				1810	if (t.Kind != Token::TK_Value) {
				1811	setError("Unexpected token in Key Value.", t);
				1812	return Value = new (getAllocator()) NullNode(Doc);
				1813	}
				1814	getNext(); // skip TK_Value.
				1815	}
				1816
				1817	// Handle explicit null values.
				1818	Token &t = peekNext();
				1819	if (t.Kind == Token::TK_BlockEnd \|\| t.Kind == Token::TK_Key) {
				1820	return Value = new (getAllocator()) NullNode(Doc);
				1821	}
				1822
				1823	// We got a normal value.
				1824	return Value = parseBlockNode();
				1825	}
				1826
				1827	void MappingNode::increment() {
				1828	if (failed()) {
				1829	IsAtEnd = true;
				1830	CurrentEntry = 0;
				1831	return;
				1832	}
				1833	if (CurrentEntry) {
				1834	CurrentEntry->skip();
				1835	if (Type == MT_Inline) {
				1836	IsAtEnd = true;
				1837	CurrentEntry = 0;
				1838	return;
				1839	}
				1840	}
				1841	Token T = peekNext();
				1842	if (T.Kind == Token::TK_Key \|\| T.Kind == Token::TK_Scalar) {
				1843	// KeyValueNode eats the TK_Key. That way it can detect null keys.
				1844	CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
				1845	} else if (Type == MT_Block) {
				1846	switch (T.Kind) {
				1847	case Token::TK_BlockEnd:
				1848	getNext();
				1849	IsAtEnd = true;
				1850	CurrentEntry = 0;
				1851	break;
				1852	default:
				1853	setError("Unexpected token. Expected Key or Block End", T);
				1854	case Token::TK_Error:
				1855	IsAtEnd = true;
				1856	CurrentEntry = 0;
				1857	}
				1858	} else {
				1859	switch (T.Kind) {
				1860	case Token::TK_FlowEntry:
				1861	// Eat the flow entry and recurse.
				1862	getNext();
				1863	return increment();
				1864	case Token::TK_FlowMappingEnd:
				1865	getNext();
				1866	case Token::TK_Error:
				1867	// Set this to end iterator.
				1868	IsAtEnd = true;
				1869	CurrentEntry = 0;
				1870	break;
				1871	default:
				1872	setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
				1873	"Mapping End."
				1874	, T);
				1875	IsAtEnd = true;
				1876	CurrentEntry = 0;
				1877	}
				1878	}
				1879	}
				1880
				1881	void SequenceNode::increment() {
				1882	if (failed()) {
				1883	IsAtEnd = true;
				1884	CurrentEntry = 0;
				1885	return;
				1886	}
				1887	if (CurrentEntry)
				1888	CurrentEntry->skip();
				1889	Token T = peekNext();
				1890	if (SeqType == ST_Block) {
				1891	switch (T.Kind) {
				1892	case Token::TK_BlockEntry:
				1893	getNext();
				1894	CurrentEntry = parseBlockNode();
				1895	if (CurrentEntry == 0) { // An error occurred.
				1896	IsAtEnd = true;
				1897	CurrentEntry = 0;
				1898	}
				1899	break;
				1900	case Token::TK_BlockEnd:
				1901	getNext();
				1902	IsAtEnd = true;
				1903	CurrentEntry = 0;
				1904	break;
				1905	default:
				1906	setError( "Unexpected token. Expected Block Entry or Block End."
				1907	, T);
				1908	case Token::TK_Error:
				1909	IsAtEnd = true;
				1910	CurrentEntry = 0;
				1911	}
				1912	} else if (SeqType == ST_Indentless) {
				1913	switch (T.Kind) {
				1914	case Token::TK_BlockEntry:
				1915	getNext();
				1916	CurrentEntry = parseBlockNode();
				1917	if (CurrentEntry == 0) { // An error occurred.
				1918	IsAtEnd = true;
				1919	CurrentEntry = 0;
				1920	}
				1921	break;
				1922	default:
				1923	case Token::TK_Error:
				1924	IsAtEnd = true;
				1925	CurrentEntry = 0;
				1926	}
				1927	} else if (SeqType == ST_Flow) {
				1928	switch (T.Kind) {
				1929	case Token::TK_FlowEntry:
				1930	// Eat the flow entry and recurse.
				1931	getNext();
				1932	WasPreviousTokenFlowEntry = true;
				1933	return increment();
				1934	case Token::TK_FlowSequenceEnd:
				1935	getNext();
				1936	case Token::TK_Error:
				1937	// Set this to end iterator.
				1938	IsAtEnd = true;
				1939	CurrentEntry = 0;
				1940	break;
				1941	case Token::TK_StreamEnd:
				1942	case Token::TK_DocumentEnd:
				1943	case Token::TK_DocumentStart:
				1944	setError("Could not find closing ]!", T);
				1945	// Set this to end iterator.
				1946	IsAtEnd = true;
				1947	CurrentEntry = 0;
				1948	break;
				1949	default:
				1950	if (!WasPreviousTokenFlowEntry) {
				1951	setError("Expected , between entries!", T);
				1952	IsAtEnd = true;
				1953	CurrentEntry = 0;
				1954	break;
				1955	}
				1956	// Otherwise it must be a flow entry.
				1957	CurrentEntry = parseBlockNode();
				1958	if (!CurrentEntry) {
				1959	IsAtEnd = true;
				1960	}
				1961	WasPreviousTokenFlowEntry = false;
				1962	break;
				1963	}
				1964	}
				1965	}
				1966
				1967	Document::Document(Stream &S) : stream(S), Root(0) {
				1968	if (parseDirectives())
				1969	expectToken(Token::TK_DocumentStart);
				1970	Token &T = peekNext();
				1971	if (T.Kind == Token::TK_DocumentStart)
				1972	getNext();
				1973	}
				1974
				1975	bool Document::skip() {
				1976	if (stream.scanner->failed())
				1977	return false;
				1978	if (!Root)
				1979	getRoot();
				1980	Root->skip();
				1981	Token &T = peekNext();
				1982	if (T.Kind == Token::TK_StreamEnd)
				1983	return false;
				1984	if (T.Kind == Token::TK_DocumentEnd) {
				1985	getNext();
				1986	return skip();
				1987	}
				1988	return true;
				1989	}
				1990
				1991	Token &Document::peekNext() {
				1992	return stream.scanner->peekNext();
				1993	}
				1994
				1995	Token Document::getNext() {
				1996	return stream.scanner->getNext();
				1997	}
				1998
				1999	void Document::setError(const Twine &Message, Token &Location) const {
				2000	stream.scanner->setError(Message, Location.Range.begin());
				2001	}
				2002
				2003	bool Document::failed() const {
				2004	return stream.scanner->failed();
				2005	}
				2006
				2007	Node *Document::parseBlockNode() {
				2008	Token T = peekNext();
				2009	// Handle properties.
				2010	Token AnchorInfo;
				2011	parse_property:
				2012	switch (T.Kind) {
				2013	case Token::TK_Alias:
				2014	getNext();
				2015	return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
				2016	case Token::TK_Anchor:
				2017	if (AnchorInfo.Kind == Token::TK_Anchor) {
				2018	setError("Already encountered an anchor for this node!", T);
				2019	return 0;
				2020	}
				2021	AnchorInfo = getNext(); // Consume TK_Anchor.
				2022	T = peekNext();
				2023	goto parse_property;
				2024	case Token::TK_Tag:
				2025	getNext(); // Skip TK_Tag.
				2026	T = peekNext();
				2027	goto parse_property;
				2028	default:
				2029	break;
				2030	}
				2031
				2032	switch (T.Kind) {
				2033	case Token::TK_BlockEntry:
				2034	// We got an unindented BlockEntry sequence. This is not terminated with
				2035	// a BlockEnd.
				2036	// Don't eat the TK_BlockEntry, SequenceNode needs it.
				2037	return new (NodeAllocator) SequenceNode( stream.CurrentDoc
				2038	, AnchorInfo.Range.substr(1)
				2039	, SequenceNode::ST_Indentless);
				2040	case Token::TK_BlockSequenceStart:
				2041	getNext();
				2042	return new (NodeAllocator)
				2043	SequenceNode( stream.CurrentDoc
				2044	, AnchorInfo.Range.substr(1)
				2045	, SequenceNode::ST_Block);
				2046	case Token::TK_BlockMappingStart:
				2047	getNext();
				2048	return new (NodeAllocator)
				2049	MappingNode( stream.CurrentDoc
				2050	, AnchorInfo.Range.substr(1)
				2051	, MappingNode::MT_Block);
				2052	case Token::TK_FlowSequenceStart:
				2053	getNext();
				2054	return new (NodeAllocator)
				2055	SequenceNode( stream.CurrentDoc
				2056	, AnchorInfo.Range.substr(1)
				2057	, SequenceNode::ST_Flow);
				2058	case Token::TK_FlowMappingStart:
				2059	getNext();
				2060	return new (NodeAllocator)
				2061	MappingNode( stream.CurrentDoc
				2062	, AnchorInfo.Range.substr(1)
				2063	, MappingNode::MT_Flow);
				2064	case Token::TK_Scalar:
				2065	getNext();
				2066	return new (NodeAllocator)
				2067	ScalarNode( stream.CurrentDoc
				2068	, AnchorInfo.Range.substr(1)
				2069	, T.Range);
				2070	case Token::TK_Key:
				2071	// Don't eat the TK_Key, KeyValueNode expects it.
				2072	return new (NodeAllocator)
				2073	MappingNode( stream.CurrentDoc
				2074	, AnchorInfo.Range.substr(1)
				2075	, MappingNode::MT_Inline);
				2076	case Token::TK_DocumentStart:
				2077	case Token::TK_DocumentEnd:
				2078	case Token::TK_StreamEnd:
				2079	default:
				2080	// TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
				2081	// !!null null.
				2082	return new (NodeAllocator) NullNode(stream.CurrentDoc);
				2083	case Token::TK_Error:
				2084	return 0;
				2085	}
				2086	llvm_unreachable("Control flow shouldn't reach here.");
				2087	return 0;
				2088	}
				2089
				2090	bool Document::parseDirectives() {
				2091	bool isDirective = false;
				2092	while (true) {
				2093	Token T = peekNext();
				2094	if (T.Kind == Token::TK_TagDirective) {
				2095	handleTagDirective(getNext());
				2096	isDirective = true;
				2097	} else if (T.Kind == Token::TK_VersionDirective) {
				2098	stream.handleYAMLDirective(getNext());
				2099	isDirective = true;
				2100	} else
				2101	break;
				2102	}
				2103	return isDirective;
				2104	}
				2105
				2106	bool Document::expectToken(int TK) {
				2107	Token T = getNext();
				2108	if (T.Kind != TK) {
				2109	setError("Unexpected token", T);
				2110	return false;
				2111	}
				2112	return true;
				2113	}
				2114
				2115	OwningPtr<Document> document_iterator::NullDoc;