Blame - src/scanner.h - fp2-dev/platform/external/v8

blob: 6e5333bce1e5defe347d3f4cac9e3fa51be4e382 [file] [log] [blame]

Kristian Monsen	0d5e116	2010-09-30 15:31:59 +0100	[diff] [blame^]	1	// Copyright 2010 the V8 project authors. All rights reserved.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	#ifndef V8_SCANNER_H_
				29	#define V8_SCANNER_H_
				30
				31	#include "token.h"
				32	#include "char-predicates-inl.h"
				33
				34	namespace v8 {
				35	namespace internal {
				36
				37
				38	class UTF8Buffer {
					// Collects literal characters as chars in a SequenceCollector. A literal
					// is bracketed by StartLiteral() and either EndLiteral() or DropLiteral().
				39	public:
				40	UTF8Buffer();
				41	~UTF8Buffer();
				42
					// Appends one character. Values up to unibrow::Utf8::kMaxOneByteChar are
					// stored directly as a single char. Everything else (including negative
					// values, which the unsigned cast turns into large ones) is handed to
					// AddCharSlow(), which is defined outside this header.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	43	inline void AddChar(uc32 c) {
				44	if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
				45	buffer_.Add(static_cast<char>(c));
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	46	} else {
				47	AddCharSlow(c);
				48	}
				49	}
				50
					// Begins a new sequence in the underlying collector.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	51	void StartLiteral() {
				52	buffer_.StartSequence();
				53	}
				54
					// Appends kEndMarker, ends the current sequence and returns it as a
					// read-only vector. The marker is added before the sequence is ended, so
					// it is presumably counted in the returned length -- confirm against
					// SequenceCollector::EndSequence().
				55	Vector<const char> EndLiteral() {
				56	buffer_.Add(kEndMarker);
				57	Vector<char> sequence = buffer_.EndSequence();
				58	return Vector<const char>(sequence.start(), sequence.length());
				59	}
				60
					// Abandons the sequence started by StartLiteral() without returning it.
				61	void DropLiteral() {
				62	buffer_.DropSequence();
				63	}
				64
					// Resets the underlying collector.
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	65	void Reset() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	66	buffer_.Reset();
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	67	}
				68
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	69	// The end marker added after a parsed literal.
				70	// Using zero allows the usage of strlen and similar functions on
				71	// identifiers and numbers (but not strings, since they may contain zero
				72	// bytes).
				73	// TODO(lrn): Use '\xff' as end marker, since it cannot occur inside
				74	// a UTF-8 string. This requires changes in all places that use
				75	// str-functions on the literals, but allows a single pointer to represent
				76	// the literal, even if it contains embedded zeros.
				77	static const char kEndMarker = '\x00';
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	78	private:
					// NOTE(review): kInitialCapacity is not referenced anywhere in this
					// header; check whether the implementation file still uses it.
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	79	static const int kInitialCapacity = 256;
					// Storage for all collected literal characters.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	80	SequenceCollector<char, 4> buffer_;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	81
					// Out-of-line path of AddChar() for characters that do not fit in one
					// byte; presumably appends the multi-byte UTF-8 encoding -- confirm in
					// the implementation file.
				82	void AddCharSlow(uc32 c);
				83	};
				84
				85
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	86	// Interface through which the scanner reads characters from the input source.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	87	class UTF16Buffer {
					// Abstract base class: PushBack(), Advance() and SeekForward() are pure
					// virtual and are implemented by the concrete buffer classes.
				88	public:
				89	UTF16Buffer();
				90	virtual ~UTF16Buffer() {}
				91
					// Hands a character back to the buffer. NOTE(review): presumably it is
					// returned again by the next Advance() -- confirm in the implementations.
				92	virtual void PushBack(uc32 ch) = 0;
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	93	// Returns a value < 0 when the buffer end is reached.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	94	virtual uc32 Advance() = 0;
				95	virtual void SeekForward(int pos) = 0;
				96
					// Returns pos_, the current position in the buffer.
				97	int pos() const { return pos_; }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	98
				99	protected:
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	100	int pos_; // Current position in the buffer.
				101	int end_; // Position where scanning should stop (EOF).
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	102	};
				103
				104
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	105	// UTF16 buffer to read characters from a character stream.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	106	class CharacterStreamUTF16Buffer: public UTF16Buffer {
				107	public:
				108	CharacterStreamUTF16Buffer();
				109	virtual ~CharacterStreamUTF16Buffer() {}
					// Sets up the buffer from a string handle, a character stream and a
					// start/end position pair. The body is defined outside this header.
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	110	void Initialize(Handle<String> data,
				111	unibrow::CharacterStream* stream,
				112	int start_position,
				113	int end_position);
					// Implementations of the UTF16Buffer interface.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	114	virtual void PushBack(uc32 ch);
				115	virtual uc32 Advance();
				116	virtual void SeekForward(int pos);
				117
				118	private:
					// NOTE(review): member roles below are inferred from their names; the
					// code that uses them is not in this header -- confirm before relying
					// on these descriptions.
					// Presumably holds the characters handed to PushBack().
				119	List<uc32> pushback_buffer_;
					// Presumably the most recently read character.
				120	uc32 last_;
					// Presumably the stream passed to Initialize(); held as a raw pointer.
				121	unibrow::CharacterStream* stream_;
				122
					// Accessor returning the address of pushback_buffer_.
				123	List<uc32>* pushback_buffer() { return &pushback_buffer_; }
				124	};
				125
				126
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	127	// UTF16 buffer to read characters from an external string.
				128	template <typename StringType, typename CharType>
				129	class ExternalStringUTF16Buffer: public UTF16Buffer {
					// StringType is the string class whose handle is passed to Initialize();
					// CharType is the element type of the raw character array. Scanner
					// instantiates this with <ExternalTwoByteString, uint16_t> and
					// <ExternalAsciiString, char>.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	130	public:
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	131	ExternalStringUTF16Buffer();
				132	virtual ~ExternalStringUTF16Buffer() {}
					// Sets up the buffer from a string handle and a start/end position pair.
					// The body is defined outside this header.
				133	void Initialize(Handle<StringType> data,
				134	int start_position,
				135	int end_position);
					// Implementations of the UTF16Buffer interface.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	136	virtual void PushBack(uc32 ch);
				137	virtual uc32 Advance();
				138	virtual void SeekForward(int pos);
				139
				140	private:
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	141	const CharType* raw_data_; // Pointer to the actual array of characters.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	142	};
				143
				144
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	145	class KeywordMatcher {
				146	// Incrementally recognize keywords.
				147	//
				148	// Recognized keywords:
				149	// break case catch const* continue debugger* default delete do else
				150	// finally false for function if in instanceof native* new null
				151	// return switch this throw true try typeof var void while with
				152	//
				153	// *: Actually "future reserved keywords". These are the only ones we
				154	// recognize, the remaining are allowed as identifiers.
				155	public:
					// Starts in the INITIAL state; until a keyword is matched the token is
					// Token::IDENTIFIER.
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	156	KeywordMatcher()
				157	: state_(INITIAL),
				158	token_(Token::IDENTIFIER),
				159	keyword_(NULL),
				160	counter_(0),
				161	keyword_token_(Token::ILLEGAL) {}
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	162
					// Token for the characters added so far.
				163	Token::Value token() { return token_; }
				164
					// Feeds one character to the matcher. Ignored once the state is
					// UNMATCHABLE.
				165	inline void AddChar(uc32 input) {
				166	if (state_ != UNMATCHABLE) {
				167	Step(input);
				168	}
				169	}
				170
					// Gives up on keyword matching: the token becomes Token::IDENTIFIER and
					// all further AddChar() calls are ignored.
				171	void Fail() {
				172	token_ = Token::IDENTIFIER;
				173	state_ = UNMATCHABLE;
				174	}
				175
				176	private:
					// Matcher states. NOTE(review): the letter-named states (C, CA, CO, ...)
					// presumably name the keyword prefix consumed so far; the transitions
					// live in Step(), which is not defined in this header.
				177	enum State {
				178	UNMATCHABLE,
				179	INITIAL,
				180	KEYWORD_PREFIX,
				181	KEYWORD_MATCHED,
				182	C,
				183	CA,
				184	CO,
				185	CON,
				186	D,
				187	DE,
				188	F,
				189	I,
				190	IN,
				191	N,
				192	T,
				193	TH,
				194	TR,
				195	V,
				196	W
				197	};
				198
					// Element type of first_states_: a keyword string, a state and a token.
				199	struct FirstState {
				200	const char* keyword;
				201	State state;
				202	Token::Value token;
				203	};
				204
				205	// Range of possible first characters of a keyword.
				206	static const unsigned int kFirstCharRangeMin = 'b';
				207	static const unsigned int kFirstCharRangeMax = 'w';
					// Number of characters in ['b', 'w'], both ends included (22 in ASCII).
				208	static const unsigned int kFirstCharRangeLength =
				209	kFirstCharRangeMax - kFirstCharRangeMin + 1;
				210	// State map for first keyword character range.
				211	static FirstState first_states_[kFirstCharRangeLength];
				212
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	213	// If input equals keyword's character at position, continue matching keyword
				214	// from that position.
					// On a match the state becomes KEYWORD_PREFIX, keyword_ and
					// keyword_token_ are recorded, counter_ is set to position + 1 and true
					// is returned. Otherwise nothing changes and false is returned.
				215	inline bool MatchKeywordStart(uc32 input,
				216	const char* keyword,
				217	int position,
				218	Token::Value token_if_match) {
				219	if (input == keyword[position]) {
				220	state_ = KEYWORD_PREFIX;
				221	this->keyword_ = keyword;
				222	this->counter_ = position + 1;
				223	this->keyword_token_ = token_if_match;
				224	return true;
				225	}
				226	return false;
				227	}
				228
				229	// If input equals match character, transition to new state and return true.
				230	inline bool MatchState(uc32 input, char match, State new_state) {
				231	if (input == match) {
				232	state_ = new_state;
				233	return true;
				234	}
				235	return false;
				236	}
				237
					// If input equals match character, transition to new state, make
					// keyword_token the current token and return true. Otherwise return
					// false and leave state and token untouched.
				238	inline bool MatchKeyword(uc32 input,
				239	char match,
				240	State new_state,
				241	Token::Value keyword_token) {
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	242	if (input != match) {
				243	return false;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	244	}
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	245	state_ = new_state;
				246	token_ = keyword_token;
				247	return true;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	248	}
				249
					// Advances the matcher by one character; defined outside this header.
				250	void Step(uc32 input);
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	251
				252	// Current state.
				253	State state_;
				254	// Token for currently added characters.
				255	Token::Value token_;
				256
				257	// Matching a specific keyword string (there is only one possible valid
				258	// keyword with the current prefix).
					// keyword_ is that string, counter_ is set to position + 1 by
					// MatchKeywordStart(), and keyword_token_ is the token recorded for it
					// (presumably reported once keyword_ is fully matched -- confirm in
					// Step()).
				259	const char* keyword_;
				260	int counter_;
				261	Token::Value keyword_token_;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	262	};
				263
				264
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	265	enum ParserMode { PARSE, PREPARSE };
					// ParserLanguage is passed to Scanner::Initialize() to select the
					// language being scanned (JavaScript or JSON). ParserMode is not
					// referenced elsewhere in this header.
				266	enum ParserLanguage { JAVASCRIPT, JSON };
				267
				268
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	269	class Scanner {
					// Tokenizer for JavaScript or JSON source. It keeps the current token
					// plus one token of look-ahead (current_ / next_) and one character of
					// look-ahead (c0_).
				270	public:
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	271	typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
				272
					// Helper object bound to a Scanner. Its members are defined outside this
					// header. NOTE(review): presumably starts a literal on construction and
					// drops it on destruction unless Complete() was called -- confirm.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	273	class LiteralScope {
				274	public:
				275	explicit LiteralScope(Scanner* self);
				276	~LiteralScope();
				277	void Complete();
				278
				279	private:
				280	Scanner* scanner_;
				281	bool complete_;
				282	};
				283
Kristian Monsen	0d5e116	2010-09-30 15:31:59 +0100	[diff] [blame^]	284	Scanner();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	285
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	286	// Initialize the Scanner to scan source.
					// Three overloads: source only; source plus a caller-supplied character
					// stream; source plus a start/end position pair.
				287	void Initialize(Handle<String> source,
				288	ParserLanguage language);
				289	void Initialize(Handle<String> source,
				290	unibrow::CharacterStream* stream,
				291	ParserLanguage language);
				292	void Initialize(Handle<String> source,
				293	int start_position, int end_position,
				294	ParserLanguage language);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	295
				296	// Returns the next token.
				297	Token::Value Next();
				298
				299	// One token look-ahead (past the token returned by Next()).
Kristian Monsen	0d5e116	2010-09-30 15:31:59 +0100	[diff] [blame^]	300	Token::Value peek() const { return next_.token; }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	301
				302	// Returns true if there was a line terminator before the peek'ed token.
				303	bool has_line_terminator_before_next() const {
				304	return has_line_terminator_before_next_;
				305	}
				306
					// Begin/end source positions of a token. Default-constructed as (0, 0).
				307	struct Location {
				308	Location(int b, int e) : beg_pos(b), end_pos(e) { }
				309	Location() : beg_pos(0), end_pos(0) { }
				310	int beg_pos;
				311	int end_pos;
				312	};
				313
				314	// Returns the location information for the current token
				315	// (the token returned by Next()).
Kristian Monsen	0d5e116	2010-09-30 15:31:59 +0100	[diff] [blame^]	316	Location location() const { return current_.location; }
					// Same, for the look-ahead token.
				317	Location peek_location() const { return next_.location; }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	318
				319	// Returns the literal string, if any, for the current token (the
				320	// token returned by Next()). The string is 0-terminated and in
				321	// UTF-8 format; it may contain 0-characters. Literal strings are
				322	// collected for identifiers, strings, and numbers.
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	323	// These functions only give the correct result if the literal
				324	// was scanned between calls to StartLiteral() and TerminateLiteral().
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	325	const char* literal_string() const {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	326	return current_.literal_chars.start();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	327	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	328
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	329	int literal_length() const {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	330	// Excluding terminal '\x00' added by TerminateLiteral().
				331	return current_.literal_chars.length() - 1;
				332	}
				333
					// The current literal as a vector, without the terminating '\x00'.
				334	Vector<const char> literal() const {
				335	return Vector<const char>(literal_string(), literal_length());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	336	}
				337
				338	// Returns the literal string for the next token (the token that
				339	// would be returned if Next() were called).
				340	const char* next_literal_string() const {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	341	return next_.literal_chars.start();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	342	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	343
				344
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	345	// Returns the length of the next token (that would be returned if
				346	// Next() were called).
				347	int next_literal_length() const {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	348	// Excluding terminal '\x00' added by TerminateLiteral().
				349	return next_.literal_chars.length() - 1;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	350	}
				351
					// The next literal as a vector, without the terminating '\x00'.
				352	Vector<const char> next_literal() const {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	353	return Vector<const char>(next_literal_string(), next_literal_length());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	354	}
				355
				356	// Scans the input as a regular expression pattern, previous
				357	// character(s) must be /(=). Returns true if a pattern is scanned.
				358	bool ScanRegExpPattern(bool seen_equal);
				359	// Returns true if regexp flags are scanned (always since flags can
				360	// be empty).
				361	bool ScanRegExpFlags();
				362
				363	// Seek forward to the given position. This operation does not
				364	// work in general, for instance when there are pushed back
				365	// characters, but works for seeking forward until simple delimiter
				366	// tokens, which is what it is used for.
				367	void SeekForward(int pos);
				368
					// Returns the stack_overflow_ flag.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	369	bool stack_overflow() { return stack_overflow_; }
				370
					// Returns the address of the static utf8_decoder_ resource.
				371	static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
				372
				373	// Tells whether the buffer contains an identifier (no escapes).
				374	// Used for checking if a property name is an identifier.
				375	static bool IsIdentifier(unibrow::CharacterStream* buffer);
				376
					// Shared character-class predicates.
				377	static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
				378	static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
				379	static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
				380	static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
				381
					// Number of look-ahead characters; source_pos() subtracts it from the
					// buffer position.
				382	static const int kCharacterLookaheadBufferSize = 1;
					// NOTE(review): 1 is also a plausible real end position. If this
					// constant is used as a "no end position" sentinel, confirm that it
					// cannot collide with a genuine end_position of 1.
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	383	static const int kNoEndPosition = 1;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	384
				385	private:
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	386	// The current and look-ahead token.
				387	struct TokenDesc {
				388	Token::Value token;
				389	Location location;
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	390	Vector<const char> literal_chars;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	391	};
				392
					// Private initializer taking every parameter of the public Initialize()
					// overloads; defined outside this header.
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	393	void Init(Handle<String> source,
				394	unibrow::CharacterStream* stream,
				395	int start_position, int end_position,
				396	ParserLanguage language);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	397
				398	// Literal buffer support
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	399	inline void StartLiteral();
				400	inline void AddChar(uc32 ch);
				401	inline void AddCharAdvance();
				402	inline void TerminateLiteral();
				403	// Stops scanning of a literal, e.g., due to an encountered error.
				404	inline void DropLiteral();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	405
				406	// Low-level scanning support.
					// Advance() reads the next character from the active source into c0_.
					// PushBack() hands ch back to the source and makes it the look-ahead.
				407	void Advance() { c0_ = source_->Advance(); }
				408	void PushBack(uc32 ch) {
				409	source_->PushBack(ch);
				410	c0_ = ch;
				411	}
				412
					// Dispatches to the JSON or JavaScript whitespace skipper depending on
					// is_parsing_json_ and returns that function's result.
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	413	bool SkipWhiteSpace() {
				414	if (is_parsing_json_) {
				415	return SkipJsonWhiteSpace();
				416	} else {
				417	return SkipJavaScriptWhiteSpace();
				418	}
				419	}
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	420
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	421	bool SkipJavaScriptWhiteSpace();
				422	bool SkipJsonWhiteSpace();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	423	Token::Value SkipSingleLineComment();
				424	Token::Value SkipMultiLineComment();
				425
				426	inline Token::Value Select(Token::Value tok);
				427	inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
				428
					// Scans one token with the JSON or JavaScript scanner depending on
					// is_parsing_json_.
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	429	inline void Scan() {
				430	if (is_parsing_json_) {
				431	ScanJson();
				432	} else {
				433	ScanJavaScript();
				434	}
				435	}
				436
				437	// Scans a single JavaScript token.
				438	void ScanJavaScript();
				439
				440	// Scan a single JSON token. The JSON lexical grammar is specified in the
				441	// ECMAScript 5 standard, section 15.12.1.1.
				442	// Recognizes all of the single-character tokens directly, or calls a function
				443	// to scan a number, string or identifier literal.
				444	// The only allowed whitespace characters between tokens are tab,
				445	// carriage-return, newline and space.
				446	void ScanJson();
				447
				448	// A JSON number (production JSONNumber) is a subset of the valid JavaScript
				449	// decimal number literals.
				450	// It includes an optional minus sign, must have at least one
				451	// digit before and after a decimal point, may not have prefixed zeros (unless
				452	// the integer part is zero), and may include an exponent part (e.g., "e-10").
				453	// Hexadecimal and octal numbers are not allowed.
				454	Token::Value ScanJsonNumber();
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	455
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	456	// A JSON string (production JSONString) is a subset of valid JavaScript string
				457	// literals. The string must only be double-quoted (not single-quoted), and
				458	// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
				459	// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
				460	Token::Value ScanJsonString();
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	461
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	462	// Used to recognize one of the literals "true", "false", or "null". These
				463	// are the only valid JSON identifiers (productions JSONBooleanLiteral,
				464	// JSONNullLiteral).
				465	Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
				466
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	467	void ScanDecimalDigits();
				468	Token::Value ScanNumber(bool seen_period);
				469	Token::Value ScanIdentifier();
				470	uc32 ScanHexEscape(uc32 c, int length);
				471	uc32 ScanOctalEscape(uc32 c, int length);
				472	void ScanEscape();
				473	Token::Value ScanString();
				474
				475	// Scans a possible HTML comment -- begins with '<!'.
				476	Token::Value ScanHtmlComment();
				477
				478	// Return the current source position.
					// This is the buffer position minus the look-ahead size, since the
					// buffer has already handed out the look-ahead character c0_.
				479	int source_pos() {
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	480	return source_->pos() - kCharacterLookaheadBufferSize;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	481	}
				482
				483	// Decodes a unicode escape-sequence which is part of an identifier.
				484	// If the escape sequence cannot be decoded the result is kBadRune.
				485	uc32 ScanIdentifierUnicodeEscape();
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	486
				487	TokenDesc current_; // desc for current token (as returned by Next())
				488	TokenDesc next_; // desc for next token (one token look-ahead)
				489	bool has_line_terminator_before_next_;
					// Selects the JSON code paths in SkipWhiteSpace() and Scan().
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	490	bool is_parsing_json_;
				491
				492	// Different UTF16 buffers used to pull characters from. Based on input one of
				493	// these will be initialized as the actual data source.
				494	CharacterStreamUTF16Buffer char_stream_buffer_;
				495	ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
				496	two_byte_string_buffer_;
				497	ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
				498
				499	// Source. Will point to one of the buffers declared above.
				500	UTF16Buffer* source_;
				501
				502	// Used to convert the source string into a character stream when a stream
				503	// is not passed to the scanner.
				504	SafeStringInputBuffer safe_string_input_buffer_;
				505
				506	// Buffer to hold literal values (identifiers, strings, numbers)
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	507	// using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
				508	UTF8Buffer literal_buffer_;
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	509
					// Flag returned by stack_overflow(); set outside this header.
				510	bool stack_overflow_;
				511	static StaticResource<Utf8Decoder> utf8_decoder_;
				512
				513	// One Unicode character look-ahead; c0_ < 0 at the end of the input.
				514	uc32 c0_;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	515	};
				516
				517	} } // namespace v8::internal
				518
				519	#endif // V8_SCANNER_H_