Blame - re2/regexp.h - fp2-dev/platform/external/regex-re2

blob: 331c017673c42574fcfff9f73ef181fe3ff0cadf [file] [log] [blame]

Ian Hodson	2ee91b4	2012-05-14 12:29:36 +0100	[diff] [blame]	1	// Copyright 2006 The RE2 Authors. All Rights Reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// --- SPONSORED LINK --------------------------------------------------
				6	// If you want to use this library for regular expression matching,
				7	// you should use re2/re2.h, which provides a class RE2 that
				8	// mimics the PCRE interface provided by PCRE's C++ wrappers.
				9	// This header describes the low-level interface used to implement RE2
				10	// and may change in backwards-incompatible ways from time to time.
				11	// In contrast, RE2's interface will not.
				12	// ---------------------------------------------------------------------
				13
				14	// Regular expression library: parsing, execution, and manipulation
				15	// of regular expressions.
				16	//
				17	// Any operation that traverses the Regexp structures should be written
				18	// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
				19	// regular expressions such as x++++++++++++++++++++... might cause recursive
				20	// traversals to overflow the stack.
				21	//
				22	// It is the caller's responsibility to provide appropriate mutual exclusion
				23	// around manipulation of the regexps. RE2 does this.
				24	//
				25	// PARSING
				26	//
				27	// Regexp::Parse parses regular expressions encoded in UTF-8.
				28	// The default syntax is POSIX extended regular expressions,
				29	// with the following changes:
				30	//
				31	// 1. Backreferences (optional in POSIX EREs) are not supported.
				32	// (Supporting them precludes the use of DFA-based
				33	// matching engines.)
				34	//
				35	// 2. Collating elements and collation classes are not supported.
				36	// (No one has needed or wanted them.)
				37	//
				38	// The exact syntax accepted can be modified by passing flags to
				39	// Regexp::Parse. In particular, many of the basic Perl additions
				40	// are available. The flags are documented below (search for LikePerl).
				41	//
				42	// If parsed with the flag Regexp::Latin1, both the regular expression
				43	// and the input to the matching routines are assumed to be encoded in
				44	// Latin-1, not UTF-8.
				45	//
				46	// EXECUTION
				47	//
				48	// Once Regexp has parsed a regular expression, it provides methods
				49	// to search text using that regular expression. These methods are
				50	// implemented via calling out to other regular expression libraries.
				51	// (Let's call them the sublibraries.)
				52	//
				53	// To call a sublibrary, Regexp does not simply prepare a
				54	// string version of the regular expression and hand it to the
				55	// sublibrary. Instead, Regexp prepares, from its own parsed form, the
				56	// corresponding internal representation used by the sublibrary.
				57	// This has the drawback of needing to know the internal representation
				58	// used by the sublibrary, but it has two important benefits:
				59	//
				60	// 1. The syntax and meaning of regular expressions is guaranteed
				61	// to be that used by Regexp's parser, not the syntax expected
				62	// by the sublibrary. Regexp might accept a restricted or
				63	// expanded syntax for regular expressions as compared with
				64	// the sublibrary. As long as Regexp can translate from its
				65	// internal form into the sublibrary's, clients need not know
				66	// exactly which sublibrary they are using.
				67	//
				68	// 2. The sublibrary parsers are bypassed. For whatever reason,
				69	// sublibrary regular expression parsers often have security
				70	// problems. For example, plan9grep's regular expression parser
				71	// has a buffer overflow in its handling of large character
				72	// classes, and PCRE's parser has had buffer overflow problems
				73	// in the past. Security-team requires sandboxing of sublibrary
				74	// regular expression parsers. Avoiding the sublibrary parsers
				75	// avoids the sandbox.
				76	//
				77	// The execution methods we use now are provided by the compiled form,
				78	// Prog, described in prog.h
				79	//
				80	// MANIPULATION
				81	//
				82	// Unlike other regular expression libraries, Regexp makes its parsed
				83	// form accessible to clients, so that client code can analyze the
				84	// parsed regular expressions.
				85
				86	#ifndef RE2_REGEXP_H__
				87	#define RE2_REGEXP_H__
				88
				89	#include "util/util.h"
				90	#include "re2/stringpiece.h"
				91
				92	namespace re2 {
				93
				94	// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
				95	enum RegexpOp {
				96	// Matches no strings.
				97	kRegexpNoMatch = 1,
				98
				99	// Matches empty string.
				100	kRegexpEmptyMatch,
				101
				102	// Matches rune_.
				103	kRegexpLiteral,
				104
				105	// Matches runes_.
				106	kRegexpLiteralString,
				107
				108	// Matches concatenation of sub_[0..nsub-1].
				109	kRegexpConcat,
				110	// Matches union of sub_[0..nsub-1].
				111	kRegexpAlternate,
				112
				113	// Matches sub_[0] zero or more times.
				114	kRegexpStar,
				115	// Matches sub_[0] one or more times.
				116	kRegexpPlus,
				117	// Matches sub_[0] zero or one times.
				118	kRegexpQuest,
				119
				120	// Matches sub_[0] at least min_ times, at most max_ times.
				121	// max_ == -1 means no upper limit.
				122	kRegexpRepeat,
				123
				124	// Parenthesized (capturing) subexpression. Index is cap_.
				125	// Optionally, capturing name is name_.
				126	kRegexpCapture,
				127
				128	// Matches any character.
				129	kRegexpAnyChar,
				130
				131	// Matches any byte [sic].
				132	kRegexpAnyByte,
				133
				134	// Matches empty string at beginning of line.
				135	kRegexpBeginLine,
				136	// Matches empty string at end of line.
				137	kRegexpEndLine,
				138
				139	// Matches word boundary "\b".
				140	kRegexpWordBoundary,
				141	// Matches not-a-word boundary "\B".
				142	kRegexpNoWordBoundary,
				143
				144	// Matches empty string at beginning of text.
				145	kRegexpBeginText,
				146	// Matches empty string at end of text.
				147	kRegexpEndText,
				148
				149	// Matches character class given by cc_.
				150	kRegexpCharClass,
				151
				152	// Forces match of entire expression right now,
				153	// with match ID match_id_ (used by RE2::Set).
				154	kRegexpHaveMatch,
				155
				156	kMaxRegexpOp = kRegexpHaveMatch,
				157	};
				158
				159	// Keep in sync with string list in regexp.cc
				160	enum RegexpStatusCode {
				161	// No error
				162	kRegexpSuccess = 0,
				163
				164	// Unexpected error
				165	kRegexpInternalError,
				166
				167	// Parse errors
				168	kRegexpBadEscape, // bad escape sequence
				169	kRegexpBadCharClass, // bad character class
				170	kRegexpBadCharRange, // bad character class range
				171	kRegexpMissingBracket, // missing closing ]
				172	kRegexpMissingParen, // missing closing )
				173	kRegexpTrailingBackslash, // at end of regexp
				174	kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
				175	kRegexpRepeatSize, // bad repetition argument
				176	kRegexpRepeatOp, // bad repetition operator
				177	kRegexpBadPerlOp, // bad perl operator
				178	kRegexpBadUTF8, // invalid UTF-8 in regexp
				179	kRegexpBadNamedCapture, // bad named capture
				180	};
				181
				182	// Error status for certain operations.
				183	class RegexpStatus {
				184	public:
				185	RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
				186	~RegexpStatus() { delete tmp_; }
				187
				188	void set_code(enum RegexpStatusCode code) { code_ = code; }
				189	void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
				190	void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
				191	enum RegexpStatusCode code() const { return code_; }
				192	const StringPiece& error_arg() const { return error_arg_; }
				193	bool ok() const { return code() == kRegexpSuccess; }
				194
				195	// Copies state from status.
				196	void Copy(const RegexpStatus& status);
				197
				198	// Returns text equivalent of code, e.g.:
				199	// "Bad character class"
Alexander Gutkin	0d4c523	2013-02-28 13:47:27 +0000	[diff] [blame]	200	static string CodeText(enum RegexpStatusCode code);
Ian Hodson	2ee91b4	2012-05-14 12:29:36 +0100	[diff] [blame]	201
				202	// Returns text describing error, e.g.:
				203	// "Bad character class: [z-a]"
				204	string Text() const;
				205
				206	private:
				207	enum RegexpStatusCode code_; // Kind of error
				208	StringPiece error_arg_; // Piece of regexp containing syntax error.
				209	string* tmp_; // Temporary storage, possibly where error_arg_ is.
				210
				211	DISALLOW_EVIL_CONSTRUCTORS(RegexpStatus);
				212	};
				213
				214	// Walker to implement Simplify.
				215	class SimplifyWalker;
				216
				217	// Compiled form; see prog.h
				218	class Prog;
				219
				220	struct RuneRange {
				221	RuneRange() : lo(0), hi(0) { }
				222	RuneRange(int l, int h) : lo(l), hi(h) { }
				223	Rune lo;
				224	Rune hi;
				225	};
				226
				227	// Less-than on RuneRanges treats a == b if they overlap at all.
				228	// This lets us look in a set to find the range covering a particular Rune.
				229	struct RuneRangeLess {
				230	bool operator()(const RuneRange& a, const RuneRange& b) const {
				231	return a.hi < b.lo;
				232	}
				233	};
				234
				235	class CharClassBuilder;
				236
				237	class CharClass {
				238	public:
				239	void Delete();
				240
				241	typedef RuneRange* iterator;
				242	iterator begin() { return ranges_; }
				243	iterator end() { return ranges_ + nranges_; }
				244
				245	int size() { return nrunes_; }
				246	bool empty() { return nrunes_ == 0; }
				247	bool full() { return nrunes_ == Runemax+1; }
				248	bool FoldsASCII() { return folds_ascii_; }
				249
				250	bool Contains(Rune r);
				251	CharClass* Negate();
				252
				253	private:
				254	CharClass(); // not implemented
				255	~CharClass(); // not implemented
				256	static CharClass* New(int maxranges);
				257
				258	friend class CharClassBuilder;
				259
				260	bool folds_ascii_;
				261	int nrunes_;
				262	RuneRange *ranges_;
				263	int nranges_;
				264	DISALLOW_EVIL_CONSTRUCTORS(CharClass);
				265	};
				266
				267	class Regexp {
				268	public:
				269
				270	// Flags for parsing. Can be ORed together.
				271	enum ParseFlags {
				272	NoParseFlags = 0,
				273	FoldCase = 1<<0, // Fold case during matching (case-insensitive).
				274	Literal = 1<<1, // Treat s as literal string instead of a regexp.
				275	ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s
				276	// and [[:space:]] to match newline.
				277	DotNL = 1<<3, // Allow . to match newline.
				278	MatchNL = ClassNL \| DotNL,
				279	OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and
				280	// end of text, not around embedded newlines.
				281	// (Perl's default)
				282	Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8.
				283	NonGreedy = 1<<6, // Repetition operators are non-greedy by default.
				284	PerlClasses = 1<<7, // Allow Perl character classes like \d.
				285	PerlB = 1<<8, // Allow Perl's \b and \B.
				286	PerlX = 1<<9, // Perl extensions:
				287	// non-capturing parens - (?: )
				288	// non-greedy operators - *? +? ?? {}?
				289	// flag edits - (?i) (?-i) (?i: )
				290	// i - FoldCase
				291	// m - !OneLine
				292	// s - DotNL
				293	// U - NonGreedy
				294	// line ends: \A \z
				295	// \Q and \E to disable/enable metacharacters
				296	// (?P<name>expr) for named captures
				297	// \C to match any single byte
				298	UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
				299	// and \P{Han} for its negation.
				300	NeverNL = 1<<11, // Never match NL, even if the regexp mentions
				301	// it explicitly.
Alexander Gutkin	0d4c523	2013-02-28 13:47:27 +0000	[diff] [blame]	302	NeverCapture = 1<<12, // Parse all parens as non-capturing.
Ian Hodson	2ee91b4	2012-05-14 12:29:36 +0100	[diff] [blame]	303
				304	// As close to Perl as we can get.
				305	LikePerl = ClassNL \| OneLine \| PerlClasses \| PerlB \| PerlX \|
				306	UnicodeGroups,
				307
				308	// Internal use only.
				309	WasDollar = 1<<15, // on kRegexpEndText: was $ in regexp text
				310	};
				311
				312	// Get. No set, Regexps are logically immutable once created.
				313	RegexpOp op() { return static_cast<RegexpOp>(op_); }
				314	int nsub() { return nsub_; }
				315	bool simple() { return simple_; }
				316	enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
				317	int Ref(); // For testing.
				318
				319	Regexp** sub() {
				320	if(nsub_ <= 1)
				321	return &subone_;
				322	else
				323	return submany_;
				324	}
				325
				326	int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
				327	int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
				328	Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
				329	CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
				330	int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
				331	const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
				332	Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
				333	int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
				334	int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
				335
				336	// Increments reference count, returns object as convenience.
				337	Regexp* Incref();
				338
				339	// Decrements reference count and deletes this object if count reaches 0.
				340	void Decref();
				341
				342	// Parses string s to produce regular expression, returned.
				343	// Caller must release return value with re->Decref().
				344	// On failure, sets *status (if status != NULL) and returns NULL.
				345	static Regexp* Parse(const StringPiece& s, ParseFlags flags,
				346	RegexpStatus* status);
				347
				348	// Returns a _new_ simplified version of the current regexp.
				349	// Does not edit the current regexp.
				350	// Caller must release return value with re->Decref().
				351	// Simplified means that counted repetition has been rewritten
				352	// into simpler terms and all Perl/POSIX features have been
				353	// removed. The result will capture exactly the same
				354	// subexpressions the original did, unless formatted with ToString.
				355	Regexp* Simplify();
				356	friend class SimplifyWalker;
				357
				358	// Parses the regexp src and then simplifies it and sets *dst to the
				359	// string representation of the simplified form. Returns true on success.
				360	// Returns false and sets *status (if status != NULL) on parse error.
				361	static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
				362	string* dst,
				363	RegexpStatus* status);
				364
				365	// Returns the number of capturing groups in the regexp.
				366	int NumCaptures();
				367	friend class NumCapturesWalker;
				368
				369	// Returns a map from names to capturing group indices,
				370	// or NULL if the regexp contains no named capture groups.
				371	// The caller is responsible for deleting the map.
				372	map<string, int>* NamedCaptures();
				373
				374	// Returns a map from capturing group indices to capturing group
				375	// names or NULL if the regexp contains no named capture groups. The
				376	// caller is responsible for deleting the map.
				377	map<int, string>* CaptureNames();
				378
				379	// Returns a string representation of the current regexp,
				380	// using as few parentheses as possible.
				381	string ToString();
				382
				383	// Convenience functions. They consume the passed reference,
				384	// so in many cases you should use, e.g., Plus(re->Incref(), flags).
				385	// They do not consume allocated arrays like subs or runes.
				386	static Regexp* Plus(Regexp* sub, ParseFlags flags);
				387	static Regexp* Star(Regexp* sub, ParseFlags flags);
				388	static Regexp* Quest(Regexp* sub, ParseFlags flags);
				389	static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
				390	static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
				391	static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
				392	static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
				393	static Regexp* NewLiteral(Rune rune, ParseFlags flags);
				394	static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
				395	static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
				396	static Regexp* HaveMatch(int match_id, ParseFlags flags);
				397
				398	// Like Alternate but does not factor out common prefixes.
				399	static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
				400
				401	// Debugging function. Returns string format for regexp
				402	// that makes structure clear. Does NOT use regexp syntax.
				403	string Dump();
				404
				405	// Helper traversal class, defined fully in walker-inl.h.
				406	template<typename T> class Walker;
				407
				408	// Compile to Prog. See prog.h
				409	// Reverse prog expects to be run over text backward.
				410	// Construction and execution of prog will
				411	// stay within approximately max_mem bytes of memory.
				412	// If max_mem <= 0, a reasonable default is used.
				413	Prog* CompileToProg(int64 max_mem);
				414	Prog* CompileToReverseProg(int64 max_mem);
				415
				416	// Whether to expect this library to find exactly the same answer as PCRE
				417	// when running this regexp. Most regexps do mimic PCRE exactly, but a few
				418	// obscure cases behave differently. Technically this is more a property
				419	// of the Prog than the Regexp, but the computation is much easier to do
				420	// on the Regexp. See mimics_pcre.cc for the exact conditions.
				421	bool MimicsPCRE();
				422
				423	// Benchmarking function.
				424	void NullWalk();
				425
				426	// Whether every match of this regexp must be anchored and
				427	// begin with a non-empty fixed string (perhaps after ASCII
				428	// case-folding). If so, returns the prefix and the sub-regexp that
				429	// follows it.
				430	bool RequiredPrefix(string* prefix, bool foldcase, Regexp* suffix);
				431
				432	private:
				433	// Constructor allocates vectors as appropriate for operator.
				434	explicit Regexp(RegexpOp op, ParseFlags parse_flags);
				435
				436	// Use Decref() instead of delete to release Regexps.
				437	// This is private to catch deletes at compile time.
				438	~Regexp();
				439	void Destroy();
				440	bool QuickDestroy();
				441
				442	// Helpers for Parse. Listed here so they can edit Regexps.
				443	class ParseState;
				444	friend class ParseState;
				445	friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
				446	RegexpStatus* status);
				447
				448	// Helper for testing [sic].
				449	friend bool RegexpEqualTestingOnly(Regexp, Regexp);
				450
				451	// Computes whether Regexp is already simple.
				452	bool ComputeSimple();
				453
				454	// Constructor that generates a concatenation or alternation,
				455	// enforcing the limit on the number of subexpressions for
				456	// a particular Regexp.
				457	static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
				458	ParseFlags flags, bool can_factor);
				459
				460	// Returns the leading string that re starts with.
				461	// The returned Rune* points into a piece of re,
				462	// so it must not be used after the caller calls re->Decref().
				463	static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
				464
				465	// Removes the first n leading runes from the beginning of re.
				466	// Edits re in place.
				467	static void RemoveLeadingString(Regexp* re, int n);
				468
				469	// Returns the leading regexp in re's top-level concatenation.
				470	// The returned Regexp* points at re or a sub-expression of re,
				471	// so it must not be used after the caller calls re->Decref().
				472	static Regexp* LeadingRegexp(Regexp* re);
				473
				474	// Removes LeadingRegexp(re) from re and returns the remainder.
				475	// Might edit re in place.
				476	static Regexp* RemoveLeadingRegexp(Regexp* re);
				477
				478	// Simplifies an alternation of literal strings by factoring out
				479	// common prefixes.
				480	static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
				481	static int FactorAlternationRecursive(Regexp** sub, int nsub,
				482	ParseFlags flags, int maxdepth);
				483
				484	// Is a == b? Only efficient on regexps that have not been through
				485	// Simplify yet - the expansion of a kRegexpRepeat will make this
				486	// take a long time. Do not call on such regexps, hence private.
				487	static bool Equal(Regexp* a, Regexp* b);
				488
				489	// Allocate space for n sub-regexps.
				490	void AllocSub(int n) {
				491	if (n < 0 \|\| static_cast<uint16>(n) != n)
				492	LOG(FATAL) << "Cannot AllocSub " << n;
				493	if (n > 1)
				494	submany_ = new Regexp*[n];
				495	nsub_ = n;
				496	}
				497
				498	// Add Rune to LiteralString
				499	void AddRuneToString(Rune r);
				500
				501	// Swaps this with that, in place.
				502	void Swap(Regexp *that);
				503
				504	// Operator. See description of operators above.
				505	// uint8 instead of RegexpOp to control space usage.
				506	uint8 op_;
				507
				508	// Is this regexp structure already simple
				509	// (has it been returned by Simplify)?
				510	// uint8 instead of bool to control space usage.
				511	uint8 simple_;
				512
				513	// Flags saved from parsing and used during execution.
				514	// (Only FoldCase is used.)
				515	// uint16 instead of ParseFlags to control space usage.
				516	uint16 parse_flags_;
				517
				518	// Reference count. Exists so that SimplifyRegexp can build
				519	// regexp structures that are dags rather than trees to avoid
				520	// exponential blowup in space requirements.
				521	// uint16 to control space usage.
				522	// The standard regexp routines will never generate a
				523	// ref greater than the maximum repeat count (100),
				524	// but even so, Incref and Decref consult an overflow map
				525	// when ref_ reaches kMaxRef.
				526	uint16 ref_;
				527	static const uint16 kMaxRef = 0xffff;
				528
				529	// Subexpressions.
				530	// uint16 to control space usage.
				531	// Concat and Alternate handle larger numbers of subexpressions
				532	// by building concatenation or alternation trees.
				533	// Other routines should call Concat or Alternate instead of
				534	// filling in sub() by hand.
				535	uint16 nsub_;
				536	static const uint16 kMaxNsub = 0xffff;
				537	union {
				538	Regexp** submany_; // if nsub_ > 1
				539	Regexp* subone_; // if nsub_ == 1
				540	};
				541
				542	// Extra space for parse and teardown stacks.
				543	Regexp* down_;
				544
				545	// Arguments to operator. See description of operators above.
				546	union {
				547	struct { // Repeat
				548	int max_;
				549	int min_;
				550	};
				551	struct { // Capture
				552	int cap_;
				553	string* name_;
				554	};
				555	struct { // LiteralString
				556	int nrunes_;
				557	Rune* runes_;
				558	};
				559	struct { // CharClass
				560	// These two could be in separate union members,
				561	// but it wouldn't save any space (there are other two-word structs)
				562	// and keeping them separate avoids confusion during parsing.
				563	CharClass* cc_;
				564	CharClassBuilder* ccb_;
				565	};
				566	Rune rune_; // Literal
				567	int match_id_; // HaveMatch
				568	void *the_union_[2]; // as big as any other element, for memset
				569	};
				570
				571	DISALLOW_EVIL_CONSTRUCTORS(Regexp);
				572	};
				573
				574	// Character class set: contains non-overlapping, non-abutting RuneRanges.
				575	typedef set<RuneRange, RuneRangeLess> RuneRangeSet;
				576
				577	class CharClassBuilder {
				578	public:
				579	CharClassBuilder();
				580
				581	typedef RuneRangeSet::iterator iterator;
				582	iterator begin() { return ranges_.begin(); }
				583	iterator end() { return ranges_.end(); }
				584
				585	int size() { return nrunes_; }
				586	bool empty() { return nrunes_ == 0; }
				587	bool full() { return nrunes_ == Runemax+1; }
				588
				589	bool Contains(Rune r);
				590	bool FoldsASCII();
				591	bool AddRange(Rune lo, Rune hi); // returns whether class changed
				592	CharClassBuilder* Copy();
				593	void AddCharClass(CharClassBuilder* cc);
				594	void Negate();
				595	void RemoveAbove(Rune r);
				596	CharClass* GetCharClass();
				597	void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
				598
				599	private:
				600	static const uint32 AlphaMask = (1<<26) - 1;
				601	uint32 upper_; // bitmap of A-Z
				602	uint32 lower_; // bitmap of a-z
				603	int nrunes_;
				604	RuneRangeSet ranges_;
				605	DISALLOW_EVIL_CONSTRUCTORS(CharClassBuilder);
				606	};
				607
				608	// Tell g++ that bitwise ops on ParseFlags produce ParseFlags.
				609	inline Regexp::ParseFlags operator\|(Regexp::ParseFlags a, Regexp::ParseFlags b)
				610	{
				611	return static_cast<Regexp::ParseFlags>(static_cast<int>(a) \| static_cast<int>(b));
				612	}
				613
				614	inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b)
				615	{
				616	return static_cast<Regexp::ParseFlags>(static_cast<int>(a) ^ static_cast<int>(b));
				617	}
				618
				619	inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b)
				620	{
				621	return static_cast<Regexp::ParseFlags>(static_cast<int>(a) & static_cast<int>(b));
				622	}
				623
				624	inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
				625	{
				626	return static_cast<Regexp::ParseFlags>(~static_cast<int>(a));
				627	}
				628
				629
				630
				631	} // namespace re2
				632
				633	#endif // RE2_REGEXP_H__