Import 'regex-syntax' package vesion 0.6.17 * Add OWNERS Bug: 152884384 Test: make Change-Id: I760a69ffc851039ceaff58980deece308a426aed

commit: 048fc04e7fd0dd2014a94e73fb34f6e59021c170 [log] [tgz]
author: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:44:22 2020 -0700
committer: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:46:29 2020 -0700
tree: f9b501b392f13f2ca9d86d0a9d377b181f624dfb
parent: 2dd07da5420c40e365646bf756938d8d1abf6d62 [diff] [blame]
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..00f1391
--- /dev/null
+++ b/src/parser.rs

@@ -0,0 +1,200 @@
+use ast;
+use hir;
+
+use Result;
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
+///
+/// This type combines the builder options for both the
+/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
+/// and the
+/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+#[derive(Clone, Debug, Default)]
+pub struct ParserBuilder {
+    ast: ast::parse::ParserBuilder,
+    hir: hir::translate::TranslatorBuilder,
+}
+
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder::default()
+    }
+
+    /// Build a parser from this configuration with the given pattern.
+    pub fn build(&self) -> Parser {
+        Parser { ast: self.ast.build(), hir: self.hir.build() }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// lenth of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax, therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.ast.nest_limit(limit);
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.octal(yes);
+        self
+    }
+
+    /// When enabled, the parser will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the parser is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// parser will return an error).
+    ///
+    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
+    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
+    /// the parser to return an error. Namely, a negated ASCII word boundary
+    /// can result in matching positions that aren't valid UTF-8 boundaries.
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.allow_invalid_utf8(yes);
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insigificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.ignore_whitespace(yes);
+        self
+    }
+
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.case_insensitive(yes);
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.multi_line(yes);
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.dot_matches_new_line(yes);
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.swap_greed(yes);
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.unicode(yes);
+        self
+    }
+}
+
+/// A convenience parser for regular expressions.
+///
+/// This parser takes as input a regular expression pattern string (the
+/// "concrete syntax") and returns a high-level intermediate representation
+/// (the HIR) suitable for most types of analysis. In particular, this parser
+/// hides the intermediate state of producing an AST (the "abstract syntax").
+/// The AST is itself far more complex than the HIR, so this parser serves as a
+/// convenience for never having to deal with it at all.
+///
+/// If callers have more fine grained use cases that need an AST, then please
+/// see the [`ast::parse`](ast/parse/index.html) module.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    ast: ast::parse::Parser,
+    hir: hir::translate::Translator,
+}
+
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with `parse` method. The parse method returns
+    /// a high level intermediate representation of the given regular
+    /// expression.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into a high level intermediate
+    /// representation.
+    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+        let ast = self.ast.parse(pattern)?;
+        let hir = self.hir.translate(pattern, &ast)?;
+        Ok(hir)
+    }
+}
commit	048fc04e7fd0dd2014a94e73fb34f6e59021c170	[log] [tgz]
author	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:44:22 2020 -0700
committer	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:46:29 2020 -0700
tree	f9b501b392f13f2ca9d86d0a9d377b181f624dfb
parent	2dd07da5420c40e365646bf756938d8d1abf6d62 [diff] [blame]