blob: 1b9bc41306b44ae9e5a5c94a835c1a5cee4c836e [file] [log] [blame]
/*!
This module provides a regular expression printer for `Ast`.
*/
4
5use std::fmt;
6
7use ast::visitor::{self, Visitor};
8use ast::{self, Ast};
9
/// Constructs `Printer` values.
///
/// The printer currently has nothing to configure, which is why this builder
/// is kept private to the module instead of being exported.
#[derive(Clone, Debug)]
struct PrinterBuilder {
    /// Zero-sized placeholder; the builder carries no settings.
    _priv: (),
}
18
19impl Default for PrinterBuilder {
20 fn default() -> PrinterBuilder {
21 PrinterBuilder::new()
22 }
23}
24
25impl PrinterBuilder {
26 fn new() -> PrinterBuilder {
27 PrinterBuilder { _priv: () }
28 }
29
30 fn build(&self) -> Printer {
31 Printer { _priv: () }
32 }
33}
34
/// Turns a regular expression abstract syntax tree (AST) back into a pattern
/// string.
///
/// Printing is done with constant stack space, while heap usage grows in
/// proportion to the size of the AST.
///
/// The output is not guaranteed to reproduce the formatting of the pattern
/// the AST was parsed from. For example, whitespace and comments are not
/// part of the printed result.
#[derive(Debug)]
pub struct Printer {
    /// Zero-sized private field; the printer has no state of its own.
    _priv: (),
}
48
49impl Printer {
50 /// Create a new printer.
51 pub fn new() -> Printer {
52 PrinterBuilder::new().build()
53 }
54
55 /// Print the given `Ast` to the given writer. The writer must implement
56 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57 /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58 /// implementations) or a `&mut String`.
59 pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60 visitor::visit(ast, Writer { printer: self, wtr: wtr })
61 }
62}
63
64#[derive(Debug)]
65struct Writer<'p, W> {
66 printer: &'p mut Printer,
67 wtr: W,
68}
69
70impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
71 type Output = ();
72 type Err = fmt::Error;
73
74 fn finish(self) -> fmt::Result {
75 Ok(())
76 }
77
78 fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
79 match *ast {
80 Ast::Group(ref x) => self.fmt_group_pre(x),
81 Ast::Class(ast::Class::Bracketed(ref x)) => {
82 self.fmt_class_bracketed_pre(x)
83 }
84 _ => Ok(()),
85 }
86 }
87
88 fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
89 use ast::Class;
90
91 match *ast {
92 Ast::Empty(_) => Ok(()),
93 Ast::Flags(ref x) => self.fmt_set_flags(x),
94 Ast::Literal(ref x) => self.fmt_literal(x),
95 Ast::Dot(_) => self.wtr.write_str("."),
96 Ast::Assertion(ref x) => self.fmt_assertion(x),
97 Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
98 Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
99 Ast::Class(Class::Bracketed(ref x)) => {
100 self.fmt_class_bracketed_post(x)
101 }
102 Ast::Repetition(ref x) => self.fmt_repetition(x),
103 Ast::Group(ref x) => self.fmt_group_post(x),
104 Ast::Alternation(_) => Ok(()),
105 Ast::Concat(_) => Ok(()),
106 }
107 }
108
109 fn visit_alternation_in(&mut self) -> fmt::Result {
110 self.wtr.write_str("|")
111 }
112
113 fn visit_class_set_item_pre(
114 &mut self,
115 ast: &ast::ClassSetItem,
116 ) -> Result<(), Self::Err> {
117 match *ast {
118 ast::ClassSetItem::Bracketed(ref x) => {
119 self.fmt_class_bracketed_pre(x)
120 }
121 _ => Ok(()),
122 }
123 }
124
125 fn visit_class_set_item_post(
126 &mut self,
127 ast: &ast::ClassSetItem,
128 ) -> Result<(), Self::Err> {
129 use ast::ClassSetItem::*;
130
131 match *ast {
132 Empty(_) => Ok(()),
133 Literal(ref x) => self.fmt_literal(x),
134 Range(ref x) => {
135 self.fmt_literal(&x.start)?;
136 self.wtr.write_str("-")?;
137 self.fmt_literal(&x.end)?;
138 Ok(())
139 }
140 Ascii(ref x) => self.fmt_class_ascii(x),
141 Unicode(ref x) => self.fmt_class_unicode(x),
142 Perl(ref x) => self.fmt_class_perl(x),
143 Bracketed(ref x) => self.fmt_class_bracketed_post(x),
144 Union(_) => Ok(()),
145 }
146 }
147
148 fn visit_class_set_binary_op_in(
149 &mut self,
150 ast: &ast::ClassSetBinaryOp,
151 ) -> Result<(), Self::Err> {
152 self.fmt_class_set_binary_op_kind(&ast.kind)
153 }
154}
155
156impl<'p, W: fmt::Write> Writer<'p, W> {
157 fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
158 use ast::GroupKind::*;
159 match ast.kind {
160 CaptureIndex(_) => self.wtr.write_str("("),
161 CaptureName(ref x) => {
162 self.wtr.write_str("(?P<")?;
163 self.wtr.write_str(&x.name)?;
164 self.wtr.write_str(">")?;
165 Ok(())
166 }
167 NonCapturing(ref flags) => {
168 self.wtr.write_str("(?")?;
169 self.fmt_flags(flags)?;
170 self.wtr.write_str(":")?;
171 Ok(())
172 }
173 }
174 }
175
176 fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
177 self.wtr.write_str(")")
178 }
179
180 fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
181 use ast::RepetitionKind::*;
182 match ast.op.kind {
183 ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
184 ZeroOrOne => self.wtr.write_str("??"),
185 ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
186 ZeroOrMore => self.wtr.write_str("*?"),
187 OneOrMore if ast.greedy => self.wtr.write_str("+"),
188 OneOrMore => self.wtr.write_str("+?"),
189 Range(ref x) => {
190 self.fmt_repetition_range(x)?;
191 if !ast.greedy {
192 self.wtr.write_str("?")?;
193 }
194 Ok(())
195 }
196 }
197 }
198
199 fn fmt_repetition_range(
200 &mut self,
201 ast: &ast::RepetitionRange,
202 ) -> fmt::Result {
203 use ast::RepetitionRange::*;
204 match *ast {
205 Exactly(x) => write!(self.wtr, "{{{}}}", x),
206 AtLeast(x) => write!(self.wtr, "{{{},}}", x),
207 Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
208 }
209 }
210
211 fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
212 use ast::LiteralKind::*;
213
214 match ast.kind {
215 Verbatim => self.wtr.write_char(ast.c),
216 Punctuation => write!(self.wtr, r"\{}", ast.c),
217 Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
218 HexFixed(ast::HexLiteralKind::X) => {
219 write!(self.wtr, r"\x{:02X}", ast.c as u32)
220 }
221 HexFixed(ast::HexLiteralKind::UnicodeShort) => {
222 write!(self.wtr, r"\u{:04X}", ast.c as u32)
223 }
224 HexFixed(ast::HexLiteralKind::UnicodeLong) => {
225 write!(self.wtr, r"\U{:08X}", ast.c as u32)
226 }
227 HexBrace(ast::HexLiteralKind::X) => {
228 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
229 }
230 HexBrace(ast::HexLiteralKind::UnicodeShort) => {
231 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
232 }
233 HexBrace(ast::HexLiteralKind::UnicodeLong) => {
234 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
235 }
236 Special(ast::SpecialLiteralKind::Bell) => {
237 self.wtr.write_str(r"\a")
238 }
239 Special(ast::SpecialLiteralKind::FormFeed) => {
240 self.wtr.write_str(r"\f")
241 }
242 Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
243 Special(ast::SpecialLiteralKind::LineFeed) => {
244 self.wtr.write_str(r"\n")
245 }
246 Special(ast::SpecialLiteralKind::CarriageReturn) => {
247 self.wtr.write_str(r"\r")
248 }
249 Special(ast::SpecialLiteralKind::VerticalTab) => {
250 self.wtr.write_str(r"\v")
251 }
252 Special(ast::SpecialLiteralKind::Space) => {
253 self.wtr.write_str(r"\ ")
254 }
255 }
256 }
257
258 fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
259 use ast::AssertionKind::*;
260 match ast.kind {
261 StartLine => self.wtr.write_str("^"),
262 EndLine => self.wtr.write_str("$"),
263 StartText => self.wtr.write_str(r"\A"),
264 EndText => self.wtr.write_str(r"\z"),
265 WordBoundary => self.wtr.write_str(r"\b"),
266 NotWordBoundary => self.wtr.write_str(r"\B"),
267 }
268 }
269
270 fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
271 self.wtr.write_str("(?")?;
272 self.fmt_flags(&ast.flags)?;
273 self.wtr.write_str(")")?;
274 Ok(())
275 }
276
277 fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
278 use ast::{Flag, FlagsItemKind};
279
280 for item in &ast.items {
281 match item.kind {
282 FlagsItemKind::Negation => self.wtr.write_str("-"),
283 FlagsItemKind::Flag(ref flag) => match *flag {
284 Flag::CaseInsensitive => self.wtr.write_str("i"),
285 Flag::MultiLine => self.wtr.write_str("m"),
286 Flag::DotMatchesNewLine => self.wtr.write_str("s"),
287 Flag::SwapGreed => self.wtr.write_str("U"),
288 Flag::Unicode => self.wtr.write_str("u"),
289 Flag::IgnoreWhitespace => self.wtr.write_str("x"),
290 },
291 }?;
292 }
293 Ok(())
294 }
295
296 fn fmt_class_bracketed_pre(
297 &mut self,
298 ast: &ast::ClassBracketed,
299 ) -> fmt::Result {
300 if ast.negated {
301 self.wtr.write_str("[^")
302 } else {
303 self.wtr.write_str("[")
304 }
305 }
306
307 fn fmt_class_bracketed_post(
308 &mut self,
309 _ast: &ast::ClassBracketed,
310 ) -> fmt::Result {
311 self.wtr.write_str("]")
312 }
313
314 fn fmt_class_set_binary_op_kind(
315 &mut self,
316 ast: &ast::ClassSetBinaryOpKind,
317 ) -> fmt::Result {
318 use ast::ClassSetBinaryOpKind::*;
319 match *ast {
320 Intersection => self.wtr.write_str("&&"),
321 Difference => self.wtr.write_str("--"),
322 SymmetricDifference => self.wtr.write_str("~~"),
323 }
324 }
325
326 fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
327 use ast::ClassPerlKind::*;
328 match ast.kind {
329 Digit if ast.negated => self.wtr.write_str(r"\D"),
330 Digit => self.wtr.write_str(r"\d"),
331 Space if ast.negated => self.wtr.write_str(r"\S"),
332 Space => self.wtr.write_str(r"\s"),
333 Word if ast.negated => self.wtr.write_str(r"\W"),
334 Word => self.wtr.write_str(r"\w"),
335 }
336 }
337
338 fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
339 use ast::ClassAsciiKind::*;
340 match ast.kind {
341 Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
342 Alnum => self.wtr.write_str("[:alnum:]"),
343 Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
344 Alpha => self.wtr.write_str("[:alpha:]"),
345 Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
346 Ascii => self.wtr.write_str("[:ascii:]"),
347 Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
348 Blank => self.wtr.write_str("[:blank:]"),
349 Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
350 Cntrl => self.wtr.write_str("[:cntrl:]"),
351 Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
352 Digit => self.wtr.write_str("[:digit:]"),
353 Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
354 Graph => self.wtr.write_str("[:graph:]"),
355 Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
356 Lower => self.wtr.write_str("[:lower:]"),
357 Print if ast.negated => self.wtr.write_str("[:^print:]"),
358 Print => self.wtr.write_str("[:print:]"),
359 Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
360 Punct => self.wtr.write_str("[:punct:]"),
361 Space if ast.negated => self.wtr.write_str("[:^space:]"),
362 Space => self.wtr.write_str("[:space:]"),
363 Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
364 Upper => self.wtr.write_str("[:upper:]"),
365 Word if ast.negated => self.wtr.write_str("[:^word:]"),
366 Word => self.wtr.write_str("[:word:]"),
367 Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
368 Xdigit => self.wtr.write_str("[:xdigit:]"),
369 }
370 }
371
372 fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
373 use ast::ClassUnicodeKind::*;
374 use ast::ClassUnicodeOpKind::*;
375
376 if ast.negated {
377 self.wtr.write_str(r"\P")?;
378 } else {
379 self.wtr.write_str(r"\p")?;
380 }
381 match ast.kind {
382 OneLetter(c) => self.wtr.write_char(c),
383 Named(ref x) => write!(self.wtr, "{{{}}}", x),
384 NamedValue { op: Equal, ref name, ref value } => {
385 write!(self.wtr, "{{{}={}}}", name, value)
386 }
387 NamedValue { op: Colon, ref name, ref value } => {
388 write!(self.wtr, "{{{}:{}}}", name, value)
389 }
390 NamedValue { op: NotEqual, ref name, ref value } => {
391 write!(self.wtr, "{{{}!={}}}", name, value)
392 }
393 }
394 }
395}
396
#[cfg(test)]
mod tests {
    use super::Printer;
    use ast::parse::ParserBuilder;

    /// Parses `given` with a default parser, prints the resulting AST and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|b| b, given);
    }

    /// Same as `roundtrip`, but lets `f` adjust the parser builder before
    /// the pattern is parsed (e.g. to enable octal escapes).
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printer = Printer::new();
        let mut dst = String::new();
        printer.print(&ast, &mut dst).unwrap();
        assert_eq!(given, dst);
    }

    #[test]
    fn print_literal() {
        roundtrip("a");
        roundtrip(r"\[");
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip(r"\x61");
        roundtrip(r"\x7F");
        roundtrip(r"\u0061");
        roundtrip(r"\U00000061");
        roundtrip(r"\x{61}");
        roundtrip(r"\x{7F}");
        roundtrip(r"\u{61}");
        roundtrip(r"\U{61}");

        roundtrip(r"\a");
        roundtrip(r"\f");
        roundtrip(r"\t");
        roundtrip(r"\n");
        roundtrip(r"\r");
        roundtrip(r"\v");
        roundtrip(r"(?x)\ ");
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip("ab");
        roundtrip("abcde");
        roundtrip("a(bcd)ef");
    }

    #[test]
    fn print_alternation() {
        roundtrip("a|b");
        roundtrip("a|b|c|d|e");
        roundtrip("|a|b|c|d|e");
        roundtrip("|a|b|c|d|e|");
        roundtrip("a(b|c|d)|e|f");
    }

    #[test]
    fn print_assertion() {
        roundtrip(r"^");
        roundtrip(r"$");
        roundtrip(r"\A");
        roundtrip(r"\z");
        roundtrip(r"\b");
        roundtrip(r"\B");
    }

    #[test]
    fn print_repetition() {
        roundtrip("a?");
        roundtrip("a??");
        roundtrip("a*");
        roundtrip("a*?");
        roundtrip("a+");
        roundtrip("a+?");
        roundtrip("a{5}");
        roundtrip("a{5}?");
        roundtrip("a{5,}");
        roundtrip("a{5,}?");
        roundtrip("a{5,10}");
        roundtrip("a{5,10}?");
    }

    #[test]
    fn print_flags() {
        roundtrip("(?i)");
        roundtrip("(?-i)");
        roundtrip("(?s-i)");
        roundtrip("(?-si)");
        roundtrip("(?siUmux)");
    }

    #[test]
    fn print_group() {
        roundtrip("(?i:a)");
        roundtrip("(?P<foo>a)");
        roundtrip("(a)");
    }

    #[test]
    fn print_class() {
        roundtrip(r"[abc]");
        roundtrip(r"[a-z]");
        roundtrip(r"[^a-z]");
        roundtrip(r"[a-z0-9]");
        // This case was previously listed twice in a row; once is enough.
        roundtrip(r"[-a-z0-9]");
        roundtrip(r"[a-z0-9---]");
        roundtrip(r"[a-z&&m-n]");
        roundtrip(r"[[a-z&&m-n]]");
        roundtrip(r"[a-z--m-n]");
        roundtrip(r"[a-z~~m-n]");
        roundtrip(r"[a-z[0-9]]");
        roundtrip(r"[a-z[^0-9]]");

        roundtrip(r"\d");
        roundtrip(r"\D");
        roundtrip(r"\s");
        roundtrip(r"\S");
        roundtrip(r"\w");
        roundtrip(r"\W");

        roundtrip(r"[[:alnum:]]");
        roundtrip(r"[[:^alnum:]]");
        roundtrip(r"[[:alpha:]]");
        roundtrip(r"[[:^alpha:]]");
        roundtrip(r"[[:ascii:]]");
        roundtrip(r"[[:^ascii:]]");
        roundtrip(r"[[:blank:]]");
        roundtrip(r"[[:^blank:]]");
        roundtrip(r"[[:cntrl:]]");
        roundtrip(r"[[:^cntrl:]]");
        roundtrip(r"[[:digit:]]");
        roundtrip(r"[[:^digit:]]");
        roundtrip(r"[[:graph:]]");
        roundtrip(r"[[:^graph:]]");
        roundtrip(r"[[:lower:]]");
        roundtrip(r"[[:^lower:]]");
        roundtrip(r"[[:print:]]");
        roundtrip(r"[[:^print:]]");
        roundtrip(r"[[:punct:]]");
        roundtrip(r"[[:^punct:]]");
        roundtrip(r"[[:space:]]");
        roundtrip(r"[[:^space:]]");
        roundtrip(r"[[:upper:]]");
        roundtrip(r"[[:^upper:]]");
        roundtrip(r"[[:word:]]");
        roundtrip(r"[[:^word:]]");
        roundtrip(r"[[:xdigit:]]");
        roundtrip(r"[[:^xdigit:]]");

        roundtrip(r"\pL");
        roundtrip(r"\PL");
        roundtrip(r"\p{L}");
        roundtrip(r"\P{L}");
        roundtrip(r"\p{X=Y}");
        roundtrip(r"\P{X=Y}");
        roundtrip(r"\p{X:Y}");
        roundtrip(r"\P{X:Y}");
        roundtrip(r"\p{X!=Y}");
        roundtrip(r"\P{X!=Y}");
    }
}