blob: d168e135d3a35ddd8aa0303d7b9f6ae70763c4e1 [file] [log] [blame]
David Tolnay14cbdeb2016-10-01 12:13:59 -07001use nom::IResult;
2use unicode_xid::UnicodeXID;
3
4pub fn whitespace(input: &str) -> IResult<&str, ()> {
5 if input.is_empty() {
6 return IResult::Error;
7 }
8
David Tolnay318195d2016-10-08 11:34:19 -07009 let bytes = input.as_bytes();
10 let mut i = 0;
11 while i < bytes.len() {
12 let s = &input[i..];
13 if bytes[i] == b'/' {
David Tolnaydaaf7742016-10-03 11:11:43 -070014 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
15 !s.starts_with("//!") {
David Tolnay14cbdeb2016-10-01 12:13:59 -070016 if let Some(len) = s.find('\n') {
David Tolnay318195d2016-10-08 11:34:19 -070017 i += len + 1;
David Tolnay14cbdeb2016-10-01 12:13:59 -070018 continue;
19 }
20 break;
David Tolnayc1fea502016-10-30 17:54:02 -070021 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
22 !s.starts_with("/*!") {
David Tolnay14cbdeb2016-10-01 12:13:59 -070023 match block_comment(s) {
24 IResult::Done(_, com) => {
David Tolnay318195d2016-10-08 11:34:19 -070025 i += com.len();
David Tolnay14cbdeb2016-10-01 12:13:59 -070026 continue;
27 }
28 IResult::Error => {
29 return IResult::Error;
30 }
31 }
32 }
33 }
David Tolnay318195d2016-10-08 11:34:19 -070034 match bytes[i] {
35 b' ' | 0x09...0x0d => {
36 i += 1;
37 continue;
38 }
David Tolnay3bcfb722016-10-08 11:58:36 -070039 b if b <= 0x7f => {}
David Tolnay318195d2016-10-08 11:34:19 -070040 _ => {
41 let ch = s.chars().next().unwrap();
David Tolnay462b6f12016-10-30 11:35:32 -070042 if is_whitespace(ch) {
David Tolnay318195d2016-10-08 11:34:19 -070043 i += ch.len_utf8();
44 continue;
45 }
46 }
David Tolnay14cbdeb2016-10-01 12:13:59 -070047 }
David Tolnay318195d2016-10-08 11:34:19 -070048 return if i > 0 {
49 IResult::Done(s, ())
50 } else {
51 IResult::Error
52 };
David Tolnay14cbdeb2016-10-01 12:13:59 -070053 }
54 IResult::Done("", ())
55}
56
57pub fn block_comment(input: &str) -> IResult<&str, &str> {
58 if !input.starts_with("/*") {
59 return IResult::Error;
60 }
61
62 let mut depth = 0;
David Tolnay079b5ad2016-10-08 09:39:29 -070063 let bytes = input.as_bytes();
64 let mut i = 0;
65 let upper = bytes.len() - 1;
66 while i < upper {
67 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
David Tolnay14cbdeb2016-10-01 12:13:59 -070068 depth += 1;
David Tolnay079b5ad2016-10-08 09:39:29 -070069 i += 1; // eat '*'
70 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
David Tolnay14cbdeb2016-10-01 12:13:59 -070071 depth -= 1;
72 if depth == 0 {
73 return IResult::Done(&input[i + 2..], &input[..i + 2]);
74 }
David Tolnay079b5ad2016-10-08 09:39:29 -070075 i += 1; // eat '/'
David Tolnay14cbdeb2016-10-01 12:13:59 -070076 }
David Tolnay079b5ad2016-10-08 09:39:29 -070077 i += 1;
David Tolnay14cbdeb2016-10-01 12:13:59 -070078 }
79 IResult::Error
80}
81
82pub fn word_break(input: &str) -> IResult<&str, ()> {
83 match input.chars().next() {
David Tolnaydaaf7742016-10-03 11:11:43 -070084 Some(ch) if UnicodeXID::is_xid_continue(ch) => IResult::Error,
85 Some(_) | None => IResult::Done(input, ()),
David Tolnay14cbdeb2016-10-01 12:13:59 -070086 }
87}
David Tolnaydef66372016-10-24 21:51:32 -070088
89pub fn skip_whitespace(input: &str) -> &str {
90 match whitespace(input) {
91 IResult::Done(rest, _) => rest,
92 IResult::Error => input,
93 }
94}
David Tolnay462b6f12016-10-30 11:35:32 -070095
/// Reports whether `ch` counts as whitespace for this lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}