// Source blob: 52522f41c6b46c1b507a1b4cf016283adb1016d7
Chih-Hung Hsiehe42c5052020-04-16 10:44:21 -07001mat!(uni_literal, r"☃", "☃", Some((0, 3)));
2mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3)));
3mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3)));
4mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3)));
5mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3)));
6mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8)));
7mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2)));
8mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2)));
9mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5)));
10mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)));
11mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)));
12mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)));
13mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)));
14mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)));
15
16// Test the Unicode friendliness of Perl character classes.
17mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)));
18mat!(uni_perl_w_not, r"\w+", "⥡", None);
19mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3)));
20mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)));
21mat!(uni_perl_d_not, r"\d+", "Ⅱ", None);
22mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3)));
23mat!(uni_perl_s, r"\s+", " ", Some((0, 3)));
24mat!(uni_perl_s_not, r"\s+", "☃", None);
25mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)));
26
27// And do the same for word boundaries.
28mat!(uni_boundary_none, r"\d\b", "6δ", None);
29mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
30mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
31mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
32
33// Test general categories.
34//
35// We should test more, but there's a lot. Write a script to generate more of
36// these tests.
37mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "A", Some((0, 3)));
38mat!(
39 uni_class_gencat_close_punctuation,
40 r"\p{Close_Punctuation}",
41 "❯",
42 Some((0, 3))
43);
44mat!(
45 uni_class_gencat_connector_punctuation,
46 r"\p{Connector_Punctuation}",
47 "⁀",
48 Some((0, 3))
49);
50mat!(uni_class_gencat_control, r"\p{Control}", "\u{9f}", Some((0, 2)));
51mat!(
52 uni_class_gencat_currency_symbol,
53 r"\p{Currency_Symbol}",
54 "£",
55 Some((0, 3))
56);
57mat!(
58 uni_class_gencat_dash_punctuation,
59 r"\p{Dash_Punctuation}",
60 "〰",
61 Some((0, 3))
62);
63mat!(uni_class_gencat_decimal_numer, r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
64mat!(
65 uni_class_gencat_enclosing_mark,
66 r"\p{Enclosing_Mark}",
67 "\u{A672}",
68 Some((0, 3))
69);
70mat!(
71 uni_class_gencat_final_punctuation,
72 r"\p{Final_Punctuation}",
73 "⸡",
74 Some((0, 3))
75);
76mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
77mat!(
78 uni_class_gencat_initial_punctuation,
79 r"\p{Initial_Punctuation}",
80 "⸜",
81 Some((0, 3))
82);
83mat!(uni_class_gencat_letter, r"\p{Letter}", "Έ", Some((0, 2)));
84mat!(uni_class_gencat_letter_number, r"\p{Letter_Number}", "ↂ", Some((0, 3)));
85mat!(
86 uni_class_gencat_line_separator,
87 r"\p{Line_Separator}",
88 "\u{2028}",
89 Some((0, 3))
90);
91mat!(
92 uni_class_gencat_lowercase_letter,
93 r"\p{Lowercase_Letter}",
94 "ϛ",
95 Some((0, 2))
96);
97mat!(uni_class_gencat_mark, r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
98mat!(uni_class_gencat_math, r"\p{Math}", "⋿", Some((0, 3)));
99mat!(
100 uni_class_gencat_modifier_letter,
101 r"\p{Modifier_Letter}",
102 "𖭃",
103 Some((0, 4))
104);
105mat!(
106 uni_class_gencat_modifier_symbol,
107 r"\p{Modifier_Symbol}",
108 "🏿",
109 Some((0, 4))
110);
111mat!(
112 uni_class_gencat_nonspacing_mark,
113 r"\p{Nonspacing_Mark}",
114 "\u{1E94A}",
115 Some((0, 4))
116);
117mat!(uni_class_gencat_number, r"\p{Number}", "⓿", Some((0, 3)));
118mat!(
119 uni_class_gencat_open_punctuation,
120 r"\p{Open_Punctuation}",
121 "⦅",
122 Some((0, 3))
123);
124mat!(uni_class_gencat_other, r"\p{Other}", "\u{bc9}", Some((0, 3)));
125mat!(uni_class_gencat_other_letter, r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
126mat!(uni_class_gencat_other_number, r"\p{Other_Number}", "㉏", Some((0, 3)));
127mat!(
128 uni_class_gencat_other_punctuation,
129 r"\p{Other_Punctuation}",
130 "𞥞",
131 Some((0, 4))
132);
133mat!(uni_class_gencat_other_symbol, r"\p{Other_Symbol}", "⅌", Some((0, 3)));
134mat!(
135 uni_class_gencat_paragraph_separator,
136 r"\p{Paragraph_Separator}",
137 "\u{2029}",
138 Some((0, 3))
139);
140mat!(
141 uni_class_gencat_private_use,
142 r"\p{Private_Use}",
143 "\u{10FFFD}",
144 Some((0, 4))
145);
146mat!(uni_class_gencat_punctuation, r"\p{Punctuation}", "𑁍", Some((0, 4)));
147mat!(uni_class_gencat_separator, r"\p{Separator}", "\u{3000}", Some((0, 3)));
148mat!(
149 uni_class_gencat_space_separator,
150 r"\p{Space_Separator}",
151 "\u{205F}",
152 Some((0, 3))
153);
154mat!(
155 uni_class_gencat_spacing_mark,
156 r"\p{Spacing_Mark}",
157 "\u{16F7E}",
158 Some((0, 4))
159);
160mat!(uni_class_gencat_symbol, r"\p{Symbol}", "⯈", Some((0, 3)));
161mat!(
162 uni_class_gencat_titlecase_letter,
163 r"\p{Titlecase_Letter}",
164 "ῼ",
165 Some((0, 3))
166);
167mat!(
168 uni_class_gencat_unassigned,
169 r"\p{Unassigned}",
170 "\u{10FFFF}",
171 Some((0, 4))
172);
173mat!(
174 uni_class_gencat_uppercase_letter,
175 r"\p{Uppercase_Letter}",
176 "Ꝋ",
177 Some((0, 3))
178);
179
180// Test a smattering of properties.
181mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3)));
182mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4)));
183mat!(
184 uni_class_prop_picto1,
185 r"\p{extendedpictographic}",
186 "\u{1FA6E}",
187 Some((0, 4))
188);
189mat!(
190 uni_class_prop_picto2,
191 r"\p{extendedpictographic}",
192 "\u{1FFFD}",
193 Some((0, 4))
194);
195
196// grapheme_cluster_break
197mat!(
198 uni_class_gcb_prepend,
199 r"\p{grapheme_cluster_break=prepend}",
200 "\u{11D46}",
201 Some((0, 4))
202);
203mat!(
204 uni_class_gcb_ri1,
205 r"\p{gcb=regional_indicator}",
206 "\u{1F1E6}",
207 Some((0, 4))
208);
209mat!(uni_class_gcb_ri2, r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4)));
210mat!(
211 uni_class_gcb_ri3,
212 r"\p{gcb=regionalindicator}",
213 "\u{1F1FF}",
214 Some((0, 4))
215);
216mat!(uni_class_gcb_lvt, r"\p{gcb=lvt}", "\u{C989}", Some((0, 3)));
217mat!(uni_class_gcb_zwj, r"\p{gcb=zwj}", "\u{200D}", Some((0, 3)));
218
219// word_break
220mat!(uni_class_wb1, r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3)));
221mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3)));
222mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3)));
223mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3)));
224mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4)));
225
226// sentence_break
227mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2)));
228mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2)));
229mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3)));
230mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4)));
231mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3)));