blob: 61541f61a21434f6d76a23c32e6ed9b2d4523deb [file] [log] [blame]
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001# These test special UTF and UCP features of DFA matching. The output is
2# different for the different widths.
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01003
4#subject dfa
5
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07006# ----------------------------------------------------
7# These are a selection of the more comprehensive tests that are run for
8# non-DFA matching.
9
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010010/X/utf
11 XX\x{d800}
12Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
13 XX\x{d800}\=offset=3
14No match
15 XX\x{d800}\=no_utf_check
16 0: X
17 XX\x{da00}
18Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
19 XX\x{da00}\=no_utf_check
20 0: X
21 XX\x{dc00}
22Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
23 XX\x{dc00}\=no_utf_check
24 0: X
25 XX\x{de00}
26Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
27 XX\x{de00}\=no_utf_check
28 0: X
29 XX\x{dfff}
30Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
31 XX\x{dfff}\=no_utf_check
32 0: X
33 XX\x{110000}
34** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
35 XX\x{d800}\x{1234}
Elliott Hughes2dbd7d22020-06-03 14:32:37 -070036Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010037
38/badutf/utf
39 X\xdf
40No match
41 XX\xef
42No match
43 XXX\xef\x80
44No match
45 X\xf7
46No match
47 XX\xf7\x80
48No match
49 XXX\xf7\x80\x80
50No match
51
52/shortutf/utf
53 XX\xdf\=ph
54No match
55 XX\xef\=ph
56No match
57 XX\xef\x80\=ph
58No match
59 \xf7\=ph
60No match
61 \xf7\x80\=ph
62No match
Elliott Hughes2dbd7d22020-06-03 14:32:37 -070063
64# ----------------------------------------------------
65# UCP and casing tests - except for the first two, these will all fail in 8-bit
66# mode because they are testing UCP without UTF and use characters > 255.
67
68/\x{c1}/i,no_start_optimize
69\= Expect no match
70 \x{e1}
71No match
72
73/\x{c1}+\x{e1}/iB,ucp
74------------------------------------------------------------------
75 Bra
76 /i \x{c1}+
77 /i \x{e1}
78 Ket
79 End
80------------------------------------------------------------------
81 \x{c1}\x{c1}\x{c1}
82 0: \xc1\xc1\xc1
83 1: \xc1\xc1
84 \x{e1}\x{e1}\x{e1}
85 0: \xe1\xe1\xe1
86 1: \xe1\xe1
87
88/\x{120}\x{c1}/i,ucp,no_start_optimize
89 \x{121}\x{e1}
90 0: \x{121}\xe1
91
92/\x{120}\x{c1}/i,ucp
93 \x{121}\x{e1}
94 0: \x{121}\xe1
95
96/[^\x{120}]/i,no_start_optimize
97 \x{121}
98 0: \x{121}
99
100/[^\x{120}]/i,ucp,no_start_optimize
101\= Expect no match
102 \x{121}
103No match
104
105/[^\x{120}]/i
106 \x{121}
107 0: \x{121}
108
109/[^\x{120}]/i,ucp
110\= Expect no match
111 \x{121}
112No match
113
114/\x{120}{2}/i,ucp
115 \x{121}\x{121}
116 0: \x{121}\x{121}
117
118/[^\x{120}]{2}/i,ucp
119\= Expect no match
120 \x{121}\x{121}
121No match
122
123# ----------------------------------------------------
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100124
125# End of testinput14