blob: 2bc8a25e3043e57e0d1aa7ecfe6ae2e3ae2203d4 [file] [log] [blame]
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
# features that are not compatible with the 8-bit library, or which give
# different output in 16-bit or 32-bit mode. The output for the two widths is
# different, so they have separate output files.
Nick Kralevichf73ff172014-09-27 12:41:49 -07005
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01006#forbid_utf
7#newline_default LF ANY ANYCRLF
Nick Kralevichf73ff172014-09-27 12:41:49 -07008
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01009/[^\x{c4}]/IB
Nick Kralevichf73ff172014-09-27 12:41:49 -070010
11/\x{100}/I
12
Nick Kralevichf73ff172014-09-27 12:41:49 -070013/ (?: [\040\t] | \(
14(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
15\) )* # optional leading comment
16(?: (?:
17[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
18(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
19|
20" (?: # opening quote...
21[^\\\x80-\xff\n\015"] # Anything except backslash and quote
22| # or
23\\ [^\x80-\xff] # Escaped something (something != CR)
24)* " # closing quote
25) # initial word
26(?: (?: [\040\t] | \(
27(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
28\) )* \. (?: [\040\t] | \(
29(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
30\) )* (?:
31[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
32(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
33|
34" (?: # opening quote...
35[^\\\x80-\xff\n\015"] # Anything except backslash and quote
36| # or
37\\ [^\x80-\xff] # Escaped something (something != CR)
38)* " # closing quote
39) )* # further okay, if led by a period
40(?: [\040\t] | \(
41(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
42\) )* @ (?: [\040\t] | \(
43(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
44\) )* (?:
45[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
46(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
47| \[ # [
48(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
49\] # ]
50) # initial subdomain
51(?: #
52(?: [\040\t] | \(
53(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
54\) )* \. # if led by a period...
55(?: [\040\t] | \(
56(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
57\) )* (?:
58[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
59(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
60| \[ # [
61(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
62\] # ]
63) # ...further okay
64)*
65# address
66| # or
67(?:
68[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
69(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
70|
71" (?: # opening quote...
72[^\\\x80-\xff\n\015"] # Anything except backslash and quote
73| # or
74\\ [^\x80-\xff] # Escaped something (something != CR)
75)* " # closing quote
76) # one word, optionally followed by....
77(?:
78[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
79\(
80(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
81\) | # comments, or...
82
83" (?: # opening quote...
84[^\\\x80-\xff\n\015"] # Anything except backslash and quote
85| # or
86\\ [^\x80-\xff] # Escaped something (something != CR)
87)* " # closing quote
88# quoted strings
89)*
90< (?: [\040\t] | \(
91(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
92\) )* # leading <
93(?: @ (?: [\040\t] | \(
94(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
95\) )* (?:
96[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
97(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
98| \[ # [
99(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
100\] # ]
101) # initial subdomain
102(?: #
103(?: [\040\t] | \(
104(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
105\) )* \. # if led by a period...
106(?: [\040\t] | \(
107(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
108\) )* (?:
109[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
110(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
111| \[ # [
112(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
113\] # ]
114) # ...further okay
115)*
116
117(?: (?: [\040\t] | \(
118(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
119\) )* , (?: [\040\t] | \(
120(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
121\) )* @ (?: [\040\t] | \(
122(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
123\) )* (?:
124[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
125(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
126| \[ # [
127(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
128\] # ]
129) # initial subdomain
130(?: #
131(?: [\040\t] | \(
132(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
133\) )* \. # if led by a period...
134(?: [\040\t] | \(
135(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
136\) )* (?:
137[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
138(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
139| \[ # [
140(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
141\] # ]
142) # ...further okay
143)*
144)* # further okay, if led by comma
145: # closing colon
146(?: [\040\t] | \(
147(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
148\) )* )? # optional route
149(?:
150[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
151(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
152|
153" (?: # opening quote...
154[^\\\x80-\xff\n\015"] # Anything except backslash and quote
155| # or
156\\ [^\x80-\xff] # Escaped something (something != CR)
157)* " # closing quote
158) # initial word
159(?: (?: [\040\t] | \(
160(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
161\) )* \. (?: [\040\t] | \(
162(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
163\) )* (?:
164[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
165(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
166|
167" (?: # opening quote...
168[^\\\x80-\xff\n\015"] # Anything except backslash and quote
169| # or
170\\ [^\x80-\xff] # Escaped something (something != CR)
171)* " # closing quote
172) )* # further okay, if led by a period
173(?: [\040\t] | \(
174(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
175\) )* @ (?: [\040\t] | \(
176(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
177\) )* (?:
178[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
179(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
180| \[ # [
181(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
182\] # ]
183) # initial subdomain
184(?: #
185(?: [\040\t] | \(
186(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
187\) )* \. # if led by a period...
188(?: [\040\t] | \(
189(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
190\) )* (?:
191[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
192(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
193| \[ # [
194(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
195\] # ]
196) # ...further okay
197)*
198# address spec
199(?: [\040\t] | \(
200(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
201\) )* > # trailing >
202# name and address
203) (?: [\040\t] | \(
204(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
205\) )* # optional trailing comment
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100206/Ix
Nick Kralevichf73ff172014-09-27 12:41:49 -0700207
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100208/[\h]/B
Nick Kralevichf73ff172014-09-27 12:41:49 -0700209 >\x09<
210
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100211/[\h]+/B
Nick Kralevichf73ff172014-09-27 12:41:49 -0700212 >\x09\x20\xa0<
213
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100214/[\v]/B
Nick Kralevichf73ff172014-09-27 12:41:49 -0700215
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100216/[^\h]/B
Nick Kralevichf73ff172014-09-27 12:41:49 -0700217
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100218/\h+/I
219 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
220 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
Nick Kralevichf73ff172014-09-27 12:41:49 -0700221
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100222/[\h\x{dc00}]+/IB
223 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
224 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
Nick Kralevichf73ff172014-09-27 12:41:49 -0700225
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100226/\H+/I
227 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
228 \x{2000}\x{200a}\x{1fff}\x{200b}
229 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
230 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
Nick Kralevichf73ff172014-09-27 12:41:49 -0700231
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100232/[\H\x{d800}]+/
233 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
234 \x{2000}\x{200a}\x{1fff}\x{200b}
235 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
236 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
Nick Kralevichf73ff172014-09-27 12:41:49 -0700237
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100238/\v+/I
239 \x{2027}\x{2030}\x{2028}\x{2029}
240 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
241
242/[\v\x{dc00}]+/IB
243 \x{2027}\x{2030}\x{2028}\x{2029}
244 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
245
246/\V+/I
247 \x{2028}\x{2029}\x{2027}\x{2030}
248 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
249
250/[\V\x{d800}]+/
251 \x{2028}\x{2029}\x{2027}\x{2030}
252 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
253
254/\R+/I,bsr=unicode
255 \x{2027}\x{2030}\x{2028}\x{2029}
256 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
257
258/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
259 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
260
261/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
262
263/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
264
265/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
266
267/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
268
269/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
Nick Kralevichf73ff172014-09-27 12:41:49 -0700270 XX
271
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100272/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
Nick Kralevichf73ff172014-09-27 12:41:49 -0700273 XX
274
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100275/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
Nick Kralevichf73ff172014-09-27 12:41:49 -0700276
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100277/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
Nick Kralevichf73ff172014-09-27 12:41:49 -0700278
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100279/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
Nick Kralevichf73ff172014-09-27 12:41:49 -0700280
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100281/^\x{ffff}+/i
282 \x{ffff}
Nick Kralevichf73ff172014-09-27 12:41:49 -0700283
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100284/^\x{ffff}?/i
285 \x{ffff}
Elliott Hughes0ea98832015-12-04 23:18:20 -0800286
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100287/^\x{ffff}*/i
288 \x{ffff}
289
290/^\x{ffff}{3}/i
291 \x{ffff}\x{ffff}\x{ffff}
292
293/^\x{ffff}{0,3}/i
294 \x{ffff}
295
296/[^\x00-a]{12,}[^b-\xff]*/B
297
298/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
299
300/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
301
302/^[\x{1234}\x{4321}]{2,4}?/
303 \x{1234}\x{1234}\x{1234}
304
# Check maximum non-UTF character size for the 16-bit library.
306
307/\x{ffff}/
308 A\x{ffff}B
309
310/\x{10000}/
311
312/\o{20000}/
313
# Check maximum character size for the 32-bit library. These will all give
# errors in the 16-bit library.
316
317/\x{110000}/
318
319/\x{7fffffff}/
320
321/\x{80000000}/
322
323/\x{ffffffff}/
324
325/\x{100000000}/
326
327/\o{17777777777}/
328
329/\o{20000000000}/
330
331/\o{37777777777}/
332
333/\o{40000000000}/
334
335/\x{7fffffff}\x{7fffffff}/I
336
337/\x{80000000}\x{80000000}/I
338
339/\x{ffffffff}\x{ffffffff}/I
340
# Non-UTF characters
342
343/.{2,3}/
344 \x{400000}\x{400001}\x{400002}\x{400003}
345
346/\x{400000}\x{800000}/IBi
347
# Check character ranges
349
350/[\H]/IB
351
352/[\V]/IB
353
354/(*THEN:\[A]{65501})/expand
355
# We can use pcre2test's utf8_input modifier to create wide pattern characters,
# even though this test is run when UTF is not supported.
358
359/abý¿¿¿¿¿z/utf8_input
360 abý¿¿¿¿¿z
361 ab\x{7fffffff}z
362
363/abÿý¿¿¿¿¿z/utf8_input
364 abÿý¿¿¿¿¿z
365 ab\x{ffffffff}z
366
367/abÿAz/utf8_input
368 abÿAz
369 ab\x{80000041}z
370
Elliott Hughes378b1752021-06-08 13:42:40 -0700371/(?i:A{1,}\6666666666)/
372 A\x{1b6}6666666
373
# End of testinput11