blob: 2c95f615f3328c6620587ceefaf67278f2ed1837 [file] [log] [blame]
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
2# features that are not compatible with the 8-bit library, or which give
3# different output in 16-bit or 32-bit mode. The output for the two widths is
4# different, so they have separate output files.
5
6#forbid_utf
7#newline_default LF ANY ANYCRLF
8
9/[^\x{c4}]/IB
10------------------------------------------------------------------
11 Bra
12 [^\x{c4}]
13 Ket
14 End
15------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070016Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010017Subject length lower bound = 1
18
19/\x{100}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -070020Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010021First code unit = \x{100}
22Subject length lower bound = 1
23
24/ (?: [\040\t] | \(
25(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
26\) )* # optional leading comment
27(?: (?:
28[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
29(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
30|
31" (?: # opening quote...
32[^\\\x80-\xff\n\015"] # Anything except backslash and quote
33| # or
34\\ [^\x80-\xff] # Escaped something (something != CR)
35)* " # closing quote
36) # initial word
37(?: (?: [\040\t] | \(
38(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
39\) )* \. (?: [\040\t] | \(
40(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
41\) )* (?:
42[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
43(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
44|
45" (?: # opening quote...
46[^\\\x80-\xff\n\015"] # Anything except backslash and quote
47| # or
48\\ [^\x80-\xff] # Escaped something (something != CR)
49)* " # closing quote
50) )* # further okay, if led by a period
51(?: [\040\t] | \(
52(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
53\) )* @ (?: [\040\t] | \(
54(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
55\) )* (?:
56[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
57(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
58| \[ # [
59(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
60\] # ]
61) # initial subdomain
62(?: #
63(?: [\040\t] | \(
64(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
65\) )* \. # if led by a period...
66(?: [\040\t] | \(
67(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
68\) )* (?:
69[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
70(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
71| \[ # [
72(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
73\] # ]
74) # ...further okay
75)*
76# address
77| # or
78(?:
79[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
80(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
81|
82" (?: # opening quote...
83[^\\\x80-\xff\n\015"] # Anything except backslash and quote
84| # or
85\\ [^\x80-\xff] # Escaped something (something != CR)
86)* " # closing quote
87) # one word, optionally followed by....
88(?:
89[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
90\(
91(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
92\) | # comments, or...
93
94" (?: # opening quote...
95[^\\\x80-\xff\n\015"] # Anything except backslash and quote
96| # or
97\\ [^\x80-\xff] # Escaped something (something != CR)
98)* " # closing quote
99# quoted strings
100)*
101< (?: [\040\t] | \(
102(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
103\) )* # leading <
104(?: @ (?: [\040\t] | \(
105(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
106\) )* (?:
107[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
108(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
109| \[ # [
110(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
111\] # ]
112) # initial subdomain
113(?: #
114(?: [\040\t] | \(
115(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
116\) )* \. # if led by a period...
117(?: [\040\t] | \(
118(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
119\) )* (?:
120[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
121(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
122| \[ # [
123(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
124\] # ]
125) # ...further okay
126)*
127
128(?: (?: [\040\t] | \(
129(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
130\) )* , (?: [\040\t] | \(
131(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
132\) )* @ (?: [\040\t] | \(
133(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
134\) )* (?:
135[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
136(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
137| \[ # [
138(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
139\] # ]
140) # initial subdomain
141(?: #
142(?: [\040\t] | \(
143(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
144\) )* \. # if led by a period...
145(?: [\040\t] | \(
146(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
147\) )* (?:
148[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
149(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
150| \[ # [
151(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
152\] # ]
153) # ...further okay
154)*
155)* # further okay, if led by comma
156: # closing colon
157(?: [\040\t] | \(
158(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
159\) )* )? # optional route
160(?:
161[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
162(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
163|
164" (?: # opening quote...
165[^\\\x80-\xff\n\015"] # Anything except backslash and quote
166| # or
167\\ [^\x80-\xff] # Escaped something (something != CR)
168)* " # closing quote
169) # initial word
170(?: (?: [\040\t] | \(
171(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
172\) )* \. (?: [\040\t] | \(
173(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
174\) )* (?:
175[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
176(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
177|
178" (?: # opening quote...
179[^\\\x80-\xff\n\015"] # Anything except backslash and quote
180| # or
181\\ [^\x80-\xff] # Escaped something (something != CR)
182)* " # closing quote
183) )* # further okay, if led by a period
184(?: [\040\t] | \(
185(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
186\) )* @ (?: [\040\t] | \(
187(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
188\) )* (?:
189[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
190(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
191| \[ # [
192(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
193\] # ]
194) # initial subdomain
195(?: #
196(?: [\040\t] | \(
197(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
198\) )* \. # if led by a period...
199(?: [\040\t] | \(
200(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
201\) )* (?:
202[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
203(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
204| \[ # [
205(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
206\] # ]
207) # ...further okay
208)*
209# address spec
210(?: [\040\t] | \(
211(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
212\) )* > # trailing >
213# name and address
214) (?: [\040\t] | \(
215(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
216\) )* # optional trailing comment
217/Ix
Elliott Hughes0c26e192019-08-07 12:24:46 -0700218Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100219Contains explicit CR or LF match
220Options: extended
221Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
222 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
223 f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
224Subject length lower bound = 3
225
226/[\h]/B
227------------------------------------------------------------------
228 Bra
229 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
230 Ket
231 End
232------------------------------------------------------------------
233 >\x09<
234 0: \x09
235
236/[\h]+/B
237------------------------------------------------------------------
238 Bra
239 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
240 Ket
241 End
242------------------------------------------------------------------
243 >\x09\x20\xa0<
244 0: \x09 \xa0
245
246/[\v]/B
247------------------------------------------------------------------
248 Bra
249 [\x0a-\x0d\x85\x{2028}-\x{2029}]
250 Ket
251 End
252------------------------------------------------------------------
253
254/[^\h]/B
255------------------------------------------------------------------
256 Bra
257 [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
258 Ket
259 End
260------------------------------------------------------------------
261
262/\h+/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700263Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100264Starting code units: \x09 \x20 \xa0 \xff
265Subject length lower bound = 1
266 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
267 0: \x{1680}\x{2000}\x{202f}\x{3000}
268 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
269 0: \x{200a}\xa0\x{2000}
270
271/[\h\x{dc00}]+/IB
272------------------------------------------------------------------
273 Bra
274 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
275 Ket
276 End
277------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700278Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100279Starting code units: \x09 \x20 \xa0 \xff
280Subject length lower bound = 1
281 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
282 0: \x{1680}\x{2000}\x{202f}\x{3000}
283 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
284 0: \x{200a}\xa0\x{2000}
285
286/\H+/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700287Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100288Subject length lower bound = 1
289 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
290 0: \x{167f}\x{1681}\x{180d}\x{180f}
291 \x{2000}\x{200a}\x{1fff}\x{200b}
292 0: \x{1fff}\x{200b}
293 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
294 0: \x{202e}\x{2030}\x{205e}\x{2060}
295 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
296 0: \x9f\xa1\x{2fff}\x{3001}
297
298/[\H\x{d800}]+/
299 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
300 0: \x{167f}\x{1681}\x{180d}\x{180f}
301 \x{2000}\x{200a}\x{1fff}\x{200b}
302 0: \x{1fff}\x{200b}
303 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
304 0: \x{202e}\x{2030}\x{205e}\x{2060}
305 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
306 0: \x9f\xa1\x{2fff}\x{3001}
307
308/\v+/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700309Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100310Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
311Subject length lower bound = 1
312 \x{2027}\x{2030}\x{2028}\x{2029}
313 0: \x{2028}\x{2029}
314 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
315 0: \x85\x0a\x0b\x0c\x0d
316
317/[\v\x{dc00}]+/IB
318------------------------------------------------------------------
319 Bra
320 [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
321 Ket
322 End
323------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700324Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100325Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
326Subject length lower bound = 1
327 \x{2027}\x{2030}\x{2028}\x{2029}
328 0: \x{2028}\x{2029}
329 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
330 0: \x85\x0a\x0b\x0c\x0d
331
332/\V+/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700333Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100334Subject length lower bound = 1
335 \x{2028}\x{2029}\x{2027}\x{2030}
336 0: \x{2027}\x{2030}
337 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
338 0: \x09\x0e\x84\x86
339
340/[\V\x{d800}]+/
341 \x{2028}\x{2029}\x{2027}\x{2030}
342 0: \x{2027}\x{2030}
343 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
344 0: \x09\x0e\x84\x86
345
346/\R+/I,bsr=unicode
Elliott Hughes0c26e192019-08-07 12:24:46 -0700347Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100348\R matches any Unicode newline
349Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
350Subject length lower bound = 1
351 \x{2027}\x{2030}\x{2028}\x{2029}
352 0: \x{2028}\x{2029}
353 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
354 0: \x85\x0a\x0b\x0c\x0d
355
356/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700357Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100358First code unit = \x{d800}
359Last code unit = \x{dd00}
360Subject length lower bound = 6
361 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
362 0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
363
364/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
365------------------------------------------------------------------
366 Bra
367 [^\x{80}]
368 [^\x{ff}]
369 [^\x{100}]
370 [^\x{1000}]
371 [^\x{ffff}]
372 Ket
373 End
374------------------------------------------------------------------
375
376/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
377------------------------------------------------------------------
378 Bra
379 /i [^\x{80}]
380 /i [^\x{ff}]
381 /i [^\x{100}]
382 /i [^\x{1000}]
383 /i [^\x{ffff}]
384 Ket
385 End
386------------------------------------------------------------------
387
388/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
389------------------------------------------------------------------
390 Bra
391 [^\x{100}]*
392 [^\x{1000}]+
393 [^\x{ffff}]??
394 [^\x{8000}]{4}
395 [^\x{8000}]*
396 [^\x{7fff}]{2}
397 [^\x{7fff}]{0,7}?
398 [^\x{100}]{5}
399 [^\x{100}]?+
400 Ket
401 End
402------------------------------------------------------------------
403
404/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
405------------------------------------------------------------------
406 Bra
407 /i [^\x{100}]*
408 /i [^\x{1000}]+
409 /i [^\x{ffff}]??
410 /i [^\x{8000}]{4}
411 /i [^\x{8000}]*
412 /i [^\x{7fff}]{2}
413 /i [^\x{7fff}]{0,7}?
414 /i [^\x{100}]{5}
415 /i [^\x{100}]?+
416 Ket
417 End
418------------------------------------------------------------------
419
420/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
421 XX
422 0: XX
423MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
424
425/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
426 XX
427 0: XX
428MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
429
430/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
431------------------------------------------------------------------
432 Bra
433 \x{100}
434 Ket
435 End
436------------------------------------------------------------------
437
438/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
439------------------------------------------------------------------
440 Bra
441 [\x{100}-\x{200}]
442 Ket
443 End
444------------------------------------------------------------------
445
446/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
447------------------------------------------------------------------
448 Bra
449 \x{d800}
450 Ket
451 End
452------------------------------------------------------------------
453
454/^\x{ffff}+/i
455 \x{ffff}
456 0: \x{ffff}
457
458/^\x{ffff}?/i
459 \x{ffff}
460 0: \x{ffff}
461
462/^\x{ffff}*/i
463 \x{ffff}
464 0: \x{ffff}
465
466/^\x{ffff}{3}/i
467 \x{ffff}\x{ffff}\x{ffff}
468 0: \x{ffff}\x{ffff}\x{ffff}
469
470/^\x{ffff}{0,3}/i
471 \x{ffff}
472 0: \x{ffff}
473
474/[^\x00-a]{12,}[^b-\xff]*/B
475------------------------------------------------------------------
476 Bra
477 [b-\xff] (neg){12,}
478 [\x00-a] (neg)*+
479 Ket
480 End
481------------------------------------------------------------------
482
483/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
484------------------------------------------------------------------
485 Bra
486 [\x00-\x08\x0e-\x1f!-\xff] (neg)*
487 \s*
488
489 [0-9A-Z_a-z]++
490 \W+
491
492 [\x00-/:-\xff] (neg)*?
493 \d
494 0
495 [\x00-/:-@[-^`{-\xff] (neg){4,6}?
496 \w*
497 A
498 Ket
499 End
500------------------------------------------------------------------
501
502/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
503------------------------------------------------------------------
504 Bra
505 a*
506 [b-\xff\x{100}-\x{200}]?+
507 a#
508 a*+
509 [b-\xff\x{100}-\x{200}]?
510 b#
511 [a-f]*+
512 [g-\xff\x{100}-\x{200}]*+
513 #
514 [g-\xff\x{100}-\x{200}]*+
515 [a-c]*+
516 #
517 [g-\xff\x{100}-\x{200}]*
518 [a-h]*+
519 Ket
520 End
521------------------------------------------------------------------
522
523/^[\x{1234}\x{4321}]{2,4}?/
524 \x{1234}\x{1234}\x{1234}
525 0: \x{1234}\x{1234}
526
527# Check maximum non-UTF character size for the 16-bit library.
528
529/\x{ffff}/
530 A\x{ffff}B
531 0: \x{ffff}
532
533/\x{10000}/
534
535/\o{20000}/
536
537# Check maximum character size for the 32-bit library. These will all give
538# errors in the 16-bit library.
539
540/\x{110000}/
541
542/\x{7fffffff}/
543
544/\x{80000000}/
545
546/\x{ffffffff}/
547
548/\x{100000000}/
549Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
550
551/\o{17777777777}/
552
553/\o{20000000000}/
554
555/\o{37777777777}/
556
557/\o{40000000000}/
558Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
559
560/\x{7fffffff}\x{7fffffff}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700561Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100562First code unit = \x{7fffffff}
563Last code unit = \x{7fffffff}
564Subject length lower bound = 2
565
566/\x{80000000}\x{80000000}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700567Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100568First code unit = \x{80000000}
569Last code unit = \x{80000000}
570Subject length lower bound = 2
571
572/\x{ffffffff}\x{ffffffff}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700573Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100574First code unit = \x{ffffffff}
575Last code unit = \x{ffffffff}
576Subject length lower bound = 2
577
578# Non-UTF characters
579
580/.{2,3}/
581 \x{400000}\x{400001}\x{400002}\x{400003}
582 0: \x{400000}\x{400001}\x{400002}
583
584/\x{400000}\x{800000}/IBi
585------------------------------------------------------------------
586 Bra
587 /i \x{400000}\x{800000}
588 Ket
589 End
590------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700591Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100592Options: caseless
593First code unit = \x{400000}
594Last code unit = \x{800000}
595Subject length lower bound = 2
596
597# Check character ranges
598
599/[\H]/IB
600------------------------------------------------------------------
601 Bra
602 [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
603 Ket
604 End
605------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700606Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100607Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
608 \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
609 \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
610 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
611 _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
612 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
613 \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
614 \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
615 \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
616 \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
617 \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
618 \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
619 \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
620 \xfa \xfb \xfc \xfd \xfe \xff
621Subject length lower bound = 1
622
623/[\V]/IB
624------------------------------------------------------------------
625 Bra
626 [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
627 Ket
628 End
629------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700630Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100631Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
632 \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
633 \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
634 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
635 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
636 \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
637 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
638 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
639 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
640 \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
641 \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
642 \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
643 \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
644 \xfc \xfd \xfe \xff
645Subject length lower bound = 1
646
647/(*THEN:\[A]{65501})/expand
648
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700649# We can use pcre2test's utf8_input modifier to create wide pattern characters,
650# even though this test is run when UTF is not supported.
651
652/abý¿¿¿¿¿z/utf8_input
653 abý¿¿¿¿¿z
654 0: ab\x{7fffffff}z
655 ab\x{7fffffff}z
656 0: ab\x{7fffffff}z
657
658/abÿý¿¿¿¿¿z/utf8_input
659 abÿý¿¿¿¿¿z
660 0: ab\x{ffffffff}z
661 ab\x{ffffffff}z
662 0: ab\x{ffffffff}z
663
664/abÿAz/utf8_input
665 abÿAz
666 0: ab\x{80000041}z
667 ab\x{80000041}z
668 0: ab\x{80000041}z
669
Elliott Hughes378b1752021-06-08 13:42:40 -0700670/(?i:A{1,}\6666666666)/
671 A\x{1b6}6666666
672 0: A\x{1b6}6666666
673
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100674# End of testinput11