blob: d40851061fc2deb52d9920a14805a172386c241d [file] [log] [blame]
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001# This set of tests is for UTF-8 support and Unicode property support, with
2# relevance only for the 8-bit library.
3
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07004# The next 5 patterns have UTF-8 errors
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01005
6/[Ã]/utf
7Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
8
9/Ã/utf
10Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
11
12/ÃÃÃxxx/utf
13Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
14
15/‚‚‚‚‚‚‚Ã/utf
16Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
17
Elliott Hughes2dbd7d22020-06-03 14:32:37 -070018/‚‚‚‚‚‚‚Ã/match_invalid_utf
19Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
20
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010021# Now test subjects
22
23/badutf/utf
24\= Expect UTF-8 errors
25 X\xdf
26Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
27 XX\xef
28Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
29 XXX\xef\x80
30Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
31 X\xf7
32Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
33 XX\xf7\x80
34Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
35 XXX\xf7\x80\x80
36Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
37 \xfb
38Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
39 \xfb\x80
40Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
41 \xfb\x80\x80
42Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
43 \xfb\x80\x80\x80
44Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
45 \xfd
46Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
47 \xfd\x80
48Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
49 \xfd\x80\x80
50Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
51 \xfd\x80\x80\x80
52Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
53 \xfd\x80\x80\x80\x80
54Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
55 \xdf\x7f
56Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
57 \xef\x7f\x80
58Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
59 \xef\x80\x7f
60Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
61 \xf7\x7f\x80\x80
62Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
63 \xf7\x80\x7f\x80
64Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
65 \xf7\x80\x80\x7f
66Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
67 \xfb\x7f\x80\x80\x80
68Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
69 \xfb\x80\x7f\x80\x80
70Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
71 \xfb\x80\x80\x7f\x80
72Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
73 \xfb\x80\x80\x80\x7f
74Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
75 \xfd\x7f\x80\x80\x80\x80
76Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
77 \xfd\x80\x7f\x80\x80\x80
78Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
79 \xfd\x80\x80\x7f\x80\x80
80Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
81 \xfd\x80\x80\x80\x7f\x80
82Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
83 \xfd\x80\x80\x80\x80\x7f
84Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0
85 \xed\xa0\x80
86Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
87 \xc0\x8f
88Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0
89 \xe0\x80\x8f
90Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0
91 \xf0\x80\x80\x8f
92Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
93 \xf8\x80\x80\x80\x8f
94Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
95 \xfc\x80\x80\x80\x80\x8f
96Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
97 \x80
98Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
99 \xfe
100Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
101 \xff
102Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
103
104/badutf/utf
105\= Expect UTF-8 errors
106 XX\xfb\x80\x80\x80\x80
107Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
108 XX\xfd\x80\x80\x80\x80\x80
109Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2
110 XX\xf7\xbf\xbf\xbf
111Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
112
113/shortutf/utf
114\= Expect UTF-8 errors
115 XX\xdf\=ph
116Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
117 XX\xef\=ph
118Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
119 XX\xef\x80\=ph
120Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
121 \xf7\=ph
122Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
123 \xf7\x80\=ph
124Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
125 \xf7\x80\x80\=ph
126Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
127 \xfb\=ph
128Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
129 \xfb\x80\=ph
130Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
131 \xfb\x80\x80\=ph
132Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
133 \xfb\x80\x80\x80\=ph
134Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
135 \xfd\=ph
136Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
137 \xfd\x80\=ph
138Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
139 \xfd\x80\x80\=ph
140Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
141 \xfd\x80\x80\x80\=ph
142Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
143 \xfd\x80\x80\x80\x80\=ph
144Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
145
146/anything/utf
147\= Expect UTF-8 errors
148 X\xc0\x80
149Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
150 XX\xc1\x8f
151Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2
152 XXX\xe0\x9f\x80
153Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3
154 \xf0\x8f\x80\x80
155Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
156 \xf8\x87\x80\x80\x80
157Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
158 \xfc\x83\x80\x80\x80\x80
159Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
160 \xfe\x80\x80\x80\x80\x80
161Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
162 \xff\x80\x80\x80\x80\x80
163Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
164 \xf8\x88\x80\x80\x80
165Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
166 \xf9\x87\x80\x80\x80
167Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
168 \xfc\x84\x80\x80\x80\x80
169Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
170 \xfd\x83\x80\x80\x80\x80
171Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
172\= Expect no match
173 \xc3\x8f
174No match
175 \xe0\xaf\x80
176No match
177 \xe1\x80\x80
178No match
179 \xf0\x9f\x80\x80
180No match
181 \xf1\x8f\x80\x80
182No match
183 \xf8\x88\x80\x80\x80\=no_utf_check
184No match
185 \xf9\x87\x80\x80\x80\=no_utf_check
186No match
187 \xfc\x84\x80\x80\x80\x80\=no_utf_check
188No match
189 \xfd\x83\x80\x80\x80\x80\=no_utf_check
190No match
191
192# Similar tests with offsets
193
194/badutf/utf
195\= Expect UTF-8 errors
196 X\xdfabcd
197Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
198 X\xdfabcd\=offset=1
199Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
200\= Expect no match
201 X\xdfabcd\=offset=2
202No match
203
204/(?<=x)badutf/utf
205\= Expect UTF-8 errors
206 X\xdfabcd
207Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
208 X\xdfabcd\=offset=1
209Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
210 X\xdfabcd\=offset=2
211Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
212 X\xdfabcd\xdf\=offset=3
213Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
214\= Expect no match
215 X\xdfabcd\=offset=3
216No match
217
218/(?<=xx)badutf/utf
219\= Expect UTF-8 errors
220 X\xdfabcd
221Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
222 X\xdfabcd\=offset=1
223Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
224 X\xdfabcd\=offset=2
225Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
226 X\xdfabcd\=offset=3
227Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
228
229/(?<=xxxx)badutf/utf
230\= Expect UTF-8 errors
231 X\xdfabcd
232Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
233 X\xdfabcd\=offset=1
234Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
235 X\xdfabcd\=offset=2
236Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
237 X\xdfabcd\=offset=3
238Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
239 X\xdfabc\xdf\=offset=6
240Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
241 X\xdfabc\xdf\=offset=7
242Failed: error -33: bad offset value
243\= Expect no match
244 X\xdfabcd\=offset=6
245No match
246
247/\x{100}/IB,utf
248------------------------------------------------------------------
249 Bra
250 \x{100}
251 Ket
252 End
253------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700254Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100255Options: utf
256First code unit = \xc4
257Last code unit = \x80
258Subject length lower bound = 1
259
260/\x{1000}/IB,utf
261------------------------------------------------------------------
262 Bra
263 \x{1000}
264 Ket
265 End
266------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700267Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100268Options: utf
269First code unit = \xe1
270Last code unit = \x80
271Subject length lower bound = 1
272
273/\x{10000}/IB,utf
274------------------------------------------------------------------
275 Bra
276 \x{10000}
277 Ket
278 End
279------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700280Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100281Options: utf
282First code unit = \xf0
283Last code unit = \x80
284Subject length lower bound = 1
285
286/\x{100000}/IB,utf
287------------------------------------------------------------------
288 Bra
289 \x{100000}
290 Ket
291 End
292------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700293Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100294Options: utf
295First code unit = \xf4
296Last code unit = \x80
297Subject length lower bound = 1
298
299/\x{10ffff}/IB,utf
300------------------------------------------------------------------
301 Bra
302 \x{10ffff}
303 Ket
304 End
305------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700306Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100307Options: utf
308First code unit = \xf4
309Last code unit = \xbf
310Subject length lower bound = 1
311
312/[\x{ff}]/IB,utf
313------------------------------------------------------------------
314 Bra
315 \x{ff}
316 Ket
317 End
318------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700319Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100320Options: utf
321First code unit = \xc3
322Last code unit = \xbf
323Subject length lower bound = 1
324
325/[\x{100}]/IB,utf
326------------------------------------------------------------------
327 Bra
328 \x{100}
329 Ket
330 End
331------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700332Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100333Options: utf
334First code unit = \xc4
335Last code unit = \x80
336Subject length lower bound = 1
337
338/\x80/IB,utf
339------------------------------------------------------------------
340 Bra
341 \x{80}
342 Ket
343 End
344------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700345Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100346Options: utf
347First code unit = \xc2
348Last code unit = \x80
349Subject length lower bound = 1
350
351/\xff/IB,utf
352------------------------------------------------------------------
353 Bra
354 \x{ff}
355 Ket
356 End
357------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700358Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100359Options: utf
360First code unit = \xc3
361Last code unit = \xbf
362Subject length lower bound = 1
363
364/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
365------------------------------------------------------------------
366 Bra
367 \x{d55c}\x{ad6d}\x{c5b4}
368 Ket
369 End
370------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700371Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100372Options: utf
373First code unit = \xed
374Last code unit = \xb4
375Subject length lower bound = 3
376 \x{D55c}\x{ad6d}\x{C5B4}
377 0: \x{d55c}\x{ad6d}\x{c5b4}
378
379/\x{65e5}\x{672c}\x{8a9e}/IB,utf
380------------------------------------------------------------------
381 Bra
382 \x{65e5}\x{672c}\x{8a9e}
383 Ket
384 End
385------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700386Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100387Options: utf
388First code unit = \xe6
389Last code unit = \x9e
390Subject length lower bound = 3
391 \x{65e5}\x{672c}\x{8a9e}
392 0: \x{65e5}\x{672c}\x{8a9e}
393
394/\x{80}/IB,utf
395------------------------------------------------------------------
396 Bra
397 \x{80}
398 Ket
399 End
400------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700401Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100402Options: utf
403First code unit = \xc2
404Last code unit = \x80
405Subject length lower bound = 1
406
407/\x{084}/IB,utf
408------------------------------------------------------------------
409 Bra
410 \x{84}
411 Ket
412 End
413------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700414Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100415Options: utf
416First code unit = \xc2
417Last code unit = \x84
418Subject length lower bound = 1
419
420/\x{104}/IB,utf
421------------------------------------------------------------------
422 Bra
423 \x{104}
424 Ket
425 End
426------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700427Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100428Options: utf
429First code unit = \xc4
430Last code unit = \x84
431Subject length lower bound = 1
432
433/\x{861}/IB,utf
434------------------------------------------------------------------
435 Bra
436 \x{861}
437 Ket
438 End
439------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700440Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100441Options: utf
442First code unit = \xe0
443Last code unit = \xa1
444Subject length lower bound = 1
445
446/\x{212ab}/IB,utf
447------------------------------------------------------------------
448 Bra
449 \x{212ab}
450 Ket
451 End
452------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700453Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100454Options: utf
455First code unit = \xf0
456Last code unit = \xab
457Subject length lower bound = 1
458
459/[^ab\xC0-\xF0]/IB,utf
460------------------------------------------------------------------
461 Bra
462 [\x00-`c-\xbf\xf1-\xff] (neg)
463 Ket
464 End
465------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700466Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100467Options: utf
468Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
469 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
470 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
471 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
472 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
473 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
474 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
475 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
476 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
477 \xfe \xff
478Subject length lower bound = 1
479 \x{f1}
480 0: \x{f1}
481 \x{bf}
482 0: \x{bf}
483 \x{100}
484 0: \x{100}
485 \x{1000}
486 0: \x{1000}
487\= Expect no match
488 \x{c0}
489No match
490 \x{f0}
491No match
492
493/Ā{3,4}/IB,utf
494------------------------------------------------------------------
495 Bra
496 \x{100}{3}
497 \x{100}?+
498 Ket
499 End
500------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700501Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100502Options: utf
503First code unit = \xc4
504Last code unit = \x80
505Subject length lower bound = 3
506 \x{100}\x{100}\x{100}\x{100\x{100}
507 0: \x{100}\x{100}\x{100}
508
509/(\x{100}+|x)/IB,utf
510------------------------------------------------------------------
511 Bra
512 CBra 1
513 \x{100}++
514 Alt
515 x
516 Ket
517 Ket
518 End
519------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700520Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100521Options: utf
522Starting code units: x \xc4
523Subject length lower bound = 1
524
525/(\x{100}*a|x)/IB,utf
526------------------------------------------------------------------
527 Bra
528 CBra 1
529 \x{100}*+
530 a
531 Alt
532 x
533 Ket
534 Ket
535 End
536------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700537Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100538Options: utf
539Starting code units: a x \xc4
540Subject length lower bound = 1
541
542/(\x{100}{0,2}a|x)/IB,utf
543------------------------------------------------------------------
544 Bra
545 CBra 1
546 \x{100}{0,2}+
547 a
548 Alt
549 x
550 Ket
551 Ket
552 End
553------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700554Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100555Options: utf
556Starting code units: a x \xc4
557Subject length lower bound = 1
558
559/(\x{100}{1,2}a|x)/IB,utf
560------------------------------------------------------------------
561 Bra
562 CBra 1
563 \x{100}
564 \x{100}{0,1}+
565 a
566 Alt
567 x
568 Ket
569 Ket
570 End
571------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700572Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100573Options: utf
574Starting code units: x \xc4
575Subject length lower bound = 1
576
577/\x{100}/IB,utf
578------------------------------------------------------------------
579 Bra
580 \x{100}
581 Ket
582 End
583------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700584Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100585Options: utf
586First code unit = \xc4
587Last code unit = \x80
588Subject length lower bound = 1
589
590/a\x{100}\x{101}*/IB,utf
591------------------------------------------------------------------
592 Bra
593 a\x{100}
594 \x{101}*+
595 Ket
596 End
597------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700598Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100599Options: utf
600First code unit = 'a'
601Last code unit = \x80
602Subject length lower bound = 2
603
604/a\x{100}\x{101}+/IB,utf
605------------------------------------------------------------------
606 Bra
607 a\x{100}
608 \x{101}++
609 Ket
610 End
611------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700612Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100613Options: utf
614First code unit = 'a'
615Last code unit = \x81
616Subject length lower bound = 3
617
618/[^\x{c4}]/IB
619------------------------------------------------------------------
620 Bra
621 [^\x{c4}]
622 Ket
623 End
624------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700625Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100626Subject length lower bound = 1
627
628/[\x{100}]/IB,utf
629------------------------------------------------------------------
630 Bra
631 \x{100}
632 Ket
633 End
634------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700635Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100636Options: utf
637First code unit = \xc4
638Last code unit = \x80
639Subject length lower bound = 1
640 \x{100}
641 0: \x{100}
642 Z\x{100}
643 0: \x{100}
644 \x{100}Z
645 0: \x{100}
646
647/[\xff]/IB,utf
648------------------------------------------------------------------
649 Bra
650 \x{ff}
651 Ket
652 End
653------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700654Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100655Options: utf
656First code unit = \xc3
657Last code unit = \xbf
658Subject length lower bound = 1
659 >\x{ff}<
660 0: \x{ff}
661
662/[^\xff]/IB,utf
663------------------------------------------------------------------
664 Bra
665 [^\x{ff}]
666 Ket
667 End
668------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700669Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100670Options: utf
671Subject length lower bound = 1
672
673/\x{100}abc(xyz(?1))/IB,utf
674------------------------------------------------------------------
675 Bra
676 \x{100}abc
677 CBra 1
678 xyz
679 Recurse
680 Ket
681 Ket
682 End
683------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700684Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100685Options: utf
686First code unit = \xc4
687Last code unit = 'z'
688Subject length lower bound = 7
689
690/\777/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700691Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100692Options: utf
693First code unit = \xc7
694Last code unit = \xbf
695Subject length lower bound = 1
696 \x{1ff}
697 0: \x{1ff}
698 \777
699 0: \x{1ff}
700
701/\x{100}+\x{200}/IB,utf
702------------------------------------------------------------------
703 Bra
704 \x{100}++
705 \x{200}
706 Ket
707 End
708------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700709Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100710Options: utf
711First code unit = \xc4
712Last code unit = \x80
713Subject length lower bound = 2
714
715/\x{100}+X/IB,utf
716------------------------------------------------------------------
717 Bra
718 \x{100}++
719 X
720 Ket
721 End
722------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700723Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100724Options: utf
725First code unit = \xc4
726Last code unit = 'X'
727Subject length lower bound = 2
728
729/^[\QĀ\E-\QŐ\E/B,utf
730Failed: error 106 at offset 15: missing terminating ] for character class
731
732# This tests the stricter UTF-8 check according to RFC 3629.
733
734/X/utf
735\= Expect UTF-8 errors
736 \x{d800}
737Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
738 \x{da00}
739Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
740 \x{dfff}
741Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
742 \x{110000}
743Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
744 \x{2000000}
745Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
746 \x{7fffffff}
747Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
748\= Expect no match
749 \x{d800}\=no_utf_check
750No match
751 \x{da00}\=no_utf_check
752No match
753 \x{dfff}\=no_utf_check
754No match
755 \x{110000}\=no_utf_check
756No match
757 \x{2000000}\=no_utf_check
758No match
759 \x{7fffffff}\=no_utf_check
760No match
761
762/(*UTF8)\x{1234}/
763 abcd\x{1234}pqr
764 0: \x{1234}
765
766/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700767Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100768Compile options: <none>
769Overall options: utf
770\R matches any Unicode newline
771Forced newline is CRLF
772First code unit = 'a'
773Last code unit = 'b'
774Subject length lower bound = 3
775
776/\h/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700777Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100778Options: utf
779Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3
780Subject length lower bound = 1
781 ABC\x{09}
782 0: \x{09}
783 ABC\x{20}
784 0:
785 ABC\x{a0}
786 0: \x{a0}
787 ABC\x{1680}
788 0: \x{1680}
789 ABC\x{180e}
790 0: \x{180e}
791 ABC\x{2000}
792 0: \x{2000}
793 ABC\x{202f}
794 0: \x{202f}
795 ABC\x{205f}
796 0: \x{205f}
797 ABC\x{3000}
798 0: \x{3000}
799
800/\v/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700801Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100802Options: utf
803Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
804Subject length lower bound = 1
805 ABC\x{0a}
806 0: \x{0a}
807 ABC\x{0b}
808 0: \x{0b}
809 ABC\x{0c}
810 0: \x{0c}
811 ABC\x{0d}
812 0: \x{0d}
813 ABC\x{85}
814 0: \x{85}
815 ABC\x{2028}
816 0: \x{2028}
817
818/\h*A/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700819Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100820Options: utf
821Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
822Last code unit = 'A'
823Subject length lower bound = 1
824 CDBABC
825 0: A
826
827/\v+A/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700828Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100829Options: utf
830Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
831Last code unit = 'A'
832Subject length lower bound = 2
833
834/\s?xxx\s/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700835Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100836Options: utf
837Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
838Last code unit = 'x'
839Subject length lower bound = 4
840
841/\sxxx\s/I,utf,tables=2
Elliott Hughes0c26e192019-08-07 12:24:46 -0700842Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100843Options: utf
844Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
845Last code unit = 'x'
846Subject length lower bound = 5
847 AB\x{85}xxx\x{a0}XYZ
848 0: \x{85}xxx\x{a0}
849 AB\x{a0}xxx\x{85}XYZ
850 0: \x{a0}xxx\x{85}
851
852/\S \S/I,utf,tables=2
Elliott Hughes0c26e192019-08-07 12:24:46 -0700853Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100854Options: utf
855Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
856 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
857 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
858 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
859 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
860 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
861 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
862 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
863 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
864Last code unit = ' '
865Subject length lower bound = 3
866 \x{a2} \x{84}
867 0: \x{a2} \x{84}
868 A Z
869 0: A Z
870
871/a+/utf
872 a\x{123}aa\=offset=1
873 0: aa
874 a\x{123}aa\=offset=3
875 0: aa
876 a\x{123}aa\=offset=4
877 0: a
878\= Expect bad offset value
879 a\x{123}aa\=offset=6
880Failed: error -33: bad offset value
881\= Expect bad UTF-8 offset
882 a\x{123}aa\=offset=2
883Error -36 (bad UTF-8 offset)
884\= Expect no match
885 a\x{123}aa\=offset=5
886No match
887
888/\x{1234}+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700889Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100890Options: caseless utf
891Starting code units: \xe1
892Subject length lower bound = 1
893
894/\x{1234}+?/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700895Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100896Options: caseless utf
897Starting code units: \xe1
898Subject length lower bound = 1
899
900/\x{1234}++/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700901Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100902Options: caseless utf
903Starting code units: \xe1
904Subject length lower bound = 1
905
906/\x{1234}{2}/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700907Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100908Options: caseless utf
909Starting code units: \xe1
910Subject length lower bound = 2
911
912/[^\x{c4}]/IB,utf
913------------------------------------------------------------------
914 Bra
915 [^\x{c4}]
916 Ket
917 End
918------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700919Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100920Options: utf
921Subject length lower bound = 1
922
923/X+\x{200}/IB,utf
924------------------------------------------------------------------
925 Bra
926 X++
927 \x{200}
928 Ket
929 End
930------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700931Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100932Options: utf
933First code unit = 'X'
934Last code unit = \x80
935Subject length lower bound = 2
936
937/\R/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700938Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100939Options: utf
940Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
941Subject length lower bound = 1
942
943/\777/IB,utf
944------------------------------------------------------------------
945 Bra
946 \x{1ff}
947 Ket
948 End
949------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700950Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100951Options: utf
952First code unit = \xc7
953Last code unit = \xbf
954Subject length lower bound = 1
955
956/\w+\x{C4}/B,utf
957------------------------------------------------------------------
958 Bra
959 \w++
960 \x{c4}
961 Ket
962 End
963------------------------------------------------------------------
964 a\x{C4}\x{C4}
965 0: a\x{c4}
966
967/\w+\x{C4}/B,utf,tables=2
968------------------------------------------------------------------
969 Bra
970 \w+
971 \x{c4}
972 Ket
973 End
974------------------------------------------------------------------
975 a\x{C4}\x{C4}
976 0: a\x{c4}\x{c4}
977
978/\W+\x{C4}/B,utf
979------------------------------------------------------------------
980 Bra
981 \W+
982 \x{c4}
983 Ket
984 End
985------------------------------------------------------------------
986 !\x{C4}
987 0: !\x{c4}
988
989/\W+\x{C4}/B,utf,tables=2
990------------------------------------------------------------------
991 Bra
992 \W++
993 \x{c4}
994 Ket
995 End
996------------------------------------------------------------------
997 !\x{C4}
998 0: !\x{c4}
999
1000/\W+\x{A1}/B,utf
1001------------------------------------------------------------------
1002 Bra
1003 \W+
1004 \x{a1}
1005 Ket
1006 End
1007------------------------------------------------------------------
1008 !\x{A1}
1009 0: !\x{a1}
1010
1011/\W+\x{A1}/B,utf,tables=2
1012------------------------------------------------------------------
1013 Bra
1014 \W+
1015 \x{a1}
1016 Ket
1017 End
1018------------------------------------------------------------------
1019 !\x{A1}
1020 0: !\x{a1}
1021
1022/X\s+\x{A0}/B,utf
1023------------------------------------------------------------------
1024 Bra
1025 X
1026 \s++
1027 \x{a0}
1028 Ket
1029 End
1030------------------------------------------------------------------
1031 X\x20\x{A0}\x{A0}
1032 0: X \x{a0}
1033
1034/X\s+\x{A0}/B,utf,tables=2
1035------------------------------------------------------------------
1036 Bra
1037 X
1038 \s+
1039 \x{a0}
1040 Ket
1041 End
1042------------------------------------------------------------------
1043 X\x20\x{A0}\x{A0}
1044 0: X \x{a0}\x{a0}
1045
1046/\S+\x{A0}/B,utf
1047------------------------------------------------------------------
1048 Bra
1049 \S+
1050 \x{a0}
1051 Ket
1052 End
1053------------------------------------------------------------------
1054 X\x{A0}\x{A0}
1055 0: X\x{a0}\x{a0}
1056
1057/\S+\x{A0}/B,utf,tables=2
1058------------------------------------------------------------------
1059 Bra
1060 \S++
1061 \x{a0}
1062 Ket
1063 End
1064------------------------------------------------------------------
1065 X\x{A0}\x{A0}
1066 0: X\x{a0}
1067
1068/\x{a0}+\s!/B,utf
1069------------------------------------------------------------------
1070 Bra
1071 \x{a0}++
1072 \s
1073 !
1074 Ket
1075 End
1076------------------------------------------------------------------
1077 \x{a0}\x20!
1078 0: \x{a0} !
1079
1080/\x{a0}+\s!/B,utf,tables=2
1081------------------------------------------------------------------
1082 Bra
1083 \x{a0}+
1084 \s
1085 !
1086 Ket
1087 End
1088------------------------------------------------------------------
1089 \x{a0}\x20!
1090 0: \x{a0} !
1091
1092/A/utf
1093 \x{ff000041}
1094** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
1095 \x{7f000041}
1096Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
1097
1098/(*UTF8)abc/never_utf
1099Failed: error 174 at offset 7: using UTF is disabled by the application
1100
1101/abc/utf,never_utf
1102Failed: error 174 at offset 0: using UTF is disabled by the application
1103
1104/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
1105------------------------------------------------------------------
1106 Bra
1107 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1108 Ket
1109 End
1110------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001111Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001112Options: caseless utf
1113First code unit = 'A' (caseless)
1114Subject length lower bound = 5
1115
1116/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
1117------------------------------------------------------------------
1118 Bra
1119 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1120 Ket
1121 End
1122------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001123Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001124Options: utf
1125First code unit = 'A'
1126Last code unit = \xb0
1127Subject length lower bound = 5
1128
1129/AB\x{1fb0}/IB,utf
1130------------------------------------------------------------------
1131 Bra
1132 AB\x{1fb0}
1133 Ket
1134 End
1135------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001136Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001137Options: utf
1138First code unit = 'A'
1139Last code unit = \xb0
1140Subject length lower bound = 3
1141
1142/AB\x{1fb0}/IBi,utf
1143------------------------------------------------------------------
1144 Bra
1145 /i AB\x{1fb0}
1146 Ket
1147 End
1148------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001149Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001150Options: caseless utf
1151First code unit = 'A' (caseless)
1152Last code unit = 'B' (caseless)
1153Subject length lower bound = 3
1154
1155/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001156Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001157Options: caseless utf
1158Starting code units: \xd0 \xd1
1159Subject length lower bound = 17
1160 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1161 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1162 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1163 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1164
1165/[â±¥]/Bi,utf
1166------------------------------------------------------------------
1167 Bra
1168 /i \x{2c65}
1169 Ket
1170 End
1171------------------------------------------------------------------
1172
1173/[^â±¥]/Bi,utf
1174------------------------------------------------------------------
1175 Bra
1176 /i [^\x{2c65}]
1177 Ket
1178 End
1179------------------------------------------------------------------
1180
1181/\h/I
Elliott Hughes0c26e192019-08-07 12:24:46 -07001182Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001183Starting code units: \x09 \x20 \xa0
1184Subject length lower bound = 1
1185
1186/\v/I
Elliott Hughes0c26e192019-08-07 12:24:46 -07001187Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001188Starting code units: \x0a \x0b \x0c \x0d \x85
1189Subject length lower bound = 1
1190
1191/\R/I
Elliott Hughes0c26e192019-08-07 12:24:46 -07001192Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001193Starting code units: \x0a \x0b \x0c \x0d \x85
1194Subject length lower bound = 1
1195
1196/[[:blank:]]/B,ucp
1197------------------------------------------------------------------
1198 Bra
1199 [\x09 \xa0]
1200 Ket
1201 End
1202------------------------------------------------------------------
1203
1204/\x{212a}+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001205Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001206Options: caseless utf
1207Starting code units: K k \xe2
1208Subject length lower bound = 1
1209 KKkk\x{212a}
1210 0: KKkk\x{212a}
1211
1212/s+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001213Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001214Options: caseless utf
1215Starting code units: S s \xc5
1216Subject length lower bound = 1
1217 SSss\x{17f}
1218 0: SSss\x{17f}
1219
1220/\x{100}*A/IB,utf
1221------------------------------------------------------------------
1222 Bra
1223 \x{100}*+
1224 A
1225 Ket
1226 End
1227------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001228Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001229Options: utf
1230Starting code units: A \xc4
1231Last code unit = 'A'
1232Subject length lower bound = 1
1233 A
1234 0: A
1235
1236/\x{100}*\d(?R)/IB,utf
1237------------------------------------------------------------------
1238 Bra
1239 \x{100}*+
1240 \d
1241 Recurse
1242 Ket
1243 End
1244------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001245Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001246Options: utf
1247Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1248Subject length lower bound = 1
1249
1250/[Z\x{100}]/IB,utf
1251------------------------------------------------------------------
1252 Bra
1253 [Z\x{100}]
1254 Ket
1255 End
1256------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001257Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001258Options: utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001259Starting code units: Z \xc4
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001260Subject length lower bound = 1
1261 Z\x{100}
1262 0: Z
1263 \x{100}
1264 0: \x{100}
1265 \x{100}Z
1266 0: \x{100}
1267
1268/[z-\x{100}]/IB,utf
1269------------------------------------------------------------------
1270 Bra
1271 [z-\xff\x{100}]
1272 Ket
1273 End
1274------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001275Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001276Options: utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001277Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001278Subject length lower bound = 1
1279
1280/[z\Qa-d]Ā\E]/IB,utf
1281------------------------------------------------------------------
1282 Bra
1283 [\-\]adz\x{100}]
1284 Ket
1285 End
1286------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001287Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001288Options: utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001289Starting code units: - ] a d z \xc4
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001290Subject length lower bound = 1
1291 \x{100}
1292 0: \x{100}
1293 Ā
1294 0: \x{100}
1295
1296/[ab\x{100}]abc(xyz(?1))/IB,utf
1297------------------------------------------------------------------
1298 Bra
1299 [ab\x{100}]
1300 abc
1301 CBra 1
1302 xyz
1303 Recurse
1304 Ket
1305 Ket
1306 End
1307------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001308Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001309Options: utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001310Starting code units: a b \xc4
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001311Last code unit = 'z'
1312Subject length lower bound = 7
1313
1314/\x{100}*\s/IB,utf
1315------------------------------------------------------------------
1316 Bra
1317 \x{100}*+
1318 \s
1319 Ket
1320 End
1321------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001322Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001323Options: utf
1324Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4
1325Subject length lower bound = 1
1326
1327/\x{100}*\d/IB,utf
1328------------------------------------------------------------------
1329 Bra
1330 \x{100}*+
1331 \d
1332 Ket
1333 End
1334------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001335Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001336Options: utf
1337Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1338Subject length lower bound = 1
1339
1340/\x{100}*\w/IB,utf
1341------------------------------------------------------------------
1342 Bra
1343 \x{100}*+
1344 \w
1345 Ket
1346 End
1347------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001348Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001349Options: utf
1350Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1351 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1352 \xc4
1353Subject length lower bound = 1
1354
1355/\x{100}*\D/IB,utf
1356------------------------------------------------------------------
1357 Bra
1358 \x{100}*
1359 \D
1360 Ket
1361 End
1362------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001363Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001364Options: utf
1365Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1366 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1367 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1368 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1369 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2
1370 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1371 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1372 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1373 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1374 \xff
1375Subject length lower bound = 1
1376
1377/\x{100}*\S/IB,utf
1378------------------------------------------------------------------
1379 Bra
1380 \x{100}*
1381 \S
1382 Ket
1383 End
1384------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001385Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001386Options: utf
1387Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1388 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1389 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1390 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1391 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
1392 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
1393 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
1394 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
1395 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1396Subject length lower bound = 1
1397
1398/\x{100}*\W/IB,utf
1399------------------------------------------------------------------
1400 Bra
1401 \x{100}*
1402 \W
1403 Ket
1404 End
1405------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001406Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001407Options: utf
1408Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1409 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1410 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1411 ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
1412 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
1413 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7
1414 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6
1415 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1416Subject length lower bound = 1
1417
1418/[\x{105}-\x{109}]/IBi,utf
1419------------------------------------------------------------------
1420 Bra
1421 [\x{104}-\x{109}]
1422 Ket
1423 End
1424------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001425Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001426Options: caseless utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001427Starting code units: \xc4
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001428Subject length lower bound = 1
1429 \x{104}
1430 0: \x{104}
1431 \x{105}
1432 0: \x{105}
1433 \x{109}
1434 0: \x{109}
1435\= Expect no match
1436 \x{100}
1437No match
1438 \x{10a}
1439No match
1440
1441/[z-\x{100}]/IBi,utf
1442------------------------------------------------------------------
1443 Bra
1444 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1445 Ket
1446 End
1447------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001448Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001449Options: caseless utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001450Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001451Subject length lower bound = 1
1452 Z
1453 0: Z
1454 z
1455 0: z
1456 \x{39c}
1457 0: \x{39c}
1458 \x{178}
1459 0: \x{178}
1460 |
1461 0: |
1462 \x{80}
1463 0: \x{80}
1464 \x{ff}
1465 0: \x{ff}
1466 \x{100}
1467 0: \x{100}
1468 \x{101}
1469 0: \x{101}
1470\= Expect no match
1471 \x{102}
1472No match
1473 Y
1474No match
1475 y
1476No match
1477
1478/[z-\x{100}]/IBi,utf
1479------------------------------------------------------------------
1480 Bra
1481 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1482 Ket
1483 End
1484------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001485Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001486Options: caseless utf
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001487Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001488Subject length lower bound = 1
1489
1490/\x{3a3}B/IBi,utf
1491------------------------------------------------------------------
1492 Bra
1493 clist 03a3 03c2 03c3
1494 /i B
1495 Ket
1496 End
1497------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001498Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001499Options: caseless utf
1500Starting code units: \xce \xcf
1501Last code unit = 'B' (caseless)
1502Subject length lower bound = 2
1503
1504/abc/utf,replace=Ã
1505 abc
1506Failed: error -3: UTF-8 error: 1 byte missing at end
1507
1508/(?<=(a)(?-1))x/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001509Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001510Max lookbehind = 2
1511Options: utf
1512First code unit = 'x'
1513Subject length lower bound = 1
1514 a\x80zx\=offset=3
1515Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
1516
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001517/[\W\p{Any}]/B
1518------------------------------------------------------------------
1519 Bra
1520 [\x00-/:-@[-^`{-\xff\p{Any}]
1521 Ket
1522 End
1523------------------------------------------------------------------
1524 abc
1525 0: a
1526 123
1527 0: 1
1528
1529/[\W\pL]/B
1530------------------------------------------------------------------
1531 Bra
1532 [\x00-/:-@[-^`{-\xff\p{L}]
1533 Ket
1534 End
1535------------------------------------------------------------------
1536 abc
1537 0: a
1538\= Expect no match
1539 123
1540No match
1541
1542/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf
1543Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
1544
1545/[\s[:^ascii:]]/B,ucp
1546------------------------------------------------------------------
1547 Bra
1548 [\x80-\xff\p{Xsp}]
1549 Ket
1550 End
1551------------------------------------------------------------------
1552
1553# A special extra option allows escaped surrogate code points in 8-bit mode,
1554# but subjects containing them must not be UTF-checked.
1555
1556/\x{d800}/I,utf,allow_surrogate_escapes
Elliott Hughes0c26e192019-08-07 12:24:46 -07001557Capture group count = 0
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001558Options: utf
1559Extra options: allow_surrogate_escapes
1560First code unit = \xed
1561Last code unit = \x80
1562Subject length lower bound = 1
1563 \x{d800}\=no_utf_check
1564 0: \x{d800}
1565
1566/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1567 \x{dfff}\x{df01}\=no_utf_check
1568 0: \x{dfff}\x{df01}
1569
1570# This has different starting code units in 8-bit mode.
1571
1572/^[^ab]/IB,utf
1573------------------------------------------------------------------
1574 Bra
1575 ^
1576 [\x00-`c-\xff] (neg)
1577 Ket
1578 End
1579------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001580Capture group count = 0
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001581Compile options: utf
1582Overall options: anchored utf
1583Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1584 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1585 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1586 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1587 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1588 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
1589 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
1590 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
1591 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
1592 \xfe \xff
1593Subject length lower bound = 1
1594 c
1595 0: c
1596 \x{ff}
1597 0: \x{ff}
1598 \x{100}
1599 0: \x{100}
1600\= Expect no match
1601 aaa
1602No match
Elliott Hughes0c26e192019-08-07 12:24:46 -07001603
1604# Offsets are different in 8-bit mode.
1605
1606/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
1607 123abcáyzabcdef789abcሴqr
1608 1(2) Old 6 6 "" New 6 8 "<>"
1609 2(2) Old 13 13 "" New 15 17 "<>"
1610 3(2) Old 13 16 "def" New 17 22 "<def>"
1611 4(2) Old 22 22 "" New 28 30 "<>"
1612 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
1613
1614# Check name length with non-ASCII characters
1615
1616/(?'ABáC678901234567890123456789012'...)/utf
1617
1618/(?'ABáC6789012345678901234567890123'...)/utf
1619Failed: error 148 at offset 36: subpattern name is too long (maximum 32 code units)
1620
1621/(?'ABZC6789012345678901234567890123'...)/utf
1622
1623/(?(n/utf
1624Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1625
1626/(?(á/utf
1627Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001628
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001629# Invalid UTF-8 tests
1630
1631/.../g,match_invalid_utf
1632 abcd\x80wxzy\x80pqrs
1633 0: abc
1634 0: wxz
1635 0: pqr
1636 abcd\x{80}wxzy\x80pqrs
1637 0: abc
1638 0: d\x{80}w
1639 0: xzy
1640 0: pqr
1641
1642/abc/match_invalid_utf
1643 ab\x80ab\=ph
1644Partial match: ab
1645\= Expect no match
1646 ab\x80cdef\=ph
1647No match
1648
1649/ab$/match_invalid_utf
1650 ab\x80cdeab
1651 0: ab
1652\= Expect no match
1653 ab\x80cde
1654No match
1655
1656/.../g,match_invalid_utf
1657 abcd\x{80}wxzy\x80pqrs
1658 0: abc
1659 0: d\x{80}w
1660 0: xzy
1661 0: pqr
1662
1663/(?<=x)../g,match_invalid_utf
1664 abcd\x{80}wxzy\x80pqrs
1665 0: zy
1666 abcd\x{80}wxzy\x80xpqrs
1667 0: zy
1668 0: pq
1669
1670/X$/match_invalid_utf
1671\= Expect no match
1672 X\xc4
1673No match
1674
1675/(?<=..)X/match_invalid_utf,aftertext
1676 AB\x80AQXYZ
1677 0: X
1678 0+ YZ
1679 AB\x80AQXYZ\=offset=5
1680 0: X
1681 0+ YZ
1682 AB\x80\x80AXYZXC\=offset=5
1683 0: X
1684 0+ C
1685\= Expect no match
1686 AB\x80XYZ
1687No match
1688 AB\x80XYZ\=offset=3
1689No match
1690 AB\xfeXYZ
1691No match
1692 AB\xffXYZ\=offset=3
1693No match
1694 AB\x80AXYZ
1695No match
1696 AB\x80AXYZ\=offset=4
1697No match
1698 AB\x80\x80AXYZ\=offset=5
1699No match
1700
1701/.../match_invalid_utf
1702 AB\xc4CCC
1703 0: CCC
1704\= Expect no match
1705 A\x{d800}B
1706No match
1707 A\x{110000}B
1708No match
1709 A\xc4B
1710No match
1711
1712/\bX/match_invalid_utf
1713 A\x80X
1714 0: X
1715
1716/\BX/match_invalid_utf
1717\= Expect no match
1718 A\x80X
1719No match
1720
1721/(?<=...)X/match_invalid_utf
1722 AAA\x80BBBXYZ
1723 0: X
1724\= Expect no match
1725 AAA\x80BXYZ
1726No match
1727 AAA\x80BBXYZ
1728No match
1729
1730# -------------------------------------
1731
1732/(*UTF)(?=\x{123})/I
1733Capture group count = 0
1734May match empty string
1735Compile options: <none>
1736Overall options: utf
1737First code unit = \xc4
1738Last code unit = \xa3
1739Subject length lower bound = 1
1740
1741/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
1742Capture group count = 0
1743Options: utf
1744Starting code units: \xc3
1745Last code unit = 'X'
1746Subject length lower bound = 3
1747
1748/[󿾟,]/BI,utf
1749------------------------------------------------------------------
1750 Bra
1751 [,\x{fff9f}]
1752 Ket
1753 End
1754------------------------------------------------------------------
1755Capture group count = 0
1756Options: utf
1757Starting code units: , \xf3
1758Subject length lower bound = 1
1759
1760/[\x{fff4}-\x{ffff8}]/I,utf
1761Capture group count = 0
1762Options: utf
1763Starting code units: \xef \xf0 \xf1 \xf2 \xf3
1764Subject length lower bound = 1
1765
1766/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
1767Capture group count = 0
1768Options: utf
1769Starting code units: \xef \xf0 \xf1 \xf2 \xf4
1770Subject length lower bound = 1
1771
1772/[\xff\x{ffff}]/I,utf
1773Capture group count = 0
1774Options: utf
1775Starting code units: \xc3 \xef
1776Subject length lower bound = 1
1777
1778/[\xff\x{ff}]/I,utf
1779Capture group count = 0
1780Options: utf
1781Starting code units: \xc3
1782Subject length lower bound = 1
1783 abc\x{ff}def
1784 0: \x{ff}
1785
1786/[\xff\x{ff}]/I
1787Capture group count = 0
1788First code unit = \xff
1789Subject length lower bound = 1
1790 abc\x{ff}def
1791 0: \xff
1792
1793/[Ss]/I
1794Capture group count = 0
1795First code unit = 'S' (caseless)
1796Subject length lower bound = 1
1797
1798/[Ss]/I,utf
1799Capture group count = 0
1800Options: utf
1801Starting code units: S s
1802Subject length lower bound = 1
1803
1804/(?:\x{ff}|\x{3000})/I,utf
1805Capture group count = 0
1806Options: utf
1807Starting code units: \xc3 \xe3
1808Subject length lower bound = 1
1809
1810/x/utf
1811 abxyz
1812 0: x
1813 \x80\=startchar
1814Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
1815 abc\x80\=startchar
1816Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
1817 abc\x80\=startchar,offset=3
1818Error -36 (bad UTF-8 offset)
1819
1820/\x{c1}+\x{e1}/iIB,ucp
1821------------------------------------------------------------------
1822 Bra
1823 /i \x{c1}+
1824 /i \x{e1}
1825 Ket
1826 End
1827------------------------------------------------------------------
1828Capture group count = 0
1829Options: caseless ucp
1830First code unit = \xc1 (caseless)
1831Last code unit = \xe1 (caseless)
1832Subject length lower bound = 2
1833 \x{c1}\x{c1}\x{c1}
1834 0: \xc1\xc1\xc1
1835 \x{e1}\x{e1}\x{e1}
1836 0: \xe1\xe1\xe1
1837
1838/a|\x{c1}/iI,ucp
1839Capture group count = 0
1840Options: caseless ucp
1841Starting code units: A a \xc1 \xe1
1842Subject length lower bound = 1
1843 \x{e1}xxx
1844 0: \xe1
1845
1846/a|\x{c1}/iI,utf
1847Capture group count = 0
1848Options: caseless utf
1849Starting code units: A a \xc3
1850Subject length lower bound = 1
1851 \x{e1}xxx
1852 0: \x{e1}
1853
1854/\x{c1}|\x{e1}/iI,ucp
1855Capture group count = 0
1856Options: caseless ucp
1857First code unit = \xc1 (caseless)
1858Subject length lower bound = 1
1859
1860/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
1861 X\x{e1}Y
1862 1: >\xc1<
1863
1864/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
1865 X\x{c1}Y
1866 1: >\xe1<
1867
1868# Without UTF or UCP, characters > 127 have only one case in the default locale.
1869
1870/X(\x{e1})Y/replace=>\U$1<,substitute_extended
1871 X\x{e1}Y
1872 1: >\xe1<
1873
Elliott Hughes3435c422020-12-04 13:18:28 -08001874/A/utf,match_invalid_utf,caseless
1875 \xe5A
1876 0: A
1877
1878/\bch\b/utf,match_invalid_utf
1879 qchq\=ph
1880Partial match:
1881 qchq\=ps
1882Partial match:
1883
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001884# End of testinput10