blob: 84c485817d0ed14e8b297048a50185eaf104cffb [file] [log] [blame]
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001# This set of tests is for UTF-16 and UTF-32 support, including Unicode
2# properties. It is relevant only to the 16-bit and 32-bit libraries. The
3# output is different for each library, so there are separate output files.
4
5/ÃÃÃxxx/IB,utf,no_utf_check
6** Failed: invalid UTF-8 string cannot be converted to 16-bit string
7
8/abc/utf
9 Ã]
10** Failed: invalid UTF-8 string cannot be used as input in UTF mode
11
12# Check maximum character size
13
14/\x{ffff}/IB,utf
15------------------------------------------------------------------
16 Bra
17 \x{ffff}
18 Ket
19 End
20------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070021Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010022Options: utf
23First code unit = \x{ffff}
24Subject length lower bound = 1
25
26/\x{10000}/IB,utf
27------------------------------------------------------------------
28 Bra
29 \x{10000}
30 Ket
31 End
32------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070033Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010034Options: utf
35First code unit = \x{d800}
36Last code unit = \x{dc00}
37Subject length lower bound = 1
38
39/\x{100}/IB,utf
40------------------------------------------------------------------
41 Bra
42 \x{100}
43 Ket
44 End
45------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070046Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010047Options: utf
48First code unit = \x{100}
49Subject length lower bound = 1
50
51/\x{1000}/IB,utf
52------------------------------------------------------------------
53 Bra
54 \x{1000}
55 Ket
56 End
57------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070058Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010059Options: utf
60First code unit = \x{1000}
61Subject length lower bound = 1
62
63/\x{10000}/IB,utf
64------------------------------------------------------------------
65 Bra
66 \x{10000}
67 Ket
68 End
69------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070070Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010071Options: utf
72First code unit = \x{d800}
73Last code unit = \x{dc00}
74Subject length lower bound = 1
75
76/\x{100000}/IB,utf
77------------------------------------------------------------------
78 Bra
79 \x{100000}
80 Ket
81 End
82------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070083Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010084Options: utf
85First code unit = \x{dbc0}
86Last code unit = \x{dc00}
87Subject length lower bound = 1
88
89/\x{10ffff}/IB,utf
90------------------------------------------------------------------
91 Bra
92 \x{10ffff}
93 Ket
94 End
95------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -070096Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010097Options: utf
98First code unit = \x{dbff}
99Last code unit = \x{dfff}
100Subject length lower bound = 1
101
102/[\x{ff}]/IB,utf
103------------------------------------------------------------------
104 Bra
105 \x{ff}
106 Ket
107 End
108------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700109Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100110Options: utf
111First code unit = \xff
112Subject length lower bound = 1
113
114/[\x{100}]/IB,utf
115------------------------------------------------------------------
116 Bra
117 \x{100}
118 Ket
119 End
120------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700121Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100122Options: utf
123First code unit = \x{100}
124Subject length lower bound = 1
125
126/\x80/IB,utf
127------------------------------------------------------------------
128 Bra
129 \x{80}
130 Ket
131 End
132------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700133Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100134Options: utf
135First code unit = \x80
136Subject length lower bound = 1
137
138/\xff/IB,utf
139------------------------------------------------------------------
140 Bra
141 \x{ff}
142 Ket
143 End
144------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700145Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100146Options: utf
147First code unit = \xff
148Subject length lower bound = 1
149
150/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
151------------------------------------------------------------------
152 Bra
153 \x{d55c}\x{ad6d}\x{c5b4}
154 Ket
155 End
156------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700157Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100158Options: utf
159First code unit = \x{d55c}
160Last code unit = \x{c5b4}
161Subject length lower bound = 3
162 \x{D55c}\x{ad6d}\x{C5B4}
163 0: \x{d55c}\x{ad6d}\x{c5b4}
164
165/\x{65e5}\x{672c}\x{8a9e}/IB,utf
166------------------------------------------------------------------
167 Bra
168 \x{65e5}\x{672c}\x{8a9e}
169 Ket
170 End
171------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700172Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100173Options: utf
174First code unit = \x{65e5}
175Last code unit = \x{8a9e}
176Subject length lower bound = 3
177 \x{65e5}\x{672c}\x{8a9e}
178 0: \x{65e5}\x{672c}\x{8a9e}
179
180/\x{80}/IB,utf
181------------------------------------------------------------------
182 Bra
183 \x{80}
184 Ket
185 End
186------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700187Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100188Options: utf
189First code unit = \x80
190Subject length lower bound = 1
191
192/\x{084}/IB,utf
193------------------------------------------------------------------
194 Bra
195 \x{84}
196 Ket
197 End
198------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700199Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100200Options: utf
201First code unit = \x84
202Subject length lower bound = 1
203
204/\x{104}/IB,utf
205------------------------------------------------------------------
206 Bra
207 \x{104}
208 Ket
209 End
210------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700211Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100212Options: utf
213First code unit = \x{104}
214Subject length lower bound = 1
215
216/\x{861}/IB,utf
217------------------------------------------------------------------
218 Bra
219 \x{861}
220 Ket
221 End
222------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700223Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100224Options: utf
225First code unit = \x{861}
226Subject length lower bound = 1
227
228/\x{212ab}/IB,utf
229------------------------------------------------------------------
230 Bra
231 \x{212ab}
232 Ket
233 End
234------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700235Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100236Options: utf
237First code unit = \x{d844}
238Last code unit = \x{deab}
239Subject length lower bound = 1
240
241/[^ab\xC0-\xF0]/IB,utf
242------------------------------------------------------------------
243 Bra
244 [\x00-`c-\xbf\xf1-\xff] (neg)
245 Ket
246 End
247------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700248Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100249Options: utf
250Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
251 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
252 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
253 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
254 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
255 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
256 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
257 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
258 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
259 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
260 \xfc \xfd \xfe \xff
261Subject length lower bound = 1
262 \x{f1}
263 0: \x{f1}
264 \x{bf}
265 0: \x{bf}
266 \x{100}
267 0: \x{100}
268 \x{1000}
269 0: \x{1000}
270\= Expect no match
271 \x{c0}
272No match
273 \x{f0}
274No match
275
276/Ā{3,4}/IB,utf
277------------------------------------------------------------------
278 Bra
279 \x{100}{3}
280 \x{100}?+
281 Ket
282 End
283------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700284Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100285Options: utf
286First code unit = \x{100}
287Last code unit = \x{100}
288Subject length lower bound = 3
289 \x{100}\x{100}\x{100}\x{100\x{100}
290 0: \x{100}\x{100}\x{100}
291
292/(\x{100}+|x)/IB,utf
293------------------------------------------------------------------
294 Bra
295 CBra 1
296 \x{100}++
297 Alt
298 x
299 Ket
300 Ket
301 End
302------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700303Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100304Options: utf
305Starting code units: x \xff
306Subject length lower bound = 1
307
308/(\x{100}*a|x)/IB,utf
309------------------------------------------------------------------
310 Bra
311 CBra 1
312 \x{100}*+
313 a
314 Alt
315 x
316 Ket
317 Ket
318 End
319------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700320Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100321Options: utf
322Starting code units: a x \xff
323Subject length lower bound = 1
324
325/(\x{100}{0,2}a|x)/IB,utf
326------------------------------------------------------------------
327 Bra
328 CBra 1
329 \x{100}{0,2}+
330 a
331 Alt
332 x
333 Ket
334 Ket
335 End
336------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700337Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100338Options: utf
339Starting code units: a x \xff
340Subject length lower bound = 1
341
342/(\x{100}{1,2}a|x)/IB,utf
343------------------------------------------------------------------
344 Bra
345 CBra 1
346 \x{100}
347 \x{100}{0,1}+
348 a
349 Alt
350 x
351 Ket
352 Ket
353 End
354------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700355Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100356Options: utf
357Starting code units: x \xff
358Subject length lower bound = 1
359
360/\x{100}/IB,utf
361------------------------------------------------------------------
362 Bra
363 \x{100}
364 Ket
365 End
366------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700367Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100368Options: utf
369First code unit = \x{100}
370Subject length lower bound = 1
371
372/a\x{100}\x{101}*/IB,utf
373------------------------------------------------------------------
374 Bra
375 a\x{100}
376 \x{101}*+
377 Ket
378 End
379------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700380Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100381Options: utf
382First code unit = 'a'
383Last code unit = \x{100}
384Subject length lower bound = 2
385
386/a\x{100}\x{101}+/IB,utf
387------------------------------------------------------------------
388 Bra
389 a\x{100}
390 \x{101}++
391 Ket
392 End
393------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700394Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100395Options: utf
396First code unit = 'a'
397Last code unit = \x{101}
398Subject length lower bound = 3
399
400/[^\x{c4}]/IB
401------------------------------------------------------------------
402 Bra
403 [^\x{c4}]
404 Ket
405 End
406------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700407Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100408Subject length lower bound = 1
409
410/[\x{100}]/IB,utf
411------------------------------------------------------------------
412 Bra
413 \x{100}
414 Ket
415 End
416------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700417Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100418Options: utf
419First code unit = \x{100}
420Subject length lower bound = 1
421 \x{100}
422 0: \x{100}
423 Z\x{100}
424 0: \x{100}
425 \x{100}Z
426 0: \x{100}
427
428/[\xff]/IB,utf
429------------------------------------------------------------------
430 Bra
431 \x{ff}
432 Ket
433 End
434------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700435Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100436Options: utf
437First code unit = \xff
438Subject length lower bound = 1
439 >\x{ff}<
440 0: \x{ff}
441
442/[^\xff]/IB,utf
443------------------------------------------------------------------
444 Bra
445 [^\x{ff}]
446 Ket
447 End
448------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700449Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100450Options: utf
451Subject length lower bound = 1
452
453/\x{100}abc(xyz(?1))/IB,utf
454------------------------------------------------------------------
455 Bra
456 \x{100}abc
457 CBra 1
458 xyz
459 Recurse
460 Ket
461 Ket
462 End
463------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700464Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100465Options: utf
466First code unit = \x{100}
467Last code unit = 'z'
468Subject length lower bound = 7
469
470/\777/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700471Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100472Options: utf
473First code unit = \x{1ff}
474Subject length lower bound = 1
475 \x{1ff}
476 0: \x{1ff}
477 \777
478 0: \x{1ff}
479
480/\x{100}+\x{200}/IB,utf
481------------------------------------------------------------------
482 Bra
483 \x{100}++
484 \x{200}
485 Ket
486 End
487------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700488Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100489Options: utf
490First code unit = \x{100}
491Last code unit = \x{200}
492Subject length lower bound = 2
493
494/\x{100}+X/IB,utf
495------------------------------------------------------------------
496 Bra
497 \x{100}++
498 X
499 Ket
500 End
501------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700502Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100503Options: utf
504First code unit = \x{100}
505Last code unit = 'X'
506Subject length lower bound = 2
507
508/^[\QĀ\E-\QŐ\E/B,utf
509Failed: error 106 at offset 13: missing terminating ] for character class
510
511/X/utf
512 XX\x{d800}\=no_utf_check
513 0: X
514 XX\x{da00}\=no_utf_check
515 0: X
516 XX\x{dc00}\=no_utf_check
517 0: X
518 XX\x{de00}\=no_utf_check
519 0: X
520 XX\x{dfff}\=no_utf_check
521 0: X
522\= Expect UTF error
523 XX\x{d800}
524Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
525 XX\x{da00}
526Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
527 XX\x{dc00}
528Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
529 XX\x{de00}
530Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
531 XX\x{dfff}
532Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
533 XX\x{110000}
534** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
535 XX\x{d800}\x{1234}
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700536Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100537\= Expect no match
538 XX\x{d800}\=offset=3
539No match
540
541/(?<=.)X/utf
542 XX\x{d800}\=offset=3
543Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
544
545/(*UTF16)\x{11234}/
546 abcd\x{11234}pqr
547 0: \x{11234}
548
549/(*UTF)\x{11234}/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700550Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100551Compile options: <none>
552Overall options: utf
553First code unit = \x{d804}
554Last code unit = \x{de34}
555Subject length lower bound = 1
556 abcd\x{11234}pqr
557 0: \x{11234}
558
559/(*UTF-32)\x{11234}/
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700560Failed: error 160 at offset 5: (*VERB) not recognized or malformed
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100561 abcd\x{11234}pqr
562
563/(*UTF-32)\x{112}/
564Failed: error 160 at offset 5: (*VERB) not recognized or malformed
565 abcd\x{11234}pqr
566
567/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700568Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100569Compile options: <none>
570Overall options: utf
571\R matches any Unicode newline
572Forced newline is CRLF
573First code unit = 'a'
574Last code unit = 'b'
575Subject length lower bound = 3
576
577/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
Elliott Hughes0c26e192019-08-07 12:24:46 -0700578Failed: error 160 at offset 14: (*VERB) not recognized or malformed
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100579
580/\h/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700581Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100582Options: utf
583Starting code units: \x09 \x20 \xa0 \xff
584Subject length lower bound = 1
585 ABC\x{09}
586 0: \x{09}
587 ABC\x{20}
588 0:
589 ABC\x{a0}
590 0: \x{a0}
591 ABC\x{1680}
592 0: \x{1680}
593 ABC\x{180e}
594 0: \x{180e}
595 ABC\x{2000}
596 0: \x{2000}
597 ABC\x{202f}
598 0: \x{202f}
599 ABC\x{205f}
600 0: \x{205f}
601 ABC\x{3000}
602 0: \x{3000}
603
604/\v/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700605Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100606Options: utf
607Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
608Subject length lower bound = 1
609 ABC\x{0a}
610 0: \x{0a}
611 ABC\x{0b}
612 0: \x{0b}
613 ABC\x{0c}
614 0: \x{0c}
615 ABC\x{0d}
616 0: \x{0d}
617 ABC\x{85}
618 0: \x{85}
619 ABC\x{2028}
620 0: \x{2028}
621
622/\h*A/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700623Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100624Options: utf
625Starting code units: \x09 \x20 A \xa0 \xff
626Last code unit = 'A'
627Subject length lower bound = 1
628 CDBABC
629 0: A
630 \x{2000}ABC
631 0: \x{2000}A
632
633/\R*A/I,bsr=unicode,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700634Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100635Options: utf
636\R matches any Unicode newline
637Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
638Last code unit = 'A'
639Subject length lower bound = 1
640 CDBABC
641 0: A
642 \x{2028}A
643 0: \x{2028}A
644
645/\v+A/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700646Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100647Options: utf
648Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
649Last code unit = 'A'
650Subject length lower bound = 2
651
652/\s?xxx\s/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700653Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100654Options: utf
655Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
656Last code unit = 'x'
657Subject length lower bound = 4
658
659/\sxxx\s/I,utf,tables=2
Elliott Hughes0c26e192019-08-07 12:24:46 -0700660Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100661Options: utf
662Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
663Last code unit = 'x'
664Subject length lower bound = 5
665 AB\x{85}xxx\x{a0}XYZ
666 0: \x{85}xxx\x{a0}
667 AB\x{a0}xxx\x{85}XYZ
668 0: \x{a0}xxx\x{85}
669
670/\S \S/I,utf,tables=2
Elliott Hughes0c26e192019-08-07 12:24:46 -0700671Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100672Options: utf
673Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
674 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
675 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
676 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
677 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
678 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
679 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
680 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
681 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
682 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
683 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
684 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
685 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
686 \xff
687Last code unit = ' '
688Subject length lower bound = 3
689 \x{a2} \x{84}
690 0: \x{a2} \x{84}
691 A Z
692 0: A Z
693
694/a+/utf
695 a\x{123}aa\=offset=1
696 0: aa
697 a\x{123}aa\=offset=2
698 0: aa
699 a\x{123}aa\=offset=3
700 0: a
701\= Expect no match
702 a\x{123}aa\=offset=4
703No match
704\= Expect bad offset error
705 a\x{123}aa\=offset=5
706Failed: error -33: bad offset value
707 a\x{123}aa\=offset=6
708Failed: error -33: bad offset value
709
710/\x{1234}+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700711Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100712Options: caseless utf
713First code unit = \x{1234}
714Subject length lower bound = 1
715
716/\x{1234}+?/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700717Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100718Options: caseless utf
719First code unit = \x{1234}
720Subject length lower bound = 1
721
722/\x{1234}++/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700723Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100724Options: caseless utf
725First code unit = \x{1234}
726Subject length lower bound = 1
727
728/\x{1234}{2}/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700729Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100730Options: caseless utf
731First code unit = \x{1234}
732Last code unit = \x{1234}
733Subject length lower bound = 2
734
735/[^\x{c4}]/IB,utf
736------------------------------------------------------------------
737 Bra
738 [^\x{c4}]
739 Ket
740 End
741------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700742Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100743Options: utf
744Subject length lower bound = 1
745
746/X+\x{200}/IB,utf
747------------------------------------------------------------------
748 Bra
749 X++
750 \x{200}
751 Ket
752 End
753------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700754Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100755Options: utf
756First code unit = 'X'
757Last code unit = \x{200}
758Subject length lower bound = 2
759
760/\R/I,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700761Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100762Options: utf
763Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
764Subject length lower bound = 1
765
766# Check bad offset
767
768/a/utf
769\= Expect bad UTF-16 offset, or no match in 32-bit
770 \x{10000}\=offset=1
771Error -36 (bad UTF-16 offset)
772 \x{10000}ab\=offset=1
773Error -36 (bad UTF-16 offset)
774\= Expect 16-bit match, 32-bit no match
775 \x{10000}ab\=offset=2
776 0: a
777\= Expect no match
778 \x{10000}ab\=offset=3
779No match
780\= Expect no match in 16-bit, bad offset in 32-bit
781 \x{10000}ab\=offset=4
782No match
783\= Expect bad offset
784 \x{10000}ab\=offset=5
785Failed: error -33: bad offset value
786
787/í¼€/utf
788Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
789
790/\w+\x{C4}/B,utf
791------------------------------------------------------------------
792 Bra
793 \w++
794 \x{c4}
795 Ket
796 End
797------------------------------------------------------------------
798 a\x{C4}\x{C4}
799 0: a\x{c4}
800
801/\w+\x{C4}/B,utf,tables=2
802------------------------------------------------------------------
803 Bra
804 \w+
805 \x{c4}
806 Ket
807 End
808------------------------------------------------------------------
809 a\x{C4}\x{C4}
810 0: a\x{c4}\x{c4}
811
812/\W+\x{C4}/B,utf
813------------------------------------------------------------------
814 Bra
815 \W+
816 \x{c4}
817 Ket
818 End
819------------------------------------------------------------------
820 !\x{C4}
821 0: !\x{c4}
822
823/\W+\x{C4}/B,utf,tables=2
824------------------------------------------------------------------
825 Bra
826 \W++
827 \x{c4}
828 Ket
829 End
830------------------------------------------------------------------
831 !\x{C4}
832 0: !\x{c4}
833
834/\W+\x{A1}/B,utf
835------------------------------------------------------------------
836 Bra
837 \W+
838 \x{a1}
839 Ket
840 End
841------------------------------------------------------------------
842 !\x{A1}
843 0: !\x{a1}
844
845/\W+\x{A1}/B,utf,tables=2
846------------------------------------------------------------------
847 Bra
848 \W+
849 \x{a1}
850 Ket
851 End
852------------------------------------------------------------------
853 !\x{A1}
854 0: !\x{a1}
855
856/X\s+\x{A0}/B,utf
857------------------------------------------------------------------
858 Bra
859 X
860 \s++
861 \x{a0}
862 Ket
863 End
864------------------------------------------------------------------
865 X\x20\x{A0}\x{A0}
866 0: X \x{a0}
867
868/X\s+\x{A0}/B,utf,tables=2
869------------------------------------------------------------------
870 Bra
871 X
872 \s+
873 \x{a0}
874 Ket
875 End
876------------------------------------------------------------------
877 X\x20\x{A0}\x{A0}
878 0: X \x{a0}\x{a0}
879
880/\S+\x{A0}/B,utf
881------------------------------------------------------------------
882 Bra
883 \S+
884 \x{a0}
885 Ket
886 End
887------------------------------------------------------------------
888 X\x{A0}\x{A0}
889 0: X\x{a0}\x{a0}
890
891/\S+\x{A0}/B,utf,tables=2
892------------------------------------------------------------------
893 Bra
894 \S++
895 \x{a0}
896 Ket
897 End
898------------------------------------------------------------------
899 X\x{A0}\x{A0}
900 0: X\x{a0}
901
902/\x{a0}+\s!/B,utf
903------------------------------------------------------------------
904 Bra
905 \x{a0}++
906 \s
907 !
908 Ket
909 End
910------------------------------------------------------------------
911 \x{a0}\x20!
912 0: \x{a0} !
913
914/\x{a0}+\s!/B,utf,tables=2
915------------------------------------------------------------------
916 Bra
917 \x{a0}+
918 \s
919 !
920 Ket
921 End
922------------------------------------------------------------------
923 \x{a0}\x20!
924 0: \x{a0} !
925
926/(*UTF)abc/never_utf
927Failed: error 174 at offset 6: using UTF is disabled by the application
928
929/abc/utf,never_utf
930Failed: error 174 at offset 0: using UTF is disabled by the application
931
932/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
933------------------------------------------------------------------
934 Bra
935 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
936 Ket
937 End
938------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700939Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100940Options: caseless utf
941First code unit = 'A' (caseless)
942Last code unit = \x{1fb0} (caseless)
943Subject length lower bound = 5
944
945/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
946------------------------------------------------------------------
947 Bra
948 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
949 Ket
950 End
951------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700952Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100953Options: utf
954First code unit = 'A'
955Last code unit = \x{1fb0}
956Subject length lower bound = 5
957
958/AB\x{1fb0}/IB,utf
959------------------------------------------------------------------
960 Bra
961 AB\x{1fb0}
962 Ket
963 End
964------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700965Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100966Options: utf
967First code unit = 'A'
968Last code unit = \x{1fb0}
969Subject length lower bound = 3
970
971/AB\x{1fb0}/IBi,utf
972------------------------------------------------------------------
973 Bra
974 /i AB\x{1fb0}
975 Ket
976 End
977------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -0700978Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100979Options: caseless utf
980First code unit = 'A' (caseless)
981Last code unit = \x{1fb0} (caseless)
982Subject length lower bound = 3
983
984/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -0700985Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100986Options: caseless utf
987First code unit = \x{401} (caseless)
988Last code unit = \x{42f} (caseless)
989Subject length lower bound = 17
990 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
991 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
992 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
993 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
994
995/[ⱥ]/Bi,utf
996------------------------------------------------------------------
997 Bra
998 /i \x{2c65}
999 Ket
1000 End
1001------------------------------------------------------------------
1002
1003/[^ⱥ]/Bi,utf
1004------------------------------------------------------------------
1005 Bra
1006 /i [^\x{2c65}]
1007 Ket
1008 End
1009------------------------------------------------------------------
1010
1011/[[:blank:]]/B,ucp
1012------------------------------------------------------------------
1013 Bra
1014 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
1015 Ket
1016 End
1017------------------------------------------------------------------
1018
1019/\x{212a}+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001020Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001021Options: caseless utf
1022Starting code units: K k \xff
1023Subject length lower bound = 1
1024 KKkk\x{212a}
1025 0: KKkk\x{212a}
1026
1027/s+/Ii,utf
Elliott Hughes0c26e192019-08-07 12:24:46 -07001028Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001029Options: caseless utf
1030Starting code units: S s \xff
1031Subject length lower bound = 1
1032 SSss\x{17f}
1033 0: SSss\x{17f}
1034
1035# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
1036
1037/\x{110000}/utf
1038Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
1039
1040/\o{4200000}/utf
1041Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
1042
1043/\x{100}*A/IB,utf
1044------------------------------------------------------------------
1045 Bra
1046 \x{100}*+
1047 A
1048 Ket
1049 End
1050------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001051Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001052Options: utf
1053Starting code units: A \xff
1054Last code unit = 'A'
1055Subject length lower bound = 1
1056 A
1057 0: A
1058
1059/\x{100}*\d(?R)/IB,utf
1060------------------------------------------------------------------
1061 Bra
1062 \x{100}*+
1063 \d
1064 Recurse
1065 Ket
1066 End
1067------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001068Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001069Options: utf
1070Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1071Subject length lower bound = 1
1072
1073/[Z\x{100}]/IB,utf
1074------------------------------------------------------------------
1075 Bra
1076 [Z\x{100}]
1077 Ket
1078 End
1079------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001080Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001081Options: utf
1082Starting code units: Z \xff
1083Subject length lower bound = 1
1084 Z\x{100}
1085 0: Z
1086 \x{100}
1087 0: \x{100}
1088 \x{100}Z
1089 0: \x{100}
1090
1091/[z-\x{100}]/IB,utf
1092------------------------------------------------------------------
1093 Bra
1094 [z-\xff\x{100}]
1095 Ket
1096 End
1097------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001098Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001099Options: utf
1100Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
1101 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
1102 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
1103 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
1104 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
1105 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
1106 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
1107 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
1108 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1109Subject length lower bound = 1
1110
1111/[z\Qa-d]Ā\E]/IB,utf
1112------------------------------------------------------------------
1113 Bra
1114 [\-\]adz\x{100}]
1115 Ket
1116 End
1117------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001118Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001119Options: utf
1120Starting code units: - ] a d z \xff
1121Subject length lower bound = 1
1122 \x{100}
1123 0: \x{100}
1124 Ā
1125 0: \x{100}
1126
1127/[ab\x{100}]abc(xyz(?1))/IB,utf
1128------------------------------------------------------------------
1129 Bra
1130 [ab\x{100}]
1131 abc
1132 CBra 1
1133 xyz
1134 Recurse
1135 Ket
1136 Ket
1137 End
1138------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001139Capture group count = 1
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001140Options: utf
1141Starting code units: a b \xff
1142Last code unit = 'z'
1143Subject length lower bound = 7
1144
1145/\x{100}*\s/IB,utf
1146------------------------------------------------------------------
1147 Bra
1148 \x{100}*+
1149 \s
1150 Ket
1151 End
1152------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001153Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001154Options: utf
1155Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
1156Subject length lower bound = 1
1157
1158/\x{100}*\d/IB,utf
1159------------------------------------------------------------------
1160 Bra
1161 \x{100}*+
1162 \d
1163 Ket
1164 End
1165------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001166Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001167Options: utf
1168Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1169Subject length lower bound = 1
1170
1171/\x{100}*\w/IB,utf
1172------------------------------------------------------------------
1173 Bra
1174 \x{100}*+
1175 \w
1176 Ket
1177 End
1178------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001179Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001180Options: utf
1181Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1182 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1183 \xff
1184Subject length lower bound = 1
1185
1186/\x{100}*\D/IB,utf
1187------------------------------------------------------------------
1188 Bra
1189 \x{100}*
1190 \D
1191 Ket
1192 End
1193------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001194Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001195Options: utf
1196Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1197 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1198 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1199 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1200 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
1201 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
1202 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
1203 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
1204 \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
1205 \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1206 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1207 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1208 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1209 \xfb \xfc \xfd \xfe \xff
1210Subject length lower bound = 1
1211
1212/\x{100}*\S/IB,utf
1213------------------------------------------------------------------
1214 Bra
1215 \x{100}*
1216 \S
1217 Ket
1218 End
1219------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001220Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001221Options: utf
1222Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1223 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1224 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1225 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1226 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
1227 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
1228 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
1229 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
1230 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
1231 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
1232 \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
1233 \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
1234 \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
1235 \xfd \xfe \xff
1236Subject length lower bound = 1
1237
1238/\x{100}*\W/IB,utf
1239------------------------------------------------------------------
1240 Bra
1241 \x{100}*
1242 \W
1243 Ket
1244 End
1245------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001246Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001247Options: utf
1248Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1249 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1250 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1251 ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
1252 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
1253 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
1254 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
1255 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
1256 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
1257 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
1258 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
1259 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1260Subject length lower bound = 1
1261
1262/[\x{105}-\x{109}]/IBi,utf
1263------------------------------------------------------------------
1264 Bra
1265 [\x{104}-\x{109}]
1266 Ket
1267 End
1268------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001269Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001270Options: caseless utf
1271Starting code units: \xff
1272Subject length lower bound = 1
1273 \x{104}
1274 0: \x{104}
1275 \x{105}
1276 0: \x{105}
1277 \x{109}
1278 0: \x{109}
1279\= Expect no match
1280 \x{100}
1281No match
1282 \x{10a}
1283No match
1284
1285/[z-\x{100}]/IBi,utf
1286------------------------------------------------------------------
1287 Bra
1288 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1289 Ket
1290 End
1291------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001292Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001293Options: caseless utf
1294Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1295 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1296 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1297 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1298 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1299 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1300 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1301 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1302 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1303 \xff
1304Subject length lower bound = 1
1305 Z
1306 0: Z
1307 z
1308 0: z
1309 \x{39c}
1310 0: \x{39c}
1311 \x{178}
1312 0: \x{178}
1313 |
1314 0: |
1315 \x{80}
1316 0: \x{80}
1317 \x{ff}
1318 0: \x{ff}
1319 \x{100}
1320 0: \x{100}
1321 \x{101}
1322 0: \x{101}
1323\= Expect no match
1324 \x{102}
1325No match
1326 Y
1327No match
1328 y
1329No match
1330
1331/[z-\x{100}]/IBi,utf
1332------------------------------------------------------------------
1333 Bra
1334 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1335 Ket
1336 End
1337------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001338Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001339Options: caseless utf
1340Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1341 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1342 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1343 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1344 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1345 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1346 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1347 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1348 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1349 \xff
1350Subject length lower bound = 1
1351
1352/\x{3a3}B/IBi,utf
1353------------------------------------------------------------------
1354 Bra
1355 clist 03a3 03c2 03c3
1356 /i B
1357 Ket
1358 End
1359------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001360Capture group count = 0
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001361Options: caseless utf
1362Starting code units: \xff
1363Last code unit = 'B' (caseless)
1364Subject length lower bound = 2
1365
1366/./utf
1367 \x{110000}
1368** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
1369
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001370/(*UTF)abý¿¿¿¿¿z/B
1371------------------------------------------------------------------
1372 Bra
1373 ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z
1374 Ket
1375 End
1376------------------------------------------------------------------
1377
1378/abý¿¿¿¿¿z/utf
1379** Failed: character value greater than 0x10ffff cannot be converted to UTF
1380
1381/[\W\p{Any}]/B
1382------------------------------------------------------------------
1383 Bra
1384 [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}]
1385 Ket
1386 End
1387------------------------------------------------------------------
1388 abc
1389 0: a
1390 123
1391 0: 1
1392
1393/[\W\pL]/B
1394------------------------------------------------------------------
1395 Bra
1396 [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}]
1397 Ket
1398 End
1399------------------------------------------------------------------
1400 abc
1401 0: a
1402 \x{100}
1403 0: \x{100}
1404 \x{308}
1405 0: \x{308}
1406\= Expect no match
1407 123
1408No match
1409
1410/[\s[:^ascii:]]/B,ucp
1411------------------------------------------------------------------
1412 Bra
1413 [\x80-\xff\p{Xsp}\x{100}-\x{ffff}]
1414 Ket
1415 End
1416------------------------------------------------------------------
1417
1418/\pP/ucp
1419 \x{7fffffff}
1420** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
1421** Truncation will probably give the wrong result.
1422No match
1423
1424# A special extra option allows excaped surrogate code points in 32-bit mode,
1425# but subjects containing them must not be UTF-checked. These patterns give
1426# errors in 16-bit mode.
1427
1428/\x{d800}/I,utf,allow_surrogate_escapes
1429Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
1430 \x{d800}\=no_utf_check
1431
1432/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1433Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
1434 \x{dfff}\x{df01}\=no_utf_check
1435
1436# This has different starting code units in 8-bit mode.
1437
1438/^[^ab]/IB,utf
1439------------------------------------------------------------------
1440 Bra
1441 ^
1442 [\x00-`c-\xff] (neg)
1443 Ket
1444 End
1445------------------------------------------------------------------
Elliott Hughes0c26e192019-08-07 12:24:46 -07001446Capture group count = 0
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001447Compile options: utf
1448Overall options: anchored utf
1449Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1450 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1451 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1452 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1453 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1454 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
1455 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
1456 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
1457 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
1458 \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
1459 \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
1460 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
1461 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
1462 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1463Subject length lower bound = 1
1464 c
1465 0: c
1466 \x{ff}
1467 0: \x{ff}
1468 \x{100}
1469 0: \x{100}
1470\= Expect no match
1471 aaa
1472No match
Elliott Hughes0c26e192019-08-07 12:24:46 -07001473
1474# Offsets are different in 8-bit mode.
1475
1476/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
1477 123abcáyzabcdef789abcሴqr
1478 1(2) Old 6 6 "" New 6 8 "<>"
1479 2(2) Old 12 12 "" New 14 16 "<>"
1480 3(2) Old 12 15 "def" New 16 21 "<def>"
1481 4(2) Old 21 21 "" New 27 29 "<>"
1482 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
1483
1484# A few script run tests in non-UTF mode (but they need Unicode support)
1485
1486/^(*script_run:.{4})/
1487 \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
1488 0: \x{3041}\x{30a1}\x{3007}\x{3007}
1489 \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
1490 0: \x{30a1}\x{3041}\x{3007}\x{3007}
1491 \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
1492 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
1493
1494/^(*sr:.*)/utf,allow_surrogate_escapes
1495Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
1496 \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
1497 \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check
1498
1499/(?(n/utf
1500Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1501
1502/(?(á/utf
1503Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
Elliott Hughes9bc971b2018-07-27 13:23:14 -07001504
Elliott Hughes2dbd7d22020-06-03 14:32:37 -07001505# Invalid UTF-16/32 tests.
1506
1507/.../g,match_invalid_utf
1508 abcd\x{df00}wxzy\x{df00}pqrs
1509 0: abc
1510 0: wxz
1511 0: pqr
1512 abcd\x{80}wxzy\x{df00}pqrs
1513 0: abc
1514 0: d\x{80}w
1515 0: xzy
1516 0: pqr
1517
1518/abc/match_invalid_utf
1519 ab\x{df00}ab\=ph
1520Partial match: ab
1521\= Expect no match
1522 ab\x{df00}cdef\=ph
1523No match
1524
1525/ab$/match_invalid_utf
1526 ab\x{df00}cdeab
1527 0: ab
1528\= Expect no match
1529 ab\x{df00}cde
1530No match
1531
1532/.../g,match_invalid_utf
1533 abcd\x{80}wxzy\x{df00}pqrs
1534 0: abc
1535 0: d\x{80}w
1536 0: xzy
1537 0: pqr
1538
1539/(?<=x)../g,match_invalid_utf
1540 abcd\x{80}wxzy\x{df00}pqrs
1541 0: zy
1542 abcd\x{80}wxzy\x{df00}xpqrs
1543 0: zy
1544 0: pq
1545
1546/X$/match_invalid_utf
1547\= Expect no match
1548 X\x{df00}
1549No match
1550
1551/(?<=..)X/match_invalid_utf,aftertext
1552 AB\x{df00}AQXYZ
1553 0: X
1554 0+ YZ
1555 AB\x{df00}AQXYZ\=offset=5
1556 0: X
1557 0+ YZ
1558 AB\x{df00}\x{df00}AXYZXC\=offset=5
1559 0: X
1560 0+ C
1561\= Expect no match
1562 AB\x{df00}XYZ
1563No match
1564 AB\x{df00}XYZ\=offset=3
1565No match
1566 AB\x{df00}AXYZ
1567No match
1568 AB\x{df00}AXYZ\=offset=4
1569No match
1570 AB\x{df00}\x{df00}AXYZ\=offset=5
1571No match
1572
1573/.../match_invalid_utf
1574\= Expect no match
1575 A\x{d800}B
1576No match
1577 A\x{110000}B
1578** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
1579
1580/aa/utf,ucp,match_invalid_utf,global
1581 aa\x{d800}aa
1582 0: aa
1583 0: aa
1584
1585/aa/utf,ucp,match_invalid_utf,global
1586 \x{d800}aa
1587 0: aa
1588
1589# ----------------------------------------------------
1590
1591/(*UTF)(?=\x{123})/I
1592Capture group count = 0
1593May match empty string
1594Compile options: <none>
1595Overall options: utf
1596First code unit = \x{123}
1597Subject length lower bound = 1
1598
1599/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
1600Capture group count = 0
1601Options: utf
1602First code unit = \xc1 (caseless)
1603Last code unit = \x{145} (caseless)
1604Subject length lower bound = 3
1605
1606/[\xff\x{ffff}]/I,utf
1607Capture group count = 0
1608Options: utf
1609Starting code units: \xff
1610Subject length lower bound = 1
1611
1612/[\xff\x{ff}]/I,utf
1613Capture group count = 0
1614Options: utf
1615Starting code units: \xff
1616Subject length lower bound = 1
1617
1618/[\xff\x{ff}]/I
1619Capture group count = 0
1620Starting code units: \xff
1621Subject length lower bound = 1
1622
1623/[Ss]/I
1624Capture group count = 0
1625First code unit = 'S' (caseless)
1626Subject length lower bound = 1
1627
1628/[Ss]/I,utf
1629Capture group count = 0
1630Options: utf
1631Starting code units: S s
1632Subject length lower bound = 1
1633
1634/(?:\x{ff}|\x{3000})/I,utf
1635Capture group count = 0
1636Options: utf
1637Starting code units: \xff
1638Subject length lower bound = 1
1639
1640# ----------------------------------------------------
1641# UCP and casing tests
1642
1643/\x{120}/i,I
1644Capture group count = 0
1645Options: caseless
1646First code unit = \x{120}
1647Subject length lower bound = 1
1648
1649/\x{c1}/i,I,ucp
1650Capture group count = 0
1651Options: caseless ucp
1652First code unit = \xc1 (caseless)
1653Subject length lower bound = 1
1654
1655/[\x{120}\x{121}]/iB,ucp
1656------------------------------------------------------------------
1657 Bra
1658 /i \x{120}
1659 Ket
1660 End
1661------------------------------------------------------------------
1662
1663/[ab\x{120}]+/iB,ucp
1664------------------------------------------------------------------
1665 Bra
1666 [ABab\x{120}-\x{121}]++
1667 Ket
1668 End
1669------------------------------------------------------------------
1670 aABb\x{121}\x{120}
1671 0: aABb\x{121}\x{120}
1672
1673/\x{c1}/i,no_start_optimize
1674\= Expect no match
1675 \x{e1}
1676No match
1677
1678/\x{120}\x{c1}/i,ucp,no_start_optimize
1679 \x{121}\x{e1}
1680 0: \x{121}\xe1
1681
1682/\x{120}\x{c1}/i,ucp
1683 \x{121}\x{e1}
1684 0: \x{121}\xe1
1685
1686/[^\x{120}]/i,no_start_optimize
1687 \x{121}
1688 0: \x{121}
1689
1690/[^\x{120}]/i,ucp,no_start_optimize
1691\= Expect no match
1692 \x{121}
1693No match
1694
1695/[^\x{120}]/i
1696 \x{121}
1697 0: \x{121}
1698
1699/[^\x{120}]/i,ucp
1700\= Expect no match
1701 \x{121}
1702No match
1703
1704/\x{120}{2}/i,ucp
1705 \x{121}\x{121}
1706 0: \x{121}\x{121}
1707
1708/[^\x{120}]{2}/i,ucp
1709\= Expect no match
1710 \x{121}\x{121}
1711No match
1712
1713/\x{c1}+\x{e1}/iB,ucp
1714------------------------------------------------------------------
1715 Bra
1716 /i \x{c1}+
1717 /i \x{e1}
1718 Ket
1719 End
1720------------------------------------------------------------------
1721 \x{c1}\x{c1}\x{c1}
1722 0: \xc1\xc1\xc1
1723
1724/\x{c1}+\x{e1}/iIB,ucp
1725------------------------------------------------------------------
1726 Bra
1727 /i \x{c1}+
1728 /i \x{e1}
1729 Ket
1730 End
1731------------------------------------------------------------------
1732Capture group count = 0
1733Options: caseless ucp
1734First code unit = \xc1 (caseless)
1735Last code unit = \xe1 (caseless)
1736Subject length lower bound = 2
1737 \x{c1}\x{c1}\x{c1}
1738 0: \xc1\xc1\xc1
1739 \x{e1}\x{e1}\x{e1}
1740 0: \xe1\xe1\xe1
1741
1742/a|\x{c1}/iI,ucp
1743Capture group count = 0
1744Options: caseless ucp
1745Starting code units: A a \xc1 \xe1
1746Subject length lower bound = 1
1747 \x{e1}xxx
1748 0: \xe1
1749
1750/\x{c1}|\x{e1}/iI,ucp
1751Capture group count = 0
1752Options: caseless ucp
1753First code unit = \xc1 (caseless)
1754Subject length lower bound = 1
1755
1756/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
1757 X\x{e1}Y
1758 1: >\xc1<
1759
1760/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
1761 X\x{121}Y
1762 1: >\x{120}<
1763
1764/s/i,ucp
1765 \x{17f}
1766 0: \x{17f}
1767
1768/s/i,utf
1769 \x{17f}
1770 0: \x{17f}
1771
1772/[^s]/i,ucp
1773\= Expect no match
1774 \x{17f}
1775No match
1776
1777/[^s]/i,utf
1778\= Expect no match
1779 \x{17f}
1780No match
1781
1782# ----------------------------------------------------
1783
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001784# End of testinput12