blob: 6bd352f6c4dd9a22d1cb6bdc34a0ba2c70a5ec05 [file] [log] [blame]
1# This set of tests checks the API, internals, and non-Perl stuff for UTF
2# support, including Unicode properties. However, tests that give different
3# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
4# 12).
5
6#newline_default lf any anycrlf
7
8# PCRE2 and Perl disagree about the characteristics of certain Unicode
9# characters. For example, 061C was considered by Perl to be Arabic, though
10# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
11# However, it *is* in that file for Unicode 10, but when I came to re-check,
12# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
13
14# 2066-2069 are graphic and printable according to Perl, though they are
15# actually "isolate" control characters. That is why the following tests are
16# here rather than in test 4.
17
18/^[\p{Arabic}]/utf
19 \x{061c}
20
21/^[[:graph:]]+$/utf,ucp
22\= Expect no match
23 \x{61c}
24 \x{2066}
25 \x{2067}
26 \x{2068}
27 \x{2069}
28
29/^[[:print:]]+$/utf,ucp
30\= Expect no match
31 \x{61c}
32 \x{2066}
33 \x{2067}
34 \x{2068}
35 \x{2069}
36
37/^[[:^graph:]]+$/utf,ucp
38 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
39 \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
40
41/^[[:^print:]]+$/utf,ucp
42 \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
43 \x{2068}\x{2069}
44
45# Perl does not consider U+180e to be a space character. It is true that it
46# does not appear in the Unicode PropList.txt file as such, but in many other
47# sources it is listed as a space, and has been treated as such in PCRE for
48# a long time.
49
50/^>[[:blank:]]*/utf,ucp
51 >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
52
53/^A\s+Z/utf,ucp
54 A\x{85}\x{180e}\x{2005}Z
55
56/^A[\s]+Z/utf,ucp
57 A\x{2005}Z
58 A\x{85}\x{2005}Z
59
60/^[[:graph:]]+$/utf,ucp
61\= Expect no match
62 \x{180e}
63
64/^[[:print:]]+$/utf,ucp
65 \x{180e}
66
67/^[[:^graph:]]+$/utf,ucp
68 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
69
70/^[[:^print:]]+$/utf,ucp
71\= Expect no match
72 \x{180e}
73
74# End of U+180E tests.
75
76# ---------------------------------------------------------------------
77
78/\x{110000}/IB,utf
79
80/\o{4200000}/IB,utf
81
82/\x{ffffffff}/utf
83
84/\o{37777777777}/utf
85
86/\x{100000000}/utf
87
88/\o{77777777777}/utf
89
90/\x{d800}/utf
91
92/\o{154000}/utf
93
94/\x{dfff}/utf
95
96/\o{157777}/utf
97
98/\x{d7ff}/utf
99
100/\o{153777}/utf
101
102/\x{e000}/utf
103
104/\o{170000}/utf
105
106/^\x{100}a\x{1234}/utf
107 \x{100}a\x{1234}bcd
108
109/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
110 \x{0041}\x{2262}\x{0391}\x{002e}
111
112/.{3,5}X/IB,utf
113 \x{212ab}\x{212ab}\x{212ab}\x{861}X
114
115/.{3,5}?/IB,utf
116 \x{212ab}\x{212ab}\x{212ab}\x{861}
117
118/^[ab]/IB,utf
119 bar
120\= Expect no match
121 c
122 \x{ff}
123 \x{100}
124
125/\x{100}*(\d+|"(?1)")/utf
126 1234
127 "1234"
128 \x{100}1234
129 "\x{100}1234"
130 \x{100}\x{100}12ab
131 \x{100}\x{100}"12"
132\= Expect no match
133 \x{100}\x{100}abcd
134
135/\x{100}*/IB,utf
136
137/a\x{100}*/IB,utf
138
139/ab\x{100}*/IB,utf
140
141/[\x{200}-\x{100}]/utf
142
143/[Ā-Ą]/utf
144 \x{100}
145 \x{104}
146\= Expect no match
147 \x{105}
148 \x{ff}
149
150/[\xFF]/IB
151 >\xff<
152
153/[^\xFF]/IB
154
155/[Ä-Ü]/utf
156 Ö # Matches without Study
157 \x{d6}
158
159/[Ä-Ü]/utf
160 Ö <-- Same with Study
161 \x{d6}
162
163/[\x{c4}-\x{dc}]/utf
164 Ö # Matches without Study
165 \x{d6}
166
167/[\x{c4}-\x{dc}]/utf
168 Ö <-- Same with Study
169 \x{d6}
170
171/[^\x{100}]abc(xyz(?1))/IB,utf
172
173/(\x{100}(b(?2)c))?/IB,utf
174
175/(\x{100}(b(?2)c)){0,2}/IB,utf
176
177/(\x{100}(b(?1)c))?/IB,utf
178
179/(\x{100}(b(?1)c)){0,2}/IB,utf
180
181/\W/utf
182 A.B
183 A\x{100}B
184
185/\w/utf
186 \x{100}X
187
188# Use no_start_optimize because the first code unit is different in 8-bit from
189# the wider modes.
190
191/^\ሴ/IB,utf,no_start_optimize
192
193/()()()()()()()()()()
194 ()()()()()()()()()()
195 ()()()()()()()()()()
196 ()()()()()()()()()()
197 A (x) (?41) B/x,utf
198 AxxB
199
200/^[\x{100}\E-\Q\E\x{150}]/B,utf
201
202/^[\QĀ\E-\QŐ\E]/B,utf
203
204/^abc./gmx,newline=any,utf
205 abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
206
207/abc.$/gmx,newline=any,utf
208 abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
209
210/^a\Rb/bsr=unicode,utf
211 a\nb
212 a\rb
213 a\r\nb
214 a\x0bb
215 a\x0cb
216 a\x{85}b
217 a\x{2028}b
218 a\x{2029}b
219\= Expect no match
220 a\n\rb
221
222/^a\R*b/bsr=unicode,utf
223 ab
224 a\nb
225 a\rb
226 a\r\nb
227 a\x0bb
228 a\x0c\x{2028}\x{2029}b
229 a\x{85}b
230 a\n\rb
231 a\n\r\x{85}\x0cb
232
233/^a\R+b/bsr=unicode,utf
234 a\nb
235 a\rb
236 a\r\nb
237 a\x0bb
238 a\x0c\x{2028}\x{2029}b
239 a\x{85}b
240 a\n\rb
241 a\n\r\x{85}\x0cb
242\= Expect no match
243 ab
244
245/^a\R{1,3}b/bsr=unicode,utf
246 a\nb
247 a\n\rb
248 a\n\r\x{85}b
249 a\r\n\r\nb
250 a\r\n\r\n\r\nb
251 a\n\r\n\rb
252 a\n\n\r\nb
253\= Expect no match
254 a\n\n\n\rb
255 a\r
256
257/\H\h\V\v/utf
258 X X\x0a
259 X\x09X\x0b
260\= Expect no match
261 \x{a0} X\x0a
262
263/\H*\h+\V?\v{3,4}/utf
264 \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
265 \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
266 \x09\x20\x{a0}\x0a\x0b\x0c
267\= Expect no match
268 \x09\x20\x{a0}\x0a\x0b
269
270/\H\h\V\v/utf
271 \x{3001}\x{3000}\x{2030}\x{2028}
272 X\x{180e}X\x{85}
273\= Expect no match
274 \x{2009} X\x0a
275
276/\H*\h+\V?\v{3,4}/utf
277 \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
278 \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
279 \x09\x20\x{202f}\x0a\x0b\x0c
280\= Expect no match
281 \x09\x{200a}\x{a0}\x{2028}\x0b
282
283/[\h]/B,utf
284 >\x{1680}
285
286/[\h]{3,}/B,utf
287 >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
288
289/[\v]/B,utf
290
291/[\H]/B,utf
292
293/[\V]/B,utf
294
295/.*$/newline=any,utf
296 \x{1ec5}
297
298/a\Rb/I,bsr=anycrlf,utf
299 a\rb
300 a\nb
301 a\r\nb
302\= Expect no match
303 a\x{85}b
304 a\x0bb
305
306/a\Rb/I,bsr=unicode,utf
307 a\rb
308 a\nb
309 a\r\nb
310 a\x{85}b
311 a\x0bb
312
313/a\R?b/I,bsr=anycrlf,utf
314 a\rb
315 a\nb
316 a\r\nb
317\= Expect no match
318 a\x{85}b
319 a\x0bb
320
321/a\R?b/I,bsr=unicode,utf
322 a\rb
323 a\nb
324 a\r\nb
325 a\x{85}b
326 a\x0bb
327
328/.*a.*=.b.*/utf,newline=any
329 QQQ\x{2029}ABCaXYZ=!bPQR
330\= Expect no match
331 a\x{2029}b
332 \x61\xe2\x80\xa9\x62
333
334/[[:a\x{100}b:]]/utf
335
336/a[^]b/utf,allow_empty_class,match_unset_backref
337 a\x{1234}b
338 a\nb
339\= Expect no match
340 ab
341
342/a[^]+b/utf,allow_empty_class,match_unset_backref
343 aXb
344 a\nX\nX\x{1234}b
345\= Expect no match
346 ab
347
348/(\x{de})\1/
349 \x{de}\x{de}
350
351/X/newline=any,utf,firstline
352 A\x{1ec5}ABCXYZ
353
354/Xa{2,4}b/utf
355 X\=ps
356 Xa\=ps
357 Xaa\=ps
358 Xaaa\=ps
359 Xaaaa\=ps
360
361/Xa{2,4}?b/utf
362 X\=ps
363 Xa\=ps
364 Xaa\=ps
365 Xaaa\=ps
366 Xaaaa\=ps
367
368/Xa{2,4}+b/utf
369 X\=ps
370 Xa\=ps
371 Xaa\=ps
372 Xaaa\=ps
373 Xaaaa\=ps
374
375/X\x{123}{2,4}b/utf
376 X\=ps
377 X\x{123}\=ps
378 X\x{123}\x{123}\=ps
379 X\x{123}\x{123}\x{123}\=ps
380 X\x{123}\x{123}\x{123}\x{123}\=ps
381
382/X\x{123}{2,4}?b/utf
383 X\=ps
384 X\x{123}\=ps
385 X\x{123}\x{123}\=ps
386 X\x{123}\x{123}\x{123}\=ps
387 X\x{123}\x{123}\x{123}\x{123}\=ps
388
389/X\x{123}{2,4}+b/utf
390 X\=ps
391 X\x{123}\=ps
392 X\x{123}\x{123}\=ps
393 X\x{123}\x{123}\x{123}\=ps
394 X\x{123}\x{123}\x{123}\x{123}\=ps
395
396/X\x{123}{2,4}b/utf
397\= Expect no match
398 Xx\=ps
399 X\x{123}x\=ps
400 X\x{123}\x{123}x\=ps
401 X\x{123}\x{123}\x{123}x\=ps
402 X\x{123}\x{123}\x{123}\x{123}x\=ps
403
404/X\x{123}{2,4}?b/utf
405\= Expect no match
406 Xx\=ps
407 X\x{123}x\=ps
408 X\x{123}\x{123}x\=ps
409 X\x{123}\x{123}\x{123}x\=ps
410 X\x{123}\x{123}\x{123}\x{123}x\=ps
411
412/X\x{123}{2,4}+b/utf
413\= Expect no match
414 Xx\=ps
415 X\x{123}x\=ps
416 X\x{123}\x{123}x\=ps
417 X\x{123}\x{123}\x{123}x\=ps
418 X\x{123}\x{123}\x{123}\x{123}x\=ps
419
420/X\d{2,4}b/utf
421 X\=ps
422 X3\=ps
423 X33\=ps
424 X333\=ps
425 X3333\=ps
426
427/X\d{2,4}?b/utf
428 X\=ps
429 X3\=ps
430 X33\=ps
431 X333\=ps
432 X3333\=ps
433
434/X\d{2,4}+b/utf
435 X\=ps
436 X3\=ps
437 X33\=ps
438 X333\=ps
439 X3333\=ps
440
441/X\D{2,4}b/utf
442 X\=ps
443 Xa\=ps
444 Xaa\=ps
445 Xaaa\=ps
446 Xaaaa\=ps
447
448/X\D{2,4}?b/utf
449 X\=ps
450 Xa\=ps
451 Xaa\=ps
452 Xaaa\=ps
453 Xaaaa\=ps
454
455/X\D{2,4}+b/utf
456 X\=ps
457 Xa\=ps
458 Xaa\=ps
459 Xaaa\=ps
460 Xaaaa\=ps
461
462/X\D{2,4}b/utf
463 X\=ps
464 X\x{123}\=ps
465 X\x{123}\x{123}\=ps
466 X\x{123}\x{123}\x{123}\=ps
467 X\x{123}\x{123}\x{123}\x{123}\=ps
468
469/X\D{2,4}?b/utf
470 X\=ps
471 X\x{123}\=ps
472 X\x{123}\x{123}\=ps
473 X\x{123}\x{123}\x{123}\=ps
474 X\x{123}\x{123}\x{123}\x{123}\=ps
475
476/X\D{2,4}+b/utf
477 X\=ps
478 X\x{123}\=ps
479 X\x{123}\x{123}\=ps
480 X\x{123}\x{123}\x{123}\=ps
481 X\x{123}\x{123}\x{123}\x{123}\=ps
482
483/X[abc]{2,4}b/utf
484 X\=ps
485 Xa\=ps
486 Xaa\=ps
487 Xaaa\=ps
488 Xaaaa\=ps
489
490/X[abc]{2,4}?b/utf
491 X\=ps
492 Xa\=ps
493 Xaa\=ps
494 Xaaa\=ps
495 Xaaaa\=ps
496
497/X[abc]{2,4}+b/utf
498 X\=ps
499 Xa\=ps
500 Xaa\=ps
501 Xaaa\=ps
502 Xaaaa\=ps
503
504/X[abc\x{123}]{2,4}b/utf
505 X\=ps
506 X\x{123}\=ps
507 X\x{123}\x{123}\=ps
508 X\x{123}\x{123}\x{123}\=ps
509 X\x{123}\x{123}\x{123}\x{123}\=ps
510
511/X[abc\x{123}]{2,4}?b/utf
512 X\=ps
513 X\x{123}\=ps
514 X\x{123}\x{123}\=ps
515 X\x{123}\x{123}\x{123}\=ps
516 X\x{123}\x{123}\x{123}\x{123}\=ps
517
518/X[abc\x{123}]{2,4}+b/utf
519 X\=ps
520 X\x{123}\=ps
521 X\x{123}\x{123}\=ps
522 X\x{123}\x{123}\x{123}\=ps
523 X\x{123}\x{123}\x{123}\x{123}\=ps
524
525/X[^a]{2,4}b/utf
526 X\=ps
527 Xz\=ps
528 Xzz\=ps
529 Xzzz\=ps
530 Xzzzz\=ps
531
532/X[^a]{2,4}?b/utf
533 X\=ps
534 Xz\=ps
535 Xzz\=ps
536 Xzzz\=ps
537 Xzzzz\=ps
538
539/X[^a]{2,4}+b/utf
540 X\=ps
541 Xz\=ps
542 Xzz\=ps
543 Xzzz\=ps
544 Xzzzz\=ps
545
546/X[^a]{2,4}b/utf
547 X\=ps
548 X\x{123}\=ps
549 X\x{123}\x{123}\=ps
550 X\x{123}\x{123}\x{123}\=ps
551 X\x{123}\x{123}\x{123}\x{123}\=ps
552
553/X[^a]{2,4}?b/utf
554 X\=ps
555 X\x{123}\=ps
556 X\x{123}\x{123}\=ps
557 X\x{123}\x{123}\x{123}\=ps
558 X\x{123}\x{123}\x{123}\x{123}\=ps
559
560/X[^a]{2,4}+b/utf
561 X\=ps
562 X\x{123}\=ps
563 X\x{123}\x{123}\=ps
564 X\x{123}\x{123}\x{123}\=ps
565 X\x{123}\x{123}\x{123}\x{123}\=ps
566
567/(Y)X\1{2,4}b/utf
568 YX\=ps
569 YXY\=ps
570 YXYY\=ps
571 YXYYY\=ps
572 YXYYYY\=ps
573
574/(Y)X\1{2,4}?b/utf
575 YX\=ps
576 YXY\=ps
577 YXYY\=ps
578 YXYYY\=ps
579 YXYYYY\=ps
580
581/(Y)X\1{2,4}+b/utf
582 YX\=ps
583 YXY\=ps
584 YXYY\=ps
585 YXYYY\=ps
586 YXYYYY\=ps
587
588/(\x{123})X\1{2,4}b/utf
589 \x{123}X\=ps
590 \x{123}X\x{123}\=ps
591 \x{123}X\x{123}\x{123}\=ps
592 \x{123}X\x{123}\x{123}\x{123}\=ps
593 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
594
595/(\x{123})X\1{2,4}?b/utf
596 \x{123}X\=ps
597 \x{123}X\x{123}\=ps
598 \x{123}X\x{123}\x{123}\=ps
599 \x{123}X\x{123}\x{123}\x{123}\=ps
600 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
601
602/(\x{123})X\1{2,4}+b/utf
603 \x{123}X\=ps
604 \x{123}X\x{123}\=ps
605 \x{123}X\x{123}\x{123}\=ps
606 \x{123}X\x{123}\x{123}\x{123}\=ps
607 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
608
609/\bthe cat\b/utf
610 the cat\=ps
611 the cat\=ph
612
613/abcd*/utf
614 xxxxabcd\=ps
615 xxxxabcd\=ph
616
617/abcd*/i,utf
618 xxxxabcd\=ps
619 xxxxabcd\=ph
620 XXXXABCD\=ps
621 XXXXABCD\=ph
622
623/abc\d*/utf
624 xxxxabc1\=ps
625 xxxxabc1\=ph
626
627/(a)bc\1*/utf
628 xxxxabca\=ps
629 xxxxabca\=ph
630
631/abc[de]*/utf
632 xxxxabcde\=ps
633 xxxxabcde\=ph
634
635/X\W{3}X/utf
636 X\=ps
637
638/\sxxx\s/utf,tables=2
639 AB\x{85}xxx\x{a0}XYZ
640 AB\x{a0}xxx\x{85}XYZ
641
642/\S \S/utf,tables=2
643 \x{a2} \x{84}
644
645'A#хц'Bx,newline=any,utf
646
647'A#хц
648 PQ'Bx,newline=any,utf
649
650/a+#хaa
651 z#XX?/Bx,newline=any,utf
652
653/a+#хaa
654 z#х?/Bx,newline=any,utf
655
656/\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf
657
658/\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf
659
660/^\cģ/utf
661
662/(\R*)(.)/s,utf
663 \r\n
664 \r\r\n\n\r
665 \r\r\n\n\r\n
666
667/(\R)*(.)/s,utf
668 \r\n
669 \r\r\n\n\r
670 \r\r\n\n\r\n
671
672/[^\x{1234}]+/Ii,utf
673
674/[^\x{1234}]+?/Ii,utf
675
676/[^\x{1234}]++/Ii,utf
677
678/[^\x{1234}]{2}/Ii,utf
679
680/f.*/
681 for\=ph
682
683/f.*/s
684 for\=ph
685
686/f.*/utf
687 for\=ph
688
689/f.*/s,utf
690 for\=ph
691
692/\x{d7ff}\x{e000}/utf
693
694/\x{d800}/utf
695
696/\x{dfff}/utf
697
698/\h+/utf
699 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
700 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
701
702/[\h\x{e000}]+/B,utf
703 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
704 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
705
706/\H+/utf
707 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
708 \x{2000}\x{200a}\x{1fff}\x{200b}
709 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
710 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
711
712/[\H\x{d7ff}]+/B,utf
713 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
714 \x{2000}\x{200a}\x{1fff}\x{200b}
715 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
716 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
717
718/\v+/utf
719 \x{2027}\x{2030}\x{2028}\x{2029}
720 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
721
722/[\v\x{e000}]+/B,utf
723 \x{2027}\x{2030}\x{2028}\x{2029}
724 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
725
726/\V+/utf
727 \x{2028}\x{2029}\x{2027}\x{2030}
728 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
729
730/[\V\x{d7ff}]+/B,utf
731 \x{2028}\x{2029}\x{2027}\x{2030}
732 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
733
734/\R+/bsr=unicode,utf
735 \x{2027}\x{2030}\x{2028}\x{2029}
736 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
737
738/(..)\1/utf
739 ab\=ps
740 aba\=ps
741 abab\=ps
742
743/(..)\1/i,utf
744 ab\=ps
745 abA\=ps
746 aBAb\=ps
747
748/(..)\1{2,}/utf
749 ab\=ps
750 aba\=ps
751 abab\=ps
752 ababa\=ps
753 ababab\=ps
754 ababab\=ph
755 abababa\=ps
756 abababa\=ph
757
758/(..)\1{2,}/i,utf
759 ab\=ps
760 aBa\=ps
761 aBAb\=ps
762 AbaBA\=ps
763 abABAb\=ps
764 aBAbaB\=ph
765 abABabA\=ps
766 abaBABa\=ph
767
768/(..)\1{2,}?x/i,utf
769 ab\=ps
770 abA\=ps
771 aBAb\=ps
772 abaBA\=ps
773 abAbaB\=ps
774 abaBabA\=ps
775 abAbABaBx\=ps
776
777/./utf,newline=crlf
778 \r\=ps
779 \r\=ph
780
781/.{2,3}/utf,newline=crlf
782 \r\=ps
783 \r\=ph
784 \r\r\=ps
785 \r\r\=ph
786 \r\r\r\=ps
787 \r\r\r\=ph
788
789/.{2,3}?/utf,newline=crlf
790 \r\=ps
791 \r\=ph
792 \r\r\=ps
793 \r\r\=ph
794 \r\r\r\=ps
795 \r\r\r\=ph
796
797/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
798
799/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
800
801/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
802
803/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
804
805/(?<=\x{1234}\x{1234})\bxy/I,utf
806
807/(?<!^)ETA/utf
808\= Expect no match
809 ETA
810
811/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
812
813/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
814
815/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
816
817/^\u{0000000000010ffff}/utf,extra_alt_bsux
818 \x{10ffff}
819
820/\u/utf,alt_bsux
821 \\u
822
823/^a+[a\x{200}]/B,utf
824 aa
825
826/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
827
828/[\p{L}]/IB
829
830/[\p{^L}]/IB
831
832/[\P{L}]/IB
833
834/[\P{^L}]/IB
835
836/[abc\p{L}\x{0660}]/IB,utf
837
838/[\p{Nd}]/IB,utf
839 1234
840
841/[\p{Nd}+-]+/IB,utf
842 1234
843 12-34
844 12+\x{661}-34
845\= Expect no match
846 abcd
847
848/(?:[\PPa*]*){8,}/
849
850/[\P{Any}]/B
851
852/[\P{Any}\E]/B
853
854/(\P{Yi}+\277)/
855
856/(\P{Yi}+\277)?/
857
858/(?<=\P{Yi}{3}A)X/
859
860/\p{Yi}+(\P{Yi}+)(?1)/
861
862/(\P{Yi}{2}\277)?/
863
864/[\P{Yi}A]/
865
866/[\P{Yi}\P{Yi}\P{Yi}A]/
867
868/[^\P{Yi}A]/
869
870/[^\P{Yi}\P{Yi}\P{Yi}A]/
871
872/(\P{Yi}*\277)*/
873
874/(\P{Yi}*?\277)*/
875
876/(\p{Yi}*+\277)*/
877
878/(\P{Yi}?\277)*/
879
880/(\P{Yi}??\277)*/
881
882/(\p{Yi}?+\277)*/
883
884/(\P{Yi}{0,3}\277)*/
885
886/(\P{Yi}{0,3}?\277)*/
887
888/(\p{Yi}{0,3}+\277)*/
889
890/\p{Zl}{2,3}+/B,utf
891 


892 \x{2028}\x{2028}\x{2028}
893
894/\p{Zl}/B,utf
895
896/\p{Lu}{3}+/B,utf
897
898/\pL{2}+/B,utf
899
900/\p{Cc}{2}+/B,utf
901
902/^\p{Cf}/utf
903 \x{180e}
904 \x{061c}
905 \x{2066}
906 \x{2067}
907 \x{2068}
908 \x{2069}
909
910/^\p{Cs}/utf
911 \x{dfff}\=no_utf_check
912\= Expect no match
913 \x{09f}
914
915/^\p{Mn}/utf
916 \x{1a1b}
917
918/^\p{Pe}/utf
919 \x{2309}
920 \x{230b}
921
922/^\p{Ps}/utf
923 \x{2308}
924 \x{230a}
925
926/^\p{Sc}+/utf
927 $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
928 \x{9f2}
929\= Expect no match
930 X
931 \x{2c2}
932
933/^\p{Zs}/utf
934 \ \
935 \x{a0}
936 \x{1680}
937 \x{2000}
938 \x{2001}
939\= Expect no match
940 \x{2028}
941 \x{200d}
942
943# These are here because Perl has problems with the negative versions of the
944# properties and has changed how it behaves for caseless matching.
945
946/\p{^Lu}/i,utf
947 1234
948\= Expect no match
949 ABC
950
951/\P{Lu}/i,utf
952 1234
953\= Expect no match
954 ABC
955
956/\p{Ll}/i,utf
957 a
958 Az
959\= Expect no match
960 ABC
961
962/\p{Lu}/i,utf
963 A
964 a\x{10a0}B
965\= Expect no match
966 a
967 \x{1d00}
968
969/\p{Lu}/i,utf
970 A
971 aZ
972\= Expect no match
973 abc
974
975/[\x{c0}\x{391}]/i,utf
976 \x{c0}
977 \x{e0}
978
979# The next two are special cases where the lengths of the different cases of
980# the same character differ. The first went wrong with heap frame storage; the
981# second was broken in all cases.
982
983/^\x{023a}+?(\x{0130}+)/i,utf
984 \x{023a}\x{2c65}\x{0130}
985
986/^\x{023a}+([^X])/i,utf
987 \x{023a}\x{2c65}X
988
989/\x{c0}+\x{116}+/i,utf
990 \x{c0}\x{e0}\x{116}\x{117}
991
992/[\x{c0}\x{116}]+/i,utf
993 \x{c0}\x{e0}\x{116}\x{117}
994
995/(\x{de})\1/i,utf
996 \x{de}\x{de}
997 \x{de}\x{fe}
998 \x{fe}\x{fe}
999 \x{fe}\x{de}
1000
1001/^\x{c0}$/i,utf
1002 \x{c0}
1003 \x{e0}
1004
1005/^\x{e0}$/i,utf
1006 \x{c0}
1007 \x{e0}
1008
1009# The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
1010# will match it only with UCP support, because without that it has no notion
1011# of case for anything other than the ASCII letters.
1012
1013/((?i)[\x{c0}])/utf
1014 \x{c0}
1015 \x{e0}
1016
1017/(?i:[\x{c0}])/utf
1018 \x{c0}
1019 \x{e0}
1020
1021# These are PCRE's extra properties to help with Unicodizing \d etc.
1022
1023/^\p{Xan}/utf
1024 ABCD
1025 1234
1026 \x{6ca}
1027 \x{a6c}
1028 \x{10a7}
1029\= Expect no match
1030 _ABC
1031
1032/^\p{Xan}+/utf
1033 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1034\= Expect no match
1035 _ABC
1036
1037/^\p{Xan}+?/utf
1038 \x{6ca}\x{a6c}\x{10a7}_
1039
1040/^\p{Xan}*/utf
1041 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1042
1043/^\p{Xan}{2,9}/utf
1044 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1045
1046/^\p{Xan}{2,9}?/utf
1047 \x{6ca}\x{a6c}\x{10a7}_
1048
1049/^[\p{Xan}]/utf
1050 ABCD1234_
1051 1234abcd_
1052 \x{6ca}
1053 \x{a6c}
1054 \x{10a7}
1055\= Expect no match
1056 _ABC
1057
1058/^[\p{Xan}]+/utf
1059 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1060\= Expect no match
1061 _ABC
1062
1063/^>\p{Xsp}/utf
1064 >\x{1680}\x{2028}\x{0b}
1065 >\x{a0}
1066\= Expect no match
1067 \x{0b}
1068
1069/^>\p{Xsp}+/utf
1070 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1071
1072/^>\p{Xsp}+?/utf
1073 >\x{1680}\x{2028}\x{0b}
1074
1075/^>\p{Xsp}*/utf
1076 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1077
1078/^>\p{Xsp}{2,9}/utf
1079 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1080
1081/^>\p{Xsp}{2,9}?/utf
1082 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1083
1084/^>[\p{Xsp}]/utf
1085 >\x{2028}\x{0b}
1086
1087/^>[\p{Xsp}]+/utf
1088 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1089
1090/^>\p{Xps}/utf
1091 >\x{1680}\x{2028}\x{0b}
1092 >\x{a0}
1093\= Expect no match
1094 \x{0b}
1095
1096/^>\p{Xps}+/utf
1097 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1098
1099/^>\p{Xps}+?/utf
1100 >\x{1680}\x{2028}\x{0b}
1101
1102/^>\p{Xps}*/utf
1103 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1104
1105/^>\p{Xps}{2,9}/utf
1106 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1107
1108/^>\p{Xps}{2,9}?/utf
1109 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1110
1111/^>[\p{Xps}]/utf
1112 >\x{2028}\x{0b}
1113
1114/^>[\p{Xps}]+/utf
1115 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1116
1117/^\p{Xwd}/utf
1118 ABCD
1119 1234
1120 \x{6ca}
1121 \x{a6c}
1122 \x{10a7}
1123 _ABC
1124\= Expect no match
1125 []
1126
1127/^\p{Xwd}+/utf
1128 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1129
1130/^\p{Xwd}+?/utf
1131 \x{6ca}\x{a6c}\x{10a7}_
1132
1133/^\p{Xwd}*/utf
1134 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1135
1136/^\p{Xwd}{2,9}/utf
1137 A_B12\x{6ca}\x{a6c}\x{10a7}
1138
1139/^\p{Xwd}{2,9}?/utf
1140 \x{6ca}\x{a6c}\x{10a7}_
1141
1142/^[\p{Xwd}]/utf
1143 ABCD1234_
1144 1234abcd_
1145 \x{6ca}
1146 \x{a6c}
1147 \x{10a7}
1148 _ABC
1149\= Expect no match
1150 []
1151
1152/^[\p{Xwd}]+/utf
1153 ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1154
1155# A check not in UTF-8 mode
1156
1157/^[\p{Xwd}]+/
1158 ABCD1234_
1159
1160# Some negative checks
1161
1162/^[\P{Xwd}]+/utf
1163 !.+\x{019}\x{35a}AB
1164
1165/^[\p{^Xwd}]+/utf
1166 !.+\x{019}\x{35a}AB
1167
1168/[\D]/B,utf,ucp
1169 1\x{3c8}2
1170
1171/[\d]/B,utf,ucp
1172 >\x{6f4}<
1173
1174/[\S]/B,utf,ucp
1175 \x{1680}\x{6f4}\x{1680}
1176
1177/[\s]/B,utf,ucp
1178 >\x{1680}<
1179
1180/[\W]/B,utf,ucp
1181 A\x{1712}B
1182
1183/[\w]/B,utf,ucp
1184 >\x{1723}<
1185
1186/\D/B,utf,ucp
1187 1\x{3c8}2
1188
1189/\d/B,utf,ucp
1190 >\x{6f4}<
1191
1192/\S/B,utf,ucp
1193 \x{1680}\x{6f4}\x{1680}
1194
1195/\s/B,utf,ucp
1196 >\x{1680}>
1197
1198/\W/B,utf,ucp
1199 A\x{1712}B
1200
1201/\w/B,utf,ucp
1202 >\x{1723}<
1203
1204/[[:alpha:]]/B,ucp
1205
1206/[[:lower:]]/B,ucp
1207
1208/[[:upper:]]/B,ucp
1209
1210/[[:alnum:]]/B,ucp
1211
1212/[[:ascii:]]/B,ucp
1213
1214/[[:cntrl:]]/B,ucp
1215
1216/[[:digit:]]/B,ucp
1217
1218/[[:graph:]]/B,ucp
1219
1220/[[:print:]]/B,ucp
1221
1222/[[:punct:]]/B,ucp
1223
1224/[[:space:]]/B,ucp
1225
1226/[[:word:]]/B,ucp
1227
1228/[[:xdigit:]]/B,ucp
1229
1230# Unicode properties for \b and \B
1231
1232/\b...\B/utf,ucp
1233 abc_
1234 \x{37e}abc\x{376}
1235 \x{37e}\x{376}\x{371}\x{393}\x{394}
1236 !\x{c0}++\x{c1}\x{c2}
1237 !\x{c0}+++++
1238
1239# Without PCRE_UCP, non-ASCII characters always fail, even if < 256
1240
1241/\b...\B/utf
1242 abc_
1243\= Expect no match
1244 \x{37e}abc\x{376}
1245 \x{37e}\x{376}\x{371}\x{393}\x{394}
1246 !\x{c0}++\x{c1}\x{c2}
1247 !\x{c0}+++++
1248
1249# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
1250
1251/\b...\B/ucp
1252 abc_
1253 !\x{c0}++\x{c1}\x{c2}
1254 !\x{c0}+++++
1255
1256# Some of these are silly, but they check various combinations
1257
1258/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
1259 123
1260 abc
1261
1262/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
1263 123
1264 abc
1265
1266/[[:alpha:]]+/B,utf,ucp
1267 abc
1268
1269/[[:^alpha:]\S]+/B,utf,ucp
1270 123
1271 abc
1272
1273/[^\d]+/B,utf,ucp
1274 abc123
1275 abc\x{123}
1276 \x{660}abc
1277
1278/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
1279
1280/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
1281
1282/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
1283
1284/\p{Han}+X\p{Greek}+\x{370}/B,utf
1285
1286/\p{Xan}+!\p{Xan}+A/B
1287
1288/\p{Xsp}+!\p{Xsp}\t/B
1289
1290/\p{Xps}+!\p{Xps}\t/B
1291
1292/\p{Xwd}+!\p{Xwd}_/B
1293
1294/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
1295
1296# These behaved oddly in Perl, so they are kept in this test
1297
1298/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1299\= Expect no match
1300 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1301
1302/(ȺȺȺ)?\1/i,utf
1303\= Expect no match
1304 ȺȺȺⱥⱥ
1305
1306/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1307 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1308
1309/(ȺȺȺ)?\1/i,utf
1310 ȺȺȺⱥⱥⱥ
1311
1312/(\x{23a}\x{23a}\x{23a})\1/i,utf
1313\= Expect no match
1314 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1315
1316/(ȺȺȺ)\1/i,utf
1317\= Expect no match
1318 ȺȺȺⱥⱥ
1319
1320/(\x{23a}\x{23a}\x{23a})\1/i,utf
1321 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1322
1323/(ȺȺȺ)\1/i,utf
1324 ȺȺȺⱥⱥⱥ
1325
1326/(\x{2c65}\x{2c65})\1/i,utf
1327 \x{2c65}\x{2c65}\x{23a}\x{23a}
1328
1329/(ⱥⱥ)\1/i,utf
1330 ⱥⱥȺȺ
1331
1332/(\x{23a}\x{23a}\x{23a})\1Y/i,utf
1333 X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
1334
1335/(\x{2c65}\x{2c65})\1Y/i,utf
1336 X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
1337
1338# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
1339
1340/^[\p{Batak}]/utf
1341 \x{1bc0}
1342 \x{1bff}
1343\= Expect no match
1344 \x{1bf4}
1345
1346/^[\p{Brahmi}]/utf
1347 \x{11000}
1348 \x{1106f}
1349\= Expect no match
1350 \x{1104e}
1351
1352/^[\p{Mandaic}]/utf
1353 \x{840}
1354 \x{85e}
1355\= Expect no match
1356 \x{85c}
1357 \x{85d}
1358
1359/(\X*)(.)/s,utf
1360 A\x{300}
1361
1362/^S(\X*)e(\X*)$/utf
1363 Stéréo
1364
1365/^\X/utf
1366 ́réo
1367
1368/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
1369 aX41z
1370\= Expect no match
1371 aAz
1372
1373/\X/
1374 a\=ps
1375 a\=ph
1376
1377/\Xa/
1378 aa\=ps
1379 aa\=ph
1380
1381/\X{2}/
1382 aa\=ps
1383 aa\=ph
1384
1385/\X+a/
1386 a\=ps
1387 aa\=ps
1388 aa\=ph
1389
1390/\X+?a/
1391 a\=ps
1392 ab\=ps
1393 aa\=ps
1394 aa\=ph
1395 aba\=ps
1396
1397# These Unicode 6.1.0 scripts are not known to Perl.
1398
1399/\p{Chakma}\d/utf,ucp
1400 \x{11100}\x{1113c}
1401
1402/\p{Takri}\d/utf,ucp
1403 \x{11680}\x{116c0}
1404
1405/^\X/utf
1406 A\=ps
1407 A\=ph
1408 A\x{300}\x{301}\=ps
1409 A\x{300}\x{301}\=ph
1410 A\x{301}\=ps
1411 A\x{301}\=ph
1412
1413/^\X{2,3}/utf
1414 A\=ps
1415 A\=ph
1416 AA\=ps
1417 AA\=ph
1418 A\x{300}\x{301}\=ps
1419 A\x{300}\x{301}\=ph
1420 A\x{300}\x{301}A\x{300}\x{301}\=ps
1421 A\x{300}\x{301}A\x{300}\x{301}\=ph
1422
1423/^\X{2}/utf
1424 AA\=ps
1425 AA\=ph
1426 A\x{300}\x{301}A\x{300}\x{301}\=ps
1427 A\x{300}\x{301}A\x{300}\x{301}\=ph
1428
1429/^\X+/utf
1430 AA\=ps
1431 AA\=ph
1432
1433/^\X+?Z/utf
1434 AA\=ps
1435 AA\=ph
1436
1437/A\x{3a3}B/IBi,utf
1438
1439/[\x{3a3}]/Bi,utf
1440
1441/[^\x{3a3}]/Bi,utf
1442
1443/[\x{3a3}]+/Bi,utf
1444
1445/[^\x{3a3}]+/Bi,utf
1446
1447/a*\x{3a3}/Bi,utf
1448
1449/\x{3a3}+a/Bi,utf
1450
1451/\x{3a3}*\x{3c2}/Bi,utf
1452
1453/\x{3a3}{3}/i,utf,aftertext
1454 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1455
1456/\x{3a3}{2,4}/i,utf,aftertext
1457 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1458
1459/\x{3a3}{2,4}?/i,utf,aftertext
1460 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1461
1462/\x{3a3}+./i,utf,aftertext
1463 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1464
1465/\x{3a3}++./i,utf,aftertext
1466\= Expect no match
1467 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1468
1469/\x{3a3}*\x{3c2}/Bi,utf
1470
1471/[^\x{3a3}]*\x{3c2}/Bi,utf
1472
1473/[^a]*\x{3c2}/Bi,utf
1474
1475/ist/Bi,utf
1476\= Expect no match
1477 ikt
1478
1479/is+t/i,utf
1480 iSs\x{17f}t
1481\= Expect no match
1482 ikt
1483
1484/is+?t/i,utf
1485\= Expect no match
1486 ikt
1487
1488/is?t/i,utf
1489\= Expect no match
1490 ikt
1491
1492/is{2}t/i,utf
1493\= Expect no match
1494 iskt
1495
1496# This property is a PCRE special
1497
1498/^\p{Xuc}/utf
1499 $abc
1500 @abc
1501 `abc
1502 \x{1234}abc
1503\= Expect no match
1504 abc
1505
1506/^\p{Xuc}+/utf
1507 $@`\x{a0}\x{1234}\x{e000}**
1508\= Expect no match
1509 \x{9f}
1510
1511/^\p{Xuc}+?/utf
1512 $@`\x{a0}\x{1234}\x{e000}**
1513\= Expect no match
1514 \x{9f}
1515
1516/^\p{Xuc}+?\*/utf
1517 $@`\x{a0}\x{1234}\x{e000}**
1518\= Expect no match
1519 \x{9f}
1520
1521/^\p{Xuc}++/utf
1522 $@`\x{a0}\x{1234}\x{e000}**
1523\= Expect no match
1524 \x{9f}
1525
1526/^\p{Xuc}{3,5}/utf
1527 $@`\x{a0}\x{1234}\x{e000}**
1528\= Expect no match
1529 \x{9f}
1530
1531/^\p{Xuc}{3,5}?/utf
1532 $@`\x{a0}\x{1234}\x{e000}**
1533\= Expect no match
1534 \x{9f}
1535
1536/^[\p{Xuc}]/utf
1537 $@`\x{a0}\x{1234}\x{e000}**
1538\= Expect no match
1539 \x{9f}
1540
1541/^[\p{Xuc}]+/utf
1542 $@`\x{a0}\x{1234}\x{e000}**
1543\= Expect no match
1544 \x{9f}
1545
1546/^\P{Xuc}/utf
1547 abc
1548\= Expect no match
1549 $abc
1550 @abc
1551 `abc
1552 \x{1234}abc
1553
1554/^[\P{Xuc}]/utf
1555 abc
1556\= Expect no match
1557 $abc
1558 @abc
1559 `abc
1560 \x{1234}abc
1561
1562# Some auto-possessification tests
1563
1564/\pN+\z/B
1565
1566/\PN+\z/B
1567
1568/\pN+/B
1569
1570/\PN+/B
1571
1572/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
1573
1574/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
1575
1576/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
1577
1578/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
1579
1580/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
1581
1582/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
1583
1584/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
1585
1586/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
1587
1588/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
1589
1590/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
1591
1592/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
1593
1594/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
1595
1596/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
1597
1598/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
1599
1600/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
1601
1602/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
1603
1604/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
1605
1606/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
1607
1608/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
1609
1610/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
1611
1612# End auto-possessification tests
1613
1614/\w+/B,utf,ucp,auto_callout
1615 abcd
1616
1617/[\p{N}]?+/B,no_auto_possess
1618
1619/[\p{L}ab]{2,3}+/B,no_auto_possess
1620
1621/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
1622
1623/.+\X/Bsx
1624
1625/\X+$/Bmx
1626
1627/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
1628
1629/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
1630
1631/[RST]+/Bi,utf,ucp
1632
1633/[R-T]+/Bi,utf,ucp
1634
1635/[Q-U]+/Bi,utf,ucp
1636
1637/^s?c/Iim,utf
1638 scat
1639
1640/\X?abc/utf,no_start_optimize
1641 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
1642
1643/\x{100}\x{200}\K\x{300}/utf,startchar
1644 \x{100}\x{200}\x{300}
1645
1646# Test UTF characters in a substitution
1647
1648/ábc/utf,replace=XሴZ
1649 123ábc123
1650
1651/(?<=abc)(|def)/g,utf,replace=<$0>
1652 123abcáyzabcdef789abcሴqr
1653
1654/[A-`]/iB,utf
1655 abcdefghijklmno
1656
1657/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk
1658 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1659
1660/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk
1661 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1662
1663"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
1664
1665/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
1666
1667"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
1668
1669/[\pS#moq]/
1670 =
1671
1672/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
1673 cxxxz
1674
1675/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
1676 abcd
1677
1678/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
1679 a\x{e0}\x{101}\x{c0}\x{102}
1680
1681/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
1682 ab12cde
1683
1684/(*UCP)(*UTF)[[:>:]]X/B
1685
1686/abc/utf,replace=xyz
1687 abc\=zero_terminate
1688
1689/a[[:punct:]b]/ucp,bincode
1690
1691/a[[:punct:]b]/utf,ucp,bincode
1692
1693/a[b[:punct:]]/utf,ucp,bincode
1694
1695/[[:^ascii:]]/utf,ucp,bincode
1696
1697/[[:^ascii:]\w]/utf,ucp,bincode
1698
1699/[\w[:^ascii:]]/utf,ucp,bincode
1700
1701/[^[:ascii:]\W]/utf,ucp,bincode
1702 \x{de}
1703 \x{200}
1704\= Expect no match
1705 \x{300}
1706 \x{37e}
1707
1708/[[:^ascii:]a]/utf,ucp,bincode
1709
1710/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
1711
1712/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
1713
1714/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
1715
1716/[\D]/utf
1717 \x{1d7cf}
1718
1719/[\D\P{Nd}]/utf
1720 \x{1d7cf}
1721
1722/[^\D]/utf
1723 a9b
1724\= Expect no match
1725 \x{1d7cf}
1726
1727/[^\D\P{Nd}]/utf
1728 a9b
1729 \x{1d7cf}
1730\= Expect no match
1731 \x{10000}
1732
1733# Hex uses pattern length, not zero-terminated. This tests for overrunning
1734# the given length of a pattern.
1735
1736/'(*UTF)'/hex
1737
1738/'#('/hex,extended,utf
1739
1740/a(?<=A\XB)/utf
1741
1742/ab(?<=A\RB)/utf
1743
1744/../utf,auto_callout
1745 \n\x{123}\x{123}\x{123}\x{123}
1746
1747# This tests processing wide characters in extended mode.
1748
1749/XȀ/x,utf
1750
1751# These three test the fix for a bug where a locale setting was not being
1752# cleared up when the test or a subsequent one matched a wide character.
1753
1754//locale=C
1755
1756/[\P{Yi}]/utf
1757\x{2f000}
1758
1759/[\P{Yi}]/utf,locale=C
1760\x{2f000}
1761
1762/^(?<!(?=􃡜))/B,utf
1763
1764# Horizontal and vertical space lists ignore the caseless option
1765
1766/[\HH]/Bi,utf
1767
1768/[^\HH]/Bi,utf
1769
1770//g,utf
1771 \=zero_terminate
1772
1773/^(?1)\p{Nd}{3}(a)/
1774 a123a
1775
1776/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
1777
1778# ---------------------------------------------------------------------------
1779
1780# A bunch of tests that hit lines of code that others do not (at least when
1781# these were created).
1782
1783/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
1784\= Expect no match
1785 bbb
1786 cc
1787
1788/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
1789\= Expect no match
1790 aaa\x{100}
1791
1792/^X\X/no_start_optimize,no_auto_possess
1793\= Expect no match
1794 X
1795
1796/^X\p{L&}+?/no_start_optimize,no_auto_possess
1797\= Expect no match
1798 X
1799
1800/^X\p{L}+?/no_start_optimize,no_auto_possess
1801\= Expect no match
1802 X
1803
1804/^X\p{Lu}+?/no_start_optimize,no_auto_possess
1805\= Expect no match
1806 X
1807
1808/^X\p{Arabic}+?/no_start_optimize,no_auto_possess
1809\= Expect no match
1810 X
1811
1812/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
1813\= Expect no match
1814 X
1815
1816/^X\s+?/ucp,no_start_optimize,no_auto_possess
1817\= Expect no match
1818 X
1819 XX
1820
1821/^X\S+?/ucp,no_start_optimize,no_auto_possess
1822 XX
1823\= Expect no match
1824 X
1825
1826/^X\w+?/ucp,no_start_optimize,no_auto_possess
1827\= Expect no match
1828 X
1829
1830/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
1831\= Expect no match
1832 X
1833
1834/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
1835\= Expect no match
1836 X
1837
1838/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
1839\= Expect no match
1840 X
1841
1842/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
1843\= Expect no match
1844 X
1845
1846/^X\R+?/utf,no_start_optimize,no_auto_possess
1847\= Expect no match
1848 X
1849
1850/^X\H+?/utf,no_start_optimize,no_auto_possess
1851\= Expect no match
1852 X
1853
1854/^X\V+?/utf,no_start_optimize,no_auto_possess
1855\= Expect no match
1856 X
1857
1858/^X\s+?/utf,no_start_optimize,no_auto_possess
1859\= Expect no match
1860 X
1861 XX
1862
1863/^X\S+?/utf,no_start_optimize,no_auto_possess
1864\= Expect no match
1865 X
1866
1867/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
1868 XYYYZ
1869\= Expect no match
1870 XY
1871 XYY
1872 XYYY
1873 XYYYYZ
1874
1875/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
1876\= Expect no match
1877 XY
1878 XY!
1879
1880/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
1881\= Expect no match
1882 XY
1883 XY!
1884
1885/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
1886\= Expect no match
1887 XY
1888 XY!
1889
1890/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
1891\= Expect no match
1892 XY
1893 XY!
1894 XY\x{2f00}!
1895
1896/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
1897\= Expect no match
1898 XY
1899 XY!
1900
1901/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
1902\= Expect no match
1903 X\n
1904 X\n!
1905 X\n\n!
1906
1907/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
1908\= Expect no match
1909 XYY\n
1910
1911/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
1912\= Expect no match
1913 XY
1914 XY!
1915 XYY!
1916
1917/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
1918\= Expect no match
1919 X
1920 X\x{b5}
1921 X\x{b5}\x{b5}Y
1922
1923/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
1924\= Expect no match
1925 X
1926 X$
1927 X@@Y
1928
1929/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
1930\= Expect partial match
1931 XYY\r\=ph
1932\= Expect no match
1933 X
1934
1935/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
1936\= Expect no match
1937 X
1938 XYY
1939
1940/^X\R+?Z/utf,no_start_optimize,no_auto_possess
1941\= Expect no match
1942 X\nX
1943 X\n\rX
1944 X\n\r\nX
1945 X\n\n
1946 X\n\x{0c}
1947
1948/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
1949\= Expect no match
1950 X\nX
1951 X\n\rX
1952 X\n\r\nX
1953 X\n\n
1954 X\n\x{0c}
1955
1956/^X\H+?Z/utf,no_start_optimize,no_auto_possess
1957\= Expect no match
1958 XY\t
1959 XYY
1960
1961/^X\h+?Z/utf,no_start_optimize,no_auto_possess
1962\= Expect no match
1963 X\t\t
1964 X\tY
1965
1966/^X\V+?Z/utf,no_start_optimize,no_auto_possess
1967\= Expect no match
1968 XY\n
1969 XYY
1970
1971/^X\v+?Z/utf,no_start_optimize,no_auto_possess
1972\= Expect no match
1973 X\n\n
1974 X\nY
1975
1976/^X\D+?Z/utf,no_start_optimize,no_auto_possess
1977\= Expect no match
1978 XY9
1979 XYY
1980
1981/^X\d+?Z/utf,no_start_optimize,no_auto_possess
1982\= Expect no match
1983 X99
1984 X9Y
1985
1986/^X\S+?Z/utf,no_start_optimize,no_auto_possess
1987\= Expect no match
1988 XY\n
1989 XYY
1990
1991/^X\s+?Z/utf,no_start_optimize,no_auto_possess
1992\= Expect no match
1993 X\n\n
1994 X\nY
1995
1996/^X\W+?Z/utf,no_start_optimize,no_auto_possess
1997\= Expect no match
1998 X.A
1999 X++
2000
2001/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
2002\= Expect no match
2003 XY
2004 XY!
2005
2006/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
2007\= Expect no match
2008 XY
2009
2010/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
2011\= Expect no match
2012 XY
2013
2014/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
2015\= Expect no match
2016 XYY
2017
2018/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
2019\= Expect no match
2020 X$
2021
2022# ----------------------------------------------------------------------
2023# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
2024
2025/\x{d800}/B,utf,bad_escape_is_literal
2026
2027/\ud800/B,utf,alt_bsux,bad_escape_is_literal
2028
2029# ----------------------------------------------------------------------
2030
2031/Aሴ+B/literal,utf,no_utf_check
2032 Aሴ+B
2033
2034# These are here because I upgraded to Unicode 10.0.0 before Perl did, so
2035# Perl doesn't recognize all these scripts. In time these three tests can be
2036# moved to test 4.
2037
2038/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
2039 (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
2040 (\p{Zanabazar_Square}+)/x,utf
2041 \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
2042
2043/^\x{1E900}\x{104B0}/i,utf
2044 \x{1E900}\x{104B0}
2045 \x{1E922}\x{104D8}
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002046
Elliott Hughes5b808042021-10-01 10:56:10 -07002047/^(?:(\X)(?C))+$/utf
2048 \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
2049
2050# Similarly for Unicode 11.0.0
2051
2052/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
2053 (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
2054 \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
2055
2056# These two are here because of differences from Perl.
2057
2058/^\X/utf
2059 A\x{200d}B A ZWJ
2060 \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic
2061 \x{261D}\x{1F3FB}B Extended_Pictographic Extend
2062 \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
2063 \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P
2064 \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P
2065
2066# Regional indicators
2067
2068/^(\X)(\X)/utf,aftertext
2069 \x{1F1E6}\x{1F1E7}\x{1F1E7}B
2070 \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
2071
2072# More differences from Perl
2073
Elliott Hughes5b808042021-10-01 10:56:10 -07002074/^\p{Common}/utf
2075 \x{60c}
2076 \x{61f}
2077 \x{964}
2078 \x{965}
2079
2080/^\p{Inherited}/utf
2081 \x{64b}
2082 \x{654}
2083 \x{655}
2084 \x{1D1AA}
2085
2086/\N{U+}/
2087
2088/\N{U+}/utf
2089
2090/\N{U}/
2091
2092# This tests the non-UTF Unicode NEL pattern whitespace character, only
2093# recognized by PCRE2 with /x when there is Unicode support.
2094
2095/A
2096 …B/x
2097 AB
2098
2099# This tests Unicode Pattern White Space characters in verb names when they
2100# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
2101# with code points greater than 255 between A, B, and C in the pattern.
2102
2103/(*: A‎B
C)abc/x,utf,mark,alt_verbnames
2104 abc
2105
2106# Script run tests: auto-possessification
2107
2108/^(*sr:.*)/B,utf
2109 paypаl.com A classic example of why script run checks are a good thing
2110
2111/^(*sr:.*(*ACCEPT))/utf
2112 paypаl.com But *ACCEPT breaks things
2113
2114/^(*sr:\x{2e80}*)/B,utf
2115
2116/^(*sr:\x{2e80}*)\x{2e80}/B,utf
2117
2118/(?<!)(*sr:)/B
2119
2120/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
2121 abcXBXYCCC!
2122
2123# Some script run patterns are broken in Perl 5.28.0. These can be moved into
2124# test 4 when a mended version of Perl is released.
2125
2126/^(*sr:.{4})/utf
2127 \x{0980}12\x{0993} Bengali Common-digits Bengali
2128 \x{0780}12\x{07b1} Thaana Common-digits Thaana
2129 \x{0e01}12\x{0e5b} Thai Common-digits Thai
2130 \x{1780}12\x{19ff} Khmer Common-digits Khmer
2131 \x{0904}12\x{0939} Devanagari Common-digits Devanagari
2132 A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
2133 A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
2134
2135# These ones involve non-ASCII but nevertheless Common digits. As of October
2136# 2018 even blead Perl wasn't handling all of these - but is going to.
2137
2138/^(*sr:.{4})/utf
2139 A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
2140 \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
2141 A\x{ff10}BC Latin Common-notascii-digit Latin Latin
2142 A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
2143 \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
2144 A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
2145
2146# Some Unicode 12.1.0 new script characters
2147
2148/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
2149 \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
2150
2151# Some Unicode 13.0.0 new script characters
2152
2153/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
2154 \x{10FB0}\x{11900}\x{18B00}\x{10E80}
2155
2156# -------
2157
2158# Test references to, and errors in, group names that contain non-ASCII characters
2159
2160/(?'𑠅ABC'...)/I,utf
2161 abcde\=copy=𑠅ABC
2162
2163# Bad ones
2164
2165/(?'AB၌C'...)\g{AB၌C}/utf
2166
2167/(?'٠ABC'...)/utf
2168
2169/(?'²ABC'...)/utf
2170
2171/(?'X²ABC'...)/utf
2172
2173# -------
2174
2175/\p{Any}*xyz/I
2176
2177/(|ß)7/caseless,ucp
2178
2179/(\xc1)\1/i,ucp
2180 \xc1\xe1\=no_jit
2181
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002182/\p{L&}+\p{bidi_control}/B
2183
2184/\p{bidi_control}+\p{L&}/B
2185
2186/\p{han}/B
2187
2188/\p{script:han}/B
2189
2190/\p{sc:han}/B
2191
2192/\p{script extensions:han}/B
2193
2194/\p{scx:han}/B
2195
2196# Test error - invalid script name
2197
2198/\p{sc:L}/
2199
2200# Some Boolean property tests that differ from Perl
2201
2202/\p{emojimodifierbase}\p{ebase}/g,utf
2203 >AN<>\x{261d}\x{1faf6}<>yz<
2204
2205/\p{graphemelink}\p{grlink}/g,utf
2206 >AN<>\x{11d97}\x{94d}<>yz<
2207
2208/\p{soft dotted}\p{sd}/g,utf
2209 >AF23<>\x{1df1a}\x{69}<>yz<
2210
2211# ------------------------------------------------
2212
2213/\p{\2b[:xäigi:t:_/
2214
Elliott Hughes5b808042021-10-01 10:56:10 -07002215# End of testinput5