blob: fd41df33f7683018b13f940f7f593ceeb1949633 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001//
2// Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20// CA 95054 USA or visit www.sun.com if you need additional information or
21// have any questions.
22//
23//
24// This file contains test cases for regular expressions.
25// A test case consists of three lines:
26// The first line is a pattern used in the test
27// The second line is the input to search for the pattern in
28// The third line is a concatenation of the match, the number of groups,
29// and the contents of the first four subexpressions.
30// Empty lines and lines beginning with comment slashes are ignored.
31//
32// Test unsetting of backed off groups
33^(a)?a
34a
35true a 1
36
37^(aa(bb)?)+$
38aabbaa
39true aabbaa 2 aa bb
40
41((a|b)?b)+
42b
43true b 2 b
44
45(aaa)?aaa
46aaa
47true aaa 1
48
49^(a(b)?)+$
50aba
51true aba 2 a b
52
53^(a(b(c)?)?)?abc
54abc
55true abc 3
56
57^(a(b(c))).*
58abc
59true abc 3 abc bc c
60
61// use of x modifier
62abc(?x)blah
63abcblah
64true abcblah 0
65
66abc(?x) blah
67abcblah
68true abcblah 0
69
70abc(?x) blah blech
71abcblahblech
72true abcblahblech 0
73
74abc(?x) blah # ignore comment
75abcblah
76true abcblah 0
77
78// Simple alternation
79a|b
80a
81true a 0
82
83a|b
84z
85false 0
86
87a|b
88b
89true b 0
90
91a|b|cd
92cd
93true cd 0
94
95a|ad
96ad
97true a 0
98
99z(a|ac)b
100zacb
101true zacb 1 ac
102
103// Simple char class
104[abc]+
105ababab
106true ababab 0
107
108[abc]+
109defg
110false 0
111
112[abc]+[def]+[ghi]+
113zzzaaddggzzz
114true aaddgg 0
115
116// Range char class
117[a-g]+
118zzzggg
119true ggg 0
120
121[a-g]+
122mmm
123false 0
124
125[a-]+
126za-9z
127true a- 0
128
129[a-\\u4444]+
130za-9z
131true za 0
132
133// Negated char class
134[^abc]+
135ababab
136false 0
137
138[^abc]+
139aaabbbcccdefg
140true defg 0
141
142// Making sure a ^ not in first position matches literal ^
143[abc^b]
144b
145true b 0
146
147[abc^b]
148^
149true ^ 0
150
151// Class union and intersection
152[abc[def]]
153b
154true b 0
155
156[abc[def]]
157e
158true e 0
159
160[a-d[0-9][m-p]]
161a
162true a 0
163
164[a-d[0-9][m-p]]
165o
166true o 0
167
168[a-d[0-9][m-p]]
1694
170true 4 0
171
172[a-d[0-9][m-p]]
173e
174false 0
175
176[a-d[0-9][m-p]]
177u
178false 0
179
180[[a-d][0-9][m-p]]
181b
182true b 0
183
184[[a-d][0-9][m-p]]
185z
186false 0
187
188[a-c[d-f[g-i]]]
189a
190true a 0
191
192[a-c[d-f[g-i]]]
193e
194true e 0
195
196[a-c[d-f[g-i]]]
197h
198true h 0
199
200[a-c[d-f[g-i]]]
201m
202false 0
203
204[a-c[d-f[g-i]]m]
205m
206true m 0
207
208[abc[def]ghi]
209a
210true a 0
211
212[abc[def]ghi]
213d
214true d 0
215
216[abc[def]ghi]
217h
218true h 0
219
220[abc[def]ghi]
221w
222false 0
223
224[a-c&&[d-f]]
225a
226false 0
227
228[a-c&&[d-f]]
229e
230false 0
231
232[a-c&&[d-f]]
233z
234false 0
235
236[[a-c]&&[d-f]]
237a
238false 0
239
240[[a-c]&&[d-f]]
241e
242false 0
243
244[[a-c]&&[d-f]]
245z
246false 0
247
248[a-c&&d-f]
249a
250false 0
251
252[a-m&&m-z]
253m
254true m 0
255
256[a-m&&m-z&&a-c]
257m
258false 0
259
260[a-m&&m-z&&a-z]
261m
262true m 0
263
264[[a-m]&&[m-z]]
265a
266false 0
267
268[[a-m]&&[m-z]]
269m
270true m 0
271
272[[a-m]&&[m-z]]
273z
274false 0
275
276[[a-m]&&[^a-c]]
277a
278false 0
279
280[[a-m]&&[^a-c]]
281d
282true d 0
283
284[a-m&&[^a-c]]
285a
286false 0
287
288[a-m&&[^a-c]]
289d
290true d 0
291
292[a-cd-f&&[d-f]]
293a
294false 0
295
296[a-cd-f&&[d-f]]
297e
298true e 0
299
300[[a-c]&&d-fa-c]
301a
302true a 0
303
304[[a-c]&&[d-f][a-c]]
305a
306true a 0
307
308[[a-c][d-f]&&abc]
309a
310true a 0
311
312[[a-c][d-f]&&abc[def]]
313e
314true e 0
315
316[[a-c]&&[b-d]&&[c-e]]
317a
318false 0
319
320[[a-c]&&[b-d]&&[c-e]]
321c
322true c 0
323
324[[a-c]&&[b-d][c-e]&&[u-z]]
325c
326false 0
327
328[abc[^bcd]]
329a
330true a 0
331
332[abc[^bcd]]
333d
334false 0
335
336[a-c&&a-d&&a-eghi]
337b
338true b 0
339
340[a-c&&a-d&&a-eghi]
341g
342false 0
343
344[[a[b]]&&[b[a]]]
345a
346true a 0
347
348[[a]&&[b][c][a]&&[^d]]
349a
350true a 0
351
352[[a]&&[b][c][a]&&[^d]]
353d
354false 0
355
356[[[a-d]&&[c-f]]]
357a
358false 0
359
360[[[a-d]&&[c-f]]]
361c
362true c 0
363
364[[[a-d]&&[c-f]]&&[c]]
365c
366true c 0
367
368[[[a-d]&&[c-f]]&&[c]&&c]
369c
370true c 0
371
372[[[a-d]&&[c-f]]&&[c]&&c&&c]
373c
374true c 0
375
376[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
377c
378true c 0
379
380[z[abc&&bcd]]
381c
382true c 0
383
384[z[abc&&bcd]&&[u-z]]
385z
386true z 0
387
388[x[abc&&bcd[z]]&&[u-z]]
389z
390false 0
391
392[x[[wz]abc&&bcd[z]]&&[u-z]]
393z
394true z 0
395
396[[abc]&&[def]abc]
397a
398true a 0
399
400[[abc]&&[def]xyz[abc]]
401a
402true a 0
403
404\pL
405a
406true a 0
407
408\pL
4097
410false 0
411
412\p{L}
413a
414true a 0
415
416\p{LC}
417a
418true a 0
419
420\p{LC}
421A
422true A 0
423
424\p{IsL}
425a
426true a 0
427
428\p{IsLC}
429a
430true a 0
431
432\p{IsLC}
433A
434true A 0
435
436\p{IsLC}
4379
438false 0
439
440\P{IsLC}
4419
442true 9 0
443
444// Guillemet left is initial quote punctuation
445\p{Pi}
446\u00ab
447true \u00ab 0
448
449\P{Pi}
450\u00ac
451true \u00ac 0
452
453// Guillemet right is final quote punctuation
454\p{IsPf}
455\u00bb
456true \u00bb 0
457
458\p{P}
459\u00bb
460true \u00bb 0
461
462\p{P}+
463\u00bb
464true \u00bb 0
465
466\P{IsPf}
467\u00bc
468true \u00bc 0
469
470\P{IsP}
471\u00bc
472true \u00bc 0
473
474\p{L1}
475\u00bc
476true \u00bc 0
477
478\p{L1}+
479\u00bc
480true \u00bc 0
481
482\p{L1}
483\u02bc
484false 0
485
486\p{ASCII}
487a
488true a 0
489
490\p{IsASCII}
491a
492true a 0
493
494\p{IsASCII}
495\u0370
496false 0
497
498\pLbc
499abc
500true abc 0
501
502a[r\p{InGreek}]c
503a\u0370c
504true a\u0370c 0
505
506a\p{InGreek}
507a\u0370
508true a\u0370 0
509
510a\P{InGreek}
511a\u0370
512false 0
513
514a\P{InGreek}
515ab
516true ab 0
517
518a{^InGreek}
519-
520error
521
522a\p{^InGreek}
523-
524error
525
526a\P{^InGreek}
527-
528error
529
530a\p{InGreek}
531a\u0370
532true a\u0370 0
533
534a[\p{InGreek}]c
535a\u0370c
536true a\u0370c 0
537
538a[\P{InGreek}]c
539a\u0370c
540false 0
541
542a[\P{InGreek}]c
543abc
544true abc 0
545
546a[{^InGreek}]c
547anc
548true anc 0
549
550a[{^InGreek}]c
551azc
552false 0
553
554a[\p{^InGreek}]c
555-
556error
557
558a[\P{^InGreek}]c
559-
560error
561
562a[\p{InGreek}]
563a\u0370
564true a\u0370 0
565
566a[r\p{InGreek}]c
567arc
568true arc 0
569
570a[\p{InGreek}r]c
571arc
572true arc 0
573
574a[r\p{InGreek}]c
575arc
576true arc 0
577
578a[^\p{InGreek}]c
579a\u0370c
580false 0
581
582a[^\P{InGreek}]c
583a\u0370c
584true a\u0370c 0
585
586a[\p{InGreek}&&[^\u0370]]c
587a\u0370c
588false 0
589
590// Test the dot metacharacter
591a.c.+
592a#c%&
593true a#c%& 0
594
595ab.
596ab\n
597false 0
598
599(?s)ab.
600ab\n
601true ab\n 0
602
603a[\p{L}&&[\P{InGreek}]]c
604a\u6000c
605true a\u6000c 0
606
607a[\p{L}&&[\P{InGreek}]]c
608arc
609true arc 0
610
611a[\p{L}&&[\P{InGreek}]]c
612a\u0370c
613false 0
614
615a\p{InGreek}c
616a\u0370c
617true a\u0370c 0
618
619a\p{Sc}
620a$
621true a$ 0
622
623// Test the word char escape sequence
624ab\wc
625abcc
626true abcc 0
627
628\W\w\W
629#r#
630true #r# 0
631
632\W\w\W
633rrrr#ggg
634false 0
635
636abc[\w]
637abcd
638true abcd 0
639
640abc[\sdef]*
641abc def
642true abc def 0
643
644abc[\sy-z]*
645abc y z
646true abc y z 0
647
648abc[a-d\sm-p]*
649abcaa mn p
650true abcaa mn p 0
651
652// Test the whitespace escape sequence
653ab\sc
654ab c
655true ab c 0
656
657\s\s\s
658blah err
659false 0
660
661\S\S\s
662blah err
663true ah 0
664
665// Test the digit escape sequence
666ab\dc
667ab9c
668true ab9c 0
669
670\d\d\d
671blah45
672false 0
673
674// Test the caret metacharacter
675^abc
676abcdef
677true abc 0
678
679^abc
680bcdabc
681false 0
682
683// Greedy ? metacharacter
684a?b
685aaaab
686true ab 0
687
688a?b
689b
690true b 0
691
692a?b
693aaaccc
694false 0
695
696.?b
697aaaab
698true ab 0
699
700// Reluctant ? metacharacter
701a??b
702aaaab
703true ab 0
704
705a??b
706b
707true b 0
708
709a??b
710aaaccc
711false 0
712
713.??b
714aaaab
715true ab 0
716
717// Possessive ? metacharacter
718a?+b
719aaaab
720true ab 0
721
722a?+b
723b
724true b 0
725
726a?+b
727aaaccc
728false 0
729
730.?+b
731aaaab
732true ab 0
733
734// Greedy + metacharacter
735a+b
736aaaab
737true aaaab 0
738
739a+b
740b
741false 0
742
743a+b
744aaaccc
745false 0
746
747.+b
748aaaab
749true aaaab 0
750
751// Reluctant + metacharacter
752a+?b
753aaaab
754true aaaab 0
755
756a+?b
757b
758false 0
759
760a+?b
761aaaccc
762false 0
763
764.+?b
765aaaab
766true aaaab 0
767
768// Possessive + metacharacter
769a++b
770aaaab
771true aaaab 0
772
773a++b
774b
775false 0
776
777a++b
778aaaccc
779false 0
780
781.++b
782aaaab
783false 0
784
785// Greedy Repetition
786a{2,3}
787a
788false 0
789
790a{2,3}
791aa
792true aa 0
793
794a{2,3}
795aaa
796true aaa 0
797
798a{2,3}
799aaaa
800true aaa 0
801
802a{3,}
803zzzaaaazzz
804true aaaa 0
805
806a{3,}
807zzzaazzz
808false 0
809
810// Reluctant Repetition
811a{2,3}?
812a
813false 0
814
815a{2,3}?
816aa
817true aa 0
818
819a{2,3}?
820aaa
821true aa 0
822
823a{2,3}?
824aaaa
825true aa 0
826
827// Zero width Positive lookahead
828abc(?=d)
829zzzabcd
830true abc 0
831
832abc(?=d)
833zzzabced
834false 0
835
836// Zero width Negative lookahead
837abc(?!d)
838zzabcd
839false 0
840
841abc(?!d)
842zzabced
843true abc 0
844
845// Zero width Positive lookbehind
846\w(?<=a)
847###abc###
848true a 0
849
850\w(?<=a)
851###ert###
852false 0
853
854// Zero width Negative lookbehind
855(?<!a)\w
856###abc###
857true a 0
858
859(?<!a)c
860bc
861true c 0
862
863(?<!a)c
864ac
865false 0
866
867// Nondeterministic group
868(a+b)+
869ababab
870true ababab 1 ab
871
872(a|b)+
873ccccd
874false 1
875
876// Deterministic group
877(ab)+
878ababab
879true ababab 1 ab
880
881(ab)+
882accccd
883false 1
884
885(ab)*
886ababab
887true ababab 1 ab
888
889(ab)(cd*)
890zzzabczzz
891true abc 2 ab c
892
893abc(d)*abc
894abcdddddabc
895true abcdddddabc 1 d
896
897// Escaped metacharacter
898\*
899*
900true * 0
901
902\\
903\
904true \ 0
905
906\\
907\\\\
908true \ 0
909
910// Back references
911(a*)bc\1
912zzzaabcaazzz
913true aabcaa 1 aa
914
915(a*)bc\1
916zzzaabcazzz
917true abca 1 a
918
919(gt*)(dde)*(yu)\1\3(vv)
920zzzgttddeddeyugttyuvvzzz
921true gttddeddeyugttyuvv 4 gtt dde yu vv
922
923// Greedy * metacharacter
924a*b
925aaaab
926true aaaab 0
927
928a*b
929b
930true b 0
931
932a*b
933aaaccc
934false 0
935
936.*b
937aaaab
938true aaaab 0
939
940// Reluctant * metacharacter
941a*?b
942aaaab
943true aaaab 0
944
945a*?b
946b
947true b 0
948
949a*?b
950aaaccc
951false 0
952
953.*?b
954aaaab
955true aaaab 0
956
957// Possessive * metacharacter
958a*+b
959aaaab
960true aaaab 0
961
962a*+b
963b
964true b 0
965
966a*+b
967aaaccc
968false 0
969
970.*+b
971aaaab
972false 0
973
974// Case insensitivity
975(?i)foobar
976fOobAr
977true fOobAr 0
978
979f(?i)oobar
980fOobAr
981true fOobAr 0
982
983foo(?i)bar
984fOobAr
985false 0
986
987(?i)foo[bar]+
988foObAr
989true foObAr 0
990
991(?i)foo[a-r]+
992foObAr
993true foObAr 0
994
995// Disable metacharacters - test both length <= 3 and > 3
996// so that the BM (Boyer-Moore) optimization is part of the test
997\Q***\Eabc
998***abc
999true ***abc 0
1000
1001bl\Q***\Eabc
1002bl***abc
1003true bl***abc 0
1004
1005\Q***abc
1006***abc
1007true ***abc 0
1008
1009blah\Q***\Eabc
1010blah***abc
1011true blah***abc 0
1012
1013\Q***abc
1014***abc
1015true ***abc 0
1016
1017\Q*ab
1018*ab
1019true *ab 0
1020
1021blah\Q***abc
1022blah***abc
1023true blah***abc 0
1024
1025bla\Q***abc
1026bla***abc
1027true bla***abc 0
1028
1029// Escapes in char classes
1030[ab\Qdef\E]
1031d
1032true d 0
1033
1034[ab\Q[\E]
1035[
1036true [ 0
1037
1038[\Q]\E]
1039]
1040true ] 0
1041
1042[\Q\\E]
1043\
1044true \ 0
1045
1046[\Q(\E]
1047(
1048true ( 0
1049
1050[\n-#]
1051!
1052true ! 0
1053
1054[\n-#]
1055-
1056false 0
1057
1058[\w-#]
1059!
1060false 0
1061
1062[\w-#]
1063a
1064true a 0
1065
1066[\w-#]
1067-
1068true - 0
1069
1070[\w-#]
1071#
1072true # 0
1073
1074[\043]+
1075blahblah#blech
1076true # 0
1077
1078[\042-\044]+
1079blahblah#blech
1080true # 0
1081
1082[\u1234-\u1236]
1083blahblah\u1235blech
1084true \u1235 0
1085
1086[^\043]*
1087blahblah#blech
1088true blahblah 0
1089
1090(|f)?+
1091foo
1092true 1