blob: bc4448ff81362f4e8a5cbebcbe94c7f738cb2135 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
2 * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermancc01ef52010-05-18 15:36:47 -070035 * 6350801 6676425 6878475 6919132 6931676 6948903
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
43
44/**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
53
54 /**
55 * Main to interpret arguments and run several tests.
56 *
57 */
58 public static void main(String[] args) throws Exception {
59 // Most of the tests are in a file
60 processFile("TestCases.txt");
61 //processFile("PerlCases.txt");
62 processFile("BMPTestCases.txt");
63 processFile("SupplementaryTestCases.txt");
64
65 // These test many randomly generated char patterns
66 bm();
67 slice();
68
69 // These are hard to put into the file
70 escapes();
71 blankInput();
72
73 // Substitition tests on randomly generated sequences
74 globalSubstitute();
75 stringbufferSubstitute();
76 substitutionBasher();
77
78 // Canonical Equivalence
79 ceTest();
80
81 // Anchors
82 anchorTest();
83
84 // boolean match calls
85 matchesTest();
86 lookingAtTest();
87
88 // Pattern API
89 patternMatchesTest();
90
91 // Misc
92 lookbehindTest();
93 nullArgumentTest();
94 backRefTest();
95 groupCaptureTest();
96 caretTest();
97 charClassTest();
98 emptyPatternTest();
99 findIntTest();
100 group0Test();
101 longPatternTest();
102 octalTest();
103 ampersandTest();
104 negationTest();
105 splitTest();
106 appendTest();
107 caseFoldingTest();
108 commentsTest();
109 unixLinesTest();
110 replaceFirstTest();
111 gTest();
112 zTest();
113 serializeTest();
114 reluctantRepetitionTest();
115 multilineDollarTest();
116 dollarAtEndTest();
117 caretBetweenTerminatorsTest();
118 // This RFE rejected in Tiger numOccurrencesTest();
119 javaCharClassTest();
120 nonCaptureRepetitionTest();
121 notCapturedGroupCurlyMatchTest();
122 escapedSegmentTest();
123 literalPatternTest();
124 literalReplacementTest();
125 regionTest();
126 toStringTest();
127 negatedCharClassTest();
128 findFromTest();
129 boundsTest();
130 unicodeWordBoundsTest();
131 caretAtEndTest();
132 wordSearchTest();
133 hitEndTest();
134 toMatchResultTest();
135 surrogatesInClassTest();
136 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800137 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700138 unicodePropertiesTest();
sherman0b4d42d2009-02-23 21:06:15 -0800139 if (failure)
140 throw new RuntimeException("Failure in the RE handling.");
141 else
142 System.err.println("OKAY: All tests passed.");
143 }
144
145 // Utility functions
146
147 private static String getRandomAlphaString(int length) {
148 StringBuffer buf = new StringBuffer(length);
149 for (int i=0; i<length; i++) {
150 char randChar = (char)(97 + generator.nextInt(26));
151 buf.append(randChar);
152 }
153 return buf.toString();
154 }
155
156 private static void check(Matcher m, String expected) {
157 m.find();
158 if (!m.group().equals(expected))
159 failCount++;
160 }
161
162 private static void check(Matcher m, String result, boolean expected) {
163 m.find();
164 if (m.group().equals(result))
165 failCount += (expected) ? 0 : 1;
166 else
167 failCount += (expected) ? 1 : 0;
168 }
169
170 private static void check(Pattern p, String s, boolean expected) {
171 Matcher matcher = p.matcher(s);
172 if (matcher.find())
173 failCount += (expected) ? 0 : 1;
174 else
175 failCount += (expected) ? 1 : 0;
176 }
177
178 private static void check(String p, char c, boolean expected) {
179 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
180 Pattern pattern = Pattern.compile(propertyPattern);
181 char[] ca = new char[1]; ca[0] = c;
182 Matcher matcher = pattern.matcher(new String(ca));
183 if (!matcher.find())
184 failCount++;
185 }
186
187 private static void check(String p, int codePoint, boolean expected) {
188 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
189 Pattern pattern = Pattern.compile(propertyPattern);
190 char[] ca = Character.toChars(codePoint);
191 Matcher matcher = pattern.matcher(new String(ca));
192 if (!matcher.find())
193 failCount++;
194 }
195
196 private static void check(String p, int flag, String input, String s,
197 boolean expected)
198 {
199 Pattern pattern = Pattern.compile(p, flag);
200 Matcher matcher = pattern.matcher(input);
201 if (expected)
202 check(matcher, s, expected);
203 else
204 check(pattern, input, false);
205 }
206
207 private static void report(String testName) {
208 int spacesToAdd = 30 - testName.length();
209 StringBuffer paddedNameBuffer = new StringBuffer(testName);
210 for (int i=0; i<spacesToAdd; i++)
211 paddedNameBuffer.append(" ");
212 String paddedName = paddedNameBuffer.toString();
213 System.err.println(paddedName + ": " +
214 (failCount==0 ? "Passed":"Failed("+failCount+")"));
215 if (failCount > 0)
216 failure = true;
217 failCount = 0;
218 }
219
220 /**
221 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
222 * supplementary characters. This method does NOT fully take care
223 * of the regex syntax.
224 */
225 private static String toSupplementaries(String s) {
226 int length = s.length();
227 StringBuffer sb = new StringBuffer(length * 2);
228
229 for (int i = 0; i < length; ) {
230 char c = s.charAt(i++);
231 if (c == '\\') {
232 sb.append(c);
233 if (i < length) {
234 c = s.charAt(i++);
235 sb.append(c);
236 if (c == 'u') {
237 // assume no syntax error
238 sb.append(s.charAt(i++));
239 sb.append(s.charAt(i++));
240 sb.append(s.charAt(i++));
241 sb.append(s.charAt(i++));
242 }
243 }
244 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
245 sb.append('\ud800').append((char)('\udc00'+c));
246 } else {
247 sb.append(c);
248 }
249 }
250 return sb.toString();
251 }
252
253 // Regular expression tests
254
255 // This is for bug 6178785
256 // Test if an expected NPE gets thrown when passing in a null argument
257 private static boolean check(Runnable test) {
258 try {
259 test.run();
260 failCount++;
261 return false;
262 } catch (NullPointerException npe) {
263 return true;
264 }
265 }
266
267 private static void nullArgumentTest() {
268 check(new Runnable() { public void run() { Pattern.compile(null); }});
269 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
270 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
271 check(new Runnable() { public void run() { Pattern.quote(null);}});
272 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
273 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
274
275 final Matcher m = Pattern.compile("xyz").matcher("xyz");
276 m.matches();
277 check(new Runnable() { public void run() { m.appendTail(null);}});
278 check(new Runnable() { public void run() { m.replaceAll(null);}});
279 check(new Runnable() { public void run() { m.replaceFirst(null);}});
280 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
281 check(new Runnable() { public void run() { m.reset(null);}});
282 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
283 //check(new Runnable() { public void run() { m.usePattern(null);}});
284
285 report("Null Argument");
286 }
287
288 // This is for bug6635133
289 // Test if surrogate pair in Unicode escapes can be handled correctly.
290 private static void surrogatesInClassTest() throws Exception {
291 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
292 Matcher matcher = pattern.matcher("\ud834\udd22");
293 if (!matcher.find())
294 failCount++;
295 }
296
297 // This is for bug 4988891
298 // Test toMatchResult to see that it is a copy of the Matcher
299 // that is not affected by subsequent operations on the original
300 private static void toMatchResultTest() throws Exception {
301 Pattern pattern = Pattern.compile("squid");
302 Matcher matcher = pattern.matcher(
303 "agiantsquidofdestinyasmallsquidoffate");
304 matcher.find();
305 int matcherStart1 = matcher.start();
306 MatchResult mr = matcher.toMatchResult();
307 if (mr == matcher)
308 failCount++;
309 int resultStart1 = mr.start();
310 if (matcherStart1 != resultStart1)
311 failCount++;
312 matcher.find();
313 int matcherStart2 = matcher.start();
314 int resultStart2 = mr.start();
315 if (matcherStart2 == resultStart2)
316 failCount++;
317 if (resultStart1 != resultStart2)
318 failCount++;
319 MatchResult mr2 = matcher.toMatchResult();
320 if (mr == mr2)
321 failCount++;
322 if (mr2.start() != matcherStart2)
323 failCount++;
324 report("toMatchResult is a copy");
325 }
326
327 // This is for bug 5013885
328 // Must test a slice to see if it reports hitEnd correctly
329 private static void hitEndTest() throws Exception {
330 // Basic test of Slice node
331 Pattern p = Pattern.compile("^squidattack");
332 Matcher m = p.matcher("squack");
333 m.find();
334 if (m.hitEnd())
335 failCount++;
336 m.reset("squid");
337 m.find();
338 if (!m.hitEnd())
339 failCount++;
340
341 // Test Slice, SliceA and SliceU nodes
342 for (int i=0; i<3; i++) {
343 int flags = 0;
344 if (i==1) flags = Pattern.CASE_INSENSITIVE;
345 if (i==2) flags = Pattern.UNICODE_CASE;
346 p = Pattern.compile("^abc", flags);
347 m = p.matcher("ad");
348 m.find();
349 if (m.hitEnd())
350 failCount++;
351 m.reset("ab");
352 m.find();
353 if (!m.hitEnd())
354 failCount++;
355 }
356
357 // Test Boyer-Moore node
358 p = Pattern.compile("catattack");
359 m = p.matcher("attack");
360 m.find();
361 if (!m.hitEnd())
362 failCount++;
363
364 p = Pattern.compile("catattack");
365 m = p.matcher("attackattackattackcatatta");
366 m.find();
367 if (!m.hitEnd())
368 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800369 report("hitEnd from a Slice");
370 }
371
372 // This is for bug 4997476
373 // It is weird code submitted by customer demonstrating a regression
374 private static void wordSearchTest() throws Exception {
375 String testString = new String("word1 word2 word3");
376 Pattern p = Pattern.compile("\\b");
377 Matcher m = p.matcher(testString);
378 int position = 0;
379 int start = 0;
380 while (m.find(position)) {
381 start = m.start();
382 if (start == testString.length())
383 break;
384 if (m.find(start+1)) {
385 position = m.start();
386 } else {
387 position = testString.length();
388 }
389 if (testString.substring(start, position).equals(" "))
390 continue;
391 if (!testString.substring(start, position-1).startsWith("word"))
392 failCount++;
393 }
394 report("Customer word search");
395 }
396
397 // This is for bug 4994840
398 private static void caretAtEndTest() throws Exception {
399 // Problem only occurs with multiline patterns
400 // containing a beginning-of-line caret "^" followed
401 // by an expression that also matches the empty string.
402 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
403 Matcher matcher = pattern.matcher("\r");
404 matcher.find();
405 matcher.find();
406 report("Caret at end");
407 }
408
409 // This test is for 4979006
410 // Check to see if word boundary construct properly handles unicode
411 // non spacing marks
412 private static void unicodeWordBoundsTest() throws Exception {
413 String spaces = " ";
414 String wordChar = "a";
415 String nsm = "\u030a";
416
417 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
418
419 Pattern pattern = Pattern.compile("\\b");
420 Matcher matcher = pattern.matcher("");
421 // S=other B=word character N=non spacing mark .=word boundary
422 // SS.BB.SS
423 String input = spaces + wordChar + wordChar + spaces;
424 twoFindIndexes(input, matcher, 2, 4);
425 // SS.BBN.SS
426 input = spaces + wordChar +wordChar + nsm + spaces;
427 twoFindIndexes(input, matcher, 2, 5);
428 // SS.BN.SS
429 input = spaces + wordChar + nsm + spaces;
430 twoFindIndexes(input, matcher, 2, 4);
431 // SS.BNN.SS
432 input = spaces + wordChar + nsm + nsm + spaces;
433 twoFindIndexes(input, matcher, 2, 5);
434 // SSN.BB.SS
435 input = spaces + nsm + wordChar + wordChar + spaces;
436 twoFindIndexes(input, matcher, 3, 5);
437 // SS.BNB.SS
438 input = spaces + wordChar + nsm + wordChar + spaces;
439 twoFindIndexes(input, matcher, 2, 5);
440 // SSNNSS
441 input = spaces + nsm + nsm + spaces;
442 matcher.reset(input);
443 if (matcher.find())
444 failCount++;
445 // SSN.BBN.SS
446 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
447 twoFindIndexes(input, matcher, 3, 6);
448
449 report("Unicode word boundary");
450 }
451
452 private static void twoFindIndexes(String input, Matcher matcher, int a,
453 int b) throws Exception
454 {
455 matcher.reset(input);
456 matcher.find();
457 if (matcher.start() != a)
458 failCount++;
459 matcher.find();
460 if (matcher.start() != b)
461 failCount++;
462 }
463
464 // This test is for 6284152
465 static void check(String regex, String input, String[] expected) {
466 List<String> result = new ArrayList<String>();
467 Pattern p = Pattern.compile(regex);
468 Matcher m = p.matcher(input);
469 while (m.find()) {
470 result.add(m.group());
471 }
472 if (!Arrays.asList(expected).equals(result))
473 failCount++;
474 }
475
476 private static void lookbehindTest() throws Exception {
477 //Positive
478 check("(?<=%.{0,5})foo\\d",
479 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
480 new String[]{"foo1", "foo2", "foo3"});
481
482 //boundary at end of the lookbehind sub-regex should work consistently
483 //with the boundary just after the lookbehind sub-regex
484 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
485 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
486 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
487 check("(?<!abc \\b)foo", "abc foo", new String[0]);
488
489 //Negative
490 check("(?<!%.{0,5})foo\\d",
491 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
492 new String[] {"foo4", "foo5"});
493
494 //Positive greedy
495 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
496
497 //Positive reluctant
498 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
499
500 //supplementary
501 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
502 new String[] {"fo\ud800\udc00o"});
503 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
504 new String[] {"fo\ud800\udc00o"});
505 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
506 new String[] {"fo\ud800\udc00o"});
507 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
508 new String[] {"fo\ud800\udc00o"});
509 report("Lookbehind");
510 }
511
512 // This test is for 4938995
513 // Check to see if weak region boundaries are transparent to
514 // lookahead and lookbehind constructs
515 private static void boundsTest() throws Exception {
516 String fullMessage = "catdogcat";
517 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
518 Matcher matcher = pattern.matcher("catdogca");
519 matcher.useTransparentBounds(true);
520 if (matcher.find())
521 failCount++;
522 matcher.reset("atdogcat");
523 if (matcher.find())
524 failCount++;
525 matcher.reset(fullMessage);
526 if (!matcher.find())
527 failCount++;
528 matcher.reset(fullMessage);
529 matcher.region(0,9);
530 if (!matcher.find())
531 failCount++;
532 matcher.reset(fullMessage);
533 matcher.region(0,6);
534 if (!matcher.find())
535 failCount++;
536 matcher.reset(fullMessage);
537 matcher.region(3,6);
538 if (!matcher.find())
539 failCount++;
540 matcher.useTransparentBounds(false);
541 if (matcher.find())
542 failCount++;
543
544 // Negative lookahead/lookbehind
545 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
546 matcher = pattern.matcher("dogcat");
547 matcher.useTransparentBounds(true);
548 matcher.region(0,3);
549 if (matcher.find())
550 failCount++;
551 matcher.reset("catdog");
552 matcher.region(3,6);
553 if (matcher.find())
554 failCount++;
555 matcher.useTransparentBounds(false);
556 matcher.reset("dogcat");
557 matcher.region(0,3);
558 if (!matcher.find())
559 failCount++;
560 matcher.reset("catdog");
561 matcher.region(3,6);
562 if (!matcher.find())
563 failCount++;
564
565 report("Region bounds transparency");
566 }
567
568 // This test is for 4945394
569 private static void findFromTest() throws Exception {
570 String message = "This is 40 $0 message.";
571 Pattern pat = Pattern.compile("\\$0");
572 Matcher match = pat.matcher(message);
573 if (!match.find())
574 failCount++;
575 if (match.find())
576 failCount++;
577 if (match.find())
578 failCount++;
579 report("Check for alternating find");
580 }
581
582 // This test is for 4872664 and 4892980
583 private static void negatedCharClassTest() throws Exception {
584 Pattern pattern = Pattern.compile("[^>]");
585 Matcher matcher = pattern.matcher("\u203A");
586 if (!matcher.matches())
587 failCount++;
588 pattern = Pattern.compile("[^fr]");
589 matcher = pattern.matcher("a");
590 if (!matcher.find())
591 failCount++;
592 matcher.reset("\u203A");
593 if (!matcher.find())
594 failCount++;
595 String s = "for";
596 String result[] = s.split("[^fr]");
597 if (!result[0].equals("f"))
598 failCount++;
599 if (!result[1].equals("r"))
600 failCount++;
601 s = "f\u203Ar";
602 result = s.split("[^fr]");
603 if (!result[0].equals("f"))
604 failCount++;
605 if (!result[1].equals("r"))
606 failCount++;
607
608 // Test adding to bits, subtracting a node, then adding to bits again
609 pattern = Pattern.compile("[^f\u203Ar]");
610 matcher = pattern.matcher("a");
611 if (!matcher.find())
612 failCount++;
613 matcher.reset("f");
614 if (matcher.find())
615 failCount++;
616 matcher.reset("\u203A");
617 if (matcher.find())
618 failCount++;
619 matcher.reset("r");
620 if (matcher.find())
621 failCount++;
622 matcher.reset("\u203B");
623 if (!matcher.find())
624 failCount++;
625
626 // Test subtracting a node, adding to bits, subtracting again
627 pattern = Pattern.compile("[^\u203Ar\u203B]");
628 matcher = pattern.matcher("a");
629 if (!matcher.find())
630 failCount++;
631 matcher.reset("\u203A");
632 if (matcher.find())
633 failCount++;
634 matcher.reset("r");
635 if (matcher.find())
636 failCount++;
637 matcher.reset("\u203B");
638 if (matcher.find())
639 failCount++;
640 matcher.reset("\u203C");
641 if (!matcher.find())
642 failCount++;
643
644 report("Negated Character Class");
645 }
646
647 // This test is for 4628291
648 private static void toStringTest() throws Exception {
649 Pattern pattern = Pattern.compile("b+");
650 if (pattern.toString() != "b+")
651 failCount++;
652 Matcher matcher = pattern.matcher("aaabbbccc");
653 String matcherString = matcher.toString(); // unspecified
654 matcher.find();
655 matcherString = matcher.toString(); // unspecified
656 matcher.region(0,3);
657 matcherString = matcher.toString(); // unspecified
658 matcher.reset();
659 matcherString = matcher.toString(); // unspecified
660 report("toString");
661 }
662
663 // This test is for 4808962
664 private static void literalPatternTest() throws Exception {
665 int flags = Pattern.LITERAL;
666
667 Pattern pattern = Pattern.compile("abc\\t$^", flags);
668 check(pattern, "abc\\t$^", true);
669
670 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
671 check(pattern, "abc\\t$^", true);
672
673 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
674 check(pattern, "\\Qa^$bcabc\\E", true);
675 check(pattern, "a^$bcabc", false);
676
677 pattern = Pattern.compile("\\\\Q\\\\E");
678 check(pattern, "\\Q\\E", true);
679
680 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
681 check(pattern, "abcefg\\Q\\Ehij", true);
682
683 pattern = Pattern.compile("\\\\\\Q\\\\E");
684 check(pattern, "\\\\\\\\", true);
685
686 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
687 check(pattern, "\\Qa^$bcabc\\E", true);
688 check(pattern, "a^$bcabc", false);
689
690 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
691 check(pattern, "\\Qabc\\Edef", true);
692 check(pattern, "abcdef", false);
693
694 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
695 check(pattern, "abc\\Edef", true);
696 check(pattern, "abcdef", false);
697
698 pattern = Pattern.compile(Pattern.quote("\\E"));
699 check(pattern, "\\E", true);
700
701 pattern = Pattern.compile("((((abc.+?:)", flags);
702 check(pattern, "((((abc.+?:)", true);
703
704 flags |= Pattern.MULTILINE;
705
706 pattern = Pattern.compile("^cat$", flags);
707 check(pattern, "abc^cat$def", true);
708 check(pattern, "cat", false);
709
710 flags |= Pattern.CASE_INSENSITIVE;
711
712 pattern = Pattern.compile("abcdef", flags);
713 check(pattern, "ABCDEF", true);
714 check(pattern, "AbCdEf", true);
715
716 flags |= Pattern.DOTALL;
717
718 pattern = Pattern.compile("a...b", flags);
719 check(pattern, "A...b", true);
720 check(pattern, "Axxxb", false);
721
722 flags |= Pattern.CANON_EQ;
723
724 Pattern p = Pattern.compile("testa\u030a", flags);
725 check(pattern, "testa\u030a", false);
726 check(pattern, "test\u00e5", false);
727
728 // Supplementary character test
729 flags = Pattern.LITERAL;
730
731 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
732 check(pattern, toSupplementaries("abc\\t$^"), true);
733
734 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
735 check(pattern, toSupplementaries("abc\\t$^"), true);
736
737 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
738 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
739 check(pattern, toSupplementaries("a^$bcabc"), false);
740
741 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
742 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
743 check(pattern, toSupplementaries("a^$bcabc"), false);
744
745 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
746 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
747 check(pattern, toSupplementaries("abcdef"), false);
748
749 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
750 check(pattern, toSupplementaries("abc\\Edef"), true);
751 check(pattern, toSupplementaries("abcdef"), false);
752
753 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
754 check(pattern, toSupplementaries("((((abc.+?:)"), true);
755
756 flags |= Pattern.MULTILINE;
757
758 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
759 check(pattern, toSupplementaries("abc^cat$def"), true);
760 check(pattern, toSupplementaries("cat"), false);
761
762 flags |= Pattern.DOTALL;
763
764 // note: this is case-sensitive.
765 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
766 check(pattern, toSupplementaries("a...b"), true);
767 check(pattern, toSupplementaries("axxxb"), false);
768
769 flags |= Pattern.CANON_EQ;
770
771 String t = toSupplementaries("test");
772 p = Pattern.compile(t + "a\u030a", flags);
773 check(pattern, t + "a\u030a", false);
774 check(pattern, t + "\u00e5", false);
775
776 report("Literal pattern");
777 }
778
779 // This test is for 4803179
780 // This test is also for 4808962, replacement parts
781 private static void literalReplacementTest() throws Exception {
782 int flags = Pattern.LITERAL;
783
784 Pattern pattern = Pattern.compile("abc", flags);
785 Matcher matcher = pattern.matcher("zzzabczzz");
786 String replaceTest = "$0";
787 String result = matcher.replaceAll(replaceTest);
788 if (!result.equals("zzzabczzz"))
789 failCount++;
790
791 matcher.reset();
792 String literalReplacement = matcher.quoteReplacement(replaceTest);
793 result = matcher.replaceAll(literalReplacement);
794 if (!result.equals("zzz$0zzz"))
795 failCount++;
796
797 matcher.reset();
798 replaceTest = "\\t$\\$";
799 literalReplacement = matcher.quoteReplacement(replaceTest);
800 result = matcher.replaceAll(literalReplacement);
801 if (!result.equals("zzz\\t$\\$zzz"))
802 failCount++;
803
804 // Supplementary character test
805 pattern = Pattern.compile(toSupplementaries("abc"), flags);
806 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
807 replaceTest = "$0";
808 result = matcher.replaceAll(replaceTest);
809 if (!result.equals(toSupplementaries("zzzabczzz")))
810 failCount++;
811
812 matcher.reset();
813 literalReplacement = matcher.quoteReplacement(replaceTest);
814 result = matcher.replaceAll(literalReplacement);
815 if (!result.equals(toSupplementaries("zzz$0zzz")))
816 failCount++;
817
818 matcher.reset();
819 replaceTest = "\\t$\\$";
820 literalReplacement = matcher.quoteReplacement(replaceTest);
821 result = matcher.replaceAll(literalReplacement);
822 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
823 failCount++;
824
825 report("Literal replacement");
826 }
827
828 // This test is for 4757029
829 private static void regionTest() throws Exception {
830 Pattern pattern = Pattern.compile("abc");
831 Matcher matcher = pattern.matcher("abcdefabc");
832
833 matcher.region(0,9);
834 if (!matcher.find())
835 failCount++;
836 if (!matcher.find())
837 failCount++;
838 matcher.region(0,3);
839 if (!matcher.find())
840 failCount++;
841 matcher.region(3,6);
842 if (matcher.find())
843 failCount++;
844 matcher.region(0,2);
845 if (matcher.find())
846 failCount++;
847
848 expectRegionFail(matcher, 1, -1);
849 expectRegionFail(matcher, -1, -1);
850 expectRegionFail(matcher, -1, 1);
851 expectRegionFail(matcher, 5, 3);
852 expectRegionFail(matcher, 5, 12);
853 expectRegionFail(matcher, 12, 12);
854
855 pattern = Pattern.compile("^abc$");
856 matcher = pattern.matcher("zzzabczzz");
857 matcher.region(0,9);
858 if (matcher.find())
859 failCount++;
860 matcher.region(3,6);
861 if (!matcher.find())
862 failCount++;
863 matcher.region(3,6);
864 matcher.useAnchoringBounds(false);
865 if (matcher.find())
866 failCount++;
867
868 // Supplementary character test
869 pattern = Pattern.compile(toSupplementaries("abc"));
870 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
871 matcher.region(0,9*2);
872 if (!matcher.find())
873 failCount++;
874 if (!matcher.find())
875 failCount++;
876 matcher.region(0,3*2);
877 if (!matcher.find())
878 failCount++;
879 matcher.region(1,3*2);
880 if (matcher.find())
881 failCount++;
882 matcher.region(3*2,6*2);
883 if (matcher.find())
884 failCount++;
885 matcher.region(0,2*2);
886 if (matcher.find())
887 failCount++;
888 matcher.region(0,2*2+1);
889 if (matcher.find())
890 failCount++;
891
892 expectRegionFail(matcher, 1*2, -1);
893 expectRegionFail(matcher, -1, -1);
894 expectRegionFail(matcher, -1, 1*2);
895 expectRegionFail(matcher, 5*2, 3*2);
896 expectRegionFail(matcher, 5*2, 12*2);
897 expectRegionFail(matcher, 12*2, 12*2);
898
899 pattern = Pattern.compile(toSupplementaries("^abc$"));
900 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
901 matcher.region(0,9*2);
902 if (matcher.find())
903 failCount++;
904 matcher.region(3*2,6*2);
905 if (!matcher.find())
906 failCount++;
907 matcher.region(3*2+1,6*2);
908 if (matcher.find())
909 failCount++;
910 matcher.region(3*2,6*2-1);
911 if (matcher.find())
912 failCount++;
913 matcher.region(3*2,6*2);
914 matcher.useAnchoringBounds(false);
915 if (matcher.find())
916 failCount++;
917 report("Regions");
918 }
919
920 private static void expectRegionFail(Matcher matcher, int index1,
921 int index2)
922 {
923 try {
924 matcher.region(index1, index2);
925 failCount++;
926 } catch (IndexOutOfBoundsException ioobe) {
927 // Correct result
928 } catch (IllegalStateException ise) {
929 // Correct result
930 }
931 }
932
933 // This test is for 4803197
934 private static void escapedSegmentTest() throws Exception {
935
936 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
937 check(pattern, "dir1\\dir2", true);
938
939 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
940 check(pattern, "dir1\\dir2\\", true);
941
942 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
943 check(pattern, "dir1\\dir2\\", true);
944
945 // Supplementary character test
946 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
947 check(pattern, toSupplementaries("dir1\\dir2"), true);
948
949 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
950 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
951
952 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
953 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
954
955 report("Escaped segment");
956 }
957
958 // This test is for 4792284
959 private static void nonCaptureRepetitionTest() throws Exception {
960 String input = "abcdefgh;";
961
962 String[] patterns = new String[] {
963 "(?:\\w{4})+;",
964 "(?:\\w{8})*;",
965 "(?:\\w{2}){2,4};",
966 "(?:\\w{4}){2,};", // only matches the
967 ".*?(?:\\w{5})+;", // specified minimum
968 ".*?(?:\\w{9})*;", // number of reps - OK
969 "(?:\\w{4})+?;", // lazy repetition - OK
970 "(?:\\w{4})++;", // possessive repetition - OK
971 "(?:\\w{2,}?)+;", // non-deterministic - OK
972 "(\\w{4})+;", // capturing group - OK
973 };
974
975 for (int i = 0; i < patterns.length; i++) {
976 // Check find()
977 check(patterns[i], 0, input, input, true);
978 // Check matches()
979 Pattern p = Pattern.compile(patterns[i]);
980 Matcher m = p.matcher(input);
981
982 if (m.matches()) {
983 if (!m.group(0).equals(input))
984 failCount++;
985 } else {
986 failCount++;
987 }
988 }
989
990 report("Non capturing repetition");
991 }
992
993 // This test is for 6358731
994 private static void notCapturedGroupCurlyMatchTest() throws Exception {
995 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
996 Matcher matcher = pattern.matcher("abcd");
997 if (!matcher.matches() ||
998 matcher.group(1) != null ||
999 !matcher.group(2).equals("abcd")) {
1000 failCount++;
1001 }
1002 report("Not captured GroupCurly");
1003 }
1004
1005 // This test is for 4706545
1006 private static void javaCharClassTest() throws Exception {
1007 for (int i=0; i<1000; i++) {
1008 char c = (char)generator.nextInt();
1009 check("{javaLowerCase}", c, Character.isLowerCase(c));
1010 check("{javaUpperCase}", c, Character.isUpperCase(c));
1011 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1012 check("{javaTitleCase}", c, Character.isTitleCase(c));
1013 check("{javaDigit}", c, Character.isDigit(c));
1014 check("{javaDefined}", c, Character.isDefined(c));
1015 check("{javaLetter}", c, Character.isLetter(c));
1016 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1017 check("{javaJavaIdentifierStart}", c,
1018 Character.isJavaIdentifierStart(c));
1019 check("{javaJavaIdentifierPart}", c,
1020 Character.isJavaIdentifierPart(c));
1021 check("{javaUnicodeIdentifierStart}", c,
1022 Character.isUnicodeIdentifierStart(c));
1023 check("{javaUnicodeIdentifierPart}", c,
1024 Character.isUnicodeIdentifierPart(c));
1025 check("{javaIdentifierIgnorable}", c,
1026 Character.isIdentifierIgnorable(c));
1027 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1028 check("{javaWhitespace}", c, Character.isWhitespace(c));
1029 check("{javaISOControl}", c, Character.isISOControl(c));
1030 check("{javaMirrored}", c, Character.isMirrored(c));
1031
1032 }
1033
1034 // Supplementary character test
1035 for (int i=0; i<1000; i++) {
1036 int c = generator.nextInt(Character.MAX_CODE_POINT
1037 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1038 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1039 check("{javaLowerCase}", c, Character.isLowerCase(c));
1040 check("{javaUpperCase}", c, Character.isUpperCase(c));
1041 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1042 check("{javaTitleCase}", c, Character.isTitleCase(c));
1043 check("{javaDigit}", c, Character.isDigit(c));
1044 check("{javaDefined}", c, Character.isDefined(c));
1045 check("{javaLetter}", c, Character.isLetter(c));
1046 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1047 check("{javaJavaIdentifierStart}", c,
1048 Character.isJavaIdentifierStart(c));
1049 check("{javaJavaIdentifierPart}", c,
1050 Character.isJavaIdentifierPart(c));
1051 check("{javaUnicodeIdentifierStart}", c,
1052 Character.isUnicodeIdentifierStart(c));
1053 check("{javaUnicodeIdentifierPart}", c,
1054 Character.isUnicodeIdentifierPart(c));
1055 check("{javaIdentifierIgnorable}", c,
1056 Character.isIdentifierIgnorable(c));
1057 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1058 check("{javaWhitespace}", c, Character.isWhitespace(c));
1059 check("{javaISOControl}", c, Character.isISOControl(c));
1060 check("{javaMirrored}", c, Character.isMirrored(c));
1061 }
1062
1063 report("Java character classes");
1064 }
1065
1066 // This test is for 4523620
1067 /*
1068 private static void numOccurrencesTest() throws Exception {
1069 Pattern pattern = Pattern.compile("aaa");
1070
1071 if (pattern.numOccurrences("aaaaaa", false) != 2)
1072 failCount++;
1073 if (pattern.numOccurrences("aaaaaa", true) != 4)
1074 failCount++;
1075
1076 pattern = Pattern.compile("^");
1077 if (pattern.numOccurrences("aaaaaa", false) != 1)
1078 failCount++;
1079 if (pattern.numOccurrences("aaaaaa", true) != 1)
1080 failCount++;
1081
1082 report("Number of Occurrences");
1083 }
1084 */
1085
1086 // This test is for 4776374
1087 private static void caretBetweenTerminatorsTest() throws Exception {
1088 int flags1 = Pattern.DOTALL;
1089 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1090 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1091 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1092
1093 check("^....", flags1, "test\ntest", "test", true);
1094 check(".....^", flags1, "test\ntest", "test", false);
1095 check(".....^", flags1, "test\n", "test", false);
1096 check("....^", flags1, "test\r\n", "test", false);
1097
1098 check("^....", flags2, "test\ntest", "test", true);
1099 check("....^", flags2, "test\ntest", "test", false);
1100 check(".....^", flags2, "test\n", "test", false);
1101 check("....^", flags2, "test\r\n", "test", false);
1102
1103 check("^....", flags3, "test\ntest", "test", true);
1104 check(".....^", flags3, "test\ntest", "test\n", true);
1105 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1106 check(".....^", flags3, "test\n", "test", false);
1107 check(".....^", flags3, "test\r\n", "test", false);
1108 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1109
1110 check("^....", flags4, "test\ntest", "test", true);
1111 check(".....^", flags3, "test\ntest", "test\n", true);
1112 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1113 check(".....^", flags4, "test\n", "test\n", false);
1114 check(".....^", flags4, "test\r\n", "test\r", false);
1115
1116 // Supplementary character test
1117 String t = toSupplementaries("test");
1118 check("^....", flags1, t+"\n"+t, t, true);
1119 check(".....^", flags1, t+"\n"+t, t, false);
1120 check(".....^", flags1, t+"\n", t, false);
1121 check("....^", flags1, t+"\r\n", t, false);
1122
1123 check("^....", flags2, t+"\n"+t, t, true);
1124 check("....^", flags2, t+"\n"+t, t, false);
1125 check(".....^", flags2, t+"\n", t, false);
1126 check("....^", flags2, t+"\r\n", t, false);
1127
1128 check("^....", flags3, t+"\n"+t, t, true);
1129 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1130 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1131 check(".....^", flags3, t+"\n", t, false);
1132 check(".....^", flags3, t+"\r\n", t, false);
1133 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1134
1135 check("^....", flags4, t+"\n"+t, t, true);
1136 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1137 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1138 check(".....^", flags4, t+"\n", t+"\n", false);
1139 check(".....^", flags4, t+"\r\n", t+"\r", false);
1140
1141 report("Caret between terminators");
1142 }
1143
1144 // This test is for 4727935
1145 private static void dollarAtEndTest() throws Exception {
1146 int flags1 = Pattern.DOTALL;
1147 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1148 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1149
1150 check("....$", flags1, "test\n", "test", true);
1151 check("....$", flags1, "test\r\n", "test", true);
1152 check(".....$", flags1, "test\n", "test\n", true);
1153 check(".....$", flags1, "test\u0085", "test\u0085", true);
1154 check("....$", flags1, "test\u0085", "test", true);
1155
1156 check("....$", flags2, "test\n", "test", true);
1157 check(".....$", flags2, "test\n", "test\n", true);
1158 check(".....$", flags2, "test\u0085", "test\u0085", true);
1159 check("....$", flags2, "test\u0085", "est\u0085", true);
1160
1161 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1162 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1163 check("....$blah", flags3, "test\nblah", "!!!!", false);
1164 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1165
1166 // Supplementary character test
1167 String t = toSupplementaries("test");
1168 String b = toSupplementaries("blah");
1169 check("....$", flags1, t+"\n", t, true);
1170 check("....$", flags1, t+"\r\n", t, true);
1171 check(".....$", flags1, t+"\n", t+"\n", true);
1172 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1173 check("....$", flags1, t+"\u0085", t, true);
1174
1175 check("....$", flags2, t+"\n", t, true);
1176 check(".....$", flags2, t+"\n", t+"\n", true);
1177 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1178 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1179
1180 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1181 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1182 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1183 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1184
1185 report("Dollar at End");
1186 }
1187
1188 // This test is for 4711773
1189 private static void multilineDollarTest() throws Exception {
1190 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1191 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1192 matcher.find();
1193 if (matcher.start(0) != 9)
1194 failCount++;
1195 matcher.find();
1196 if (matcher.start(0) != 20)
1197 failCount++;
1198
1199 // Supplementary character test
1200 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1201 matcher.find();
1202 if (matcher.start(0) != 9*2)
1203 failCount++;
1204 matcher.find();
1205 if (matcher.start(0) != 20*2)
1206 failCount++;
1207
1208 report("Multiline Dollar");
1209 }
1210
1211 private static void reluctantRepetitionTest() throws Exception {
1212 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1213 check(p, "1 word word word 2", true);
1214 check(p, "1 wor wo w 2", true);
1215 check(p, "1 word word 2", true);
1216 check(p, "1 word 2", true);
1217 check(p, "1 wo w w 2", true);
1218 check(p, "1 wo w 2", true);
1219 check(p, "1 wor w 2", true);
1220
1221 p = Pattern.compile("([a-z])+?c");
1222 Matcher m = p.matcher("ababcdefdec");
1223 check(m, "ababc");
1224
1225 // Supplementary character test
1226 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1227 m = p.matcher(toSupplementaries("ababcdefdec"));
1228 check(m, toSupplementaries("ababc"));
1229
1230 report("Reluctant Repetition");
1231 }
1232
1233 private static void serializeTest() throws Exception {
1234 String patternStr = "(b)";
1235 String matchStr = "b";
1236 Pattern pattern = Pattern.compile(patternStr);
1237 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1238 ObjectOutputStream oos = new ObjectOutputStream(baos);
1239 oos.writeObject(pattern);
1240 oos.close();
1241 ObjectInputStream ois = new ObjectInputStream(
1242 new ByteArrayInputStream(baos.toByteArray()));
1243 Pattern serializedPattern = (Pattern)ois.readObject();
1244 ois.close();
1245 Matcher matcher = serializedPattern.matcher(matchStr);
1246 if (!matcher.matches())
1247 failCount++;
1248 if (matcher.groupCount() != 1)
1249 failCount++;
1250
1251 report("Serialization");
1252 }
1253
1254 private static void gTest() {
1255 Pattern pattern = Pattern.compile("\\G\\w");
1256 Matcher matcher = pattern.matcher("abc#x#x");
1257 matcher.find();
1258 matcher.find();
1259 matcher.find();
1260 if (matcher.find())
1261 failCount++;
1262
1263 pattern = Pattern.compile("\\GA*");
1264 matcher = pattern.matcher("1A2AA3");
1265 matcher.find();
1266 if (matcher.find())
1267 failCount++;
1268
1269 pattern = Pattern.compile("\\GA*");
1270 matcher = pattern.matcher("1A2AA3");
1271 if (!matcher.find(1))
1272 failCount++;
1273 matcher.find();
1274 if (matcher.find())
1275 failCount++;
1276
1277 report("\\G");
1278 }
1279
1280 private static void zTest() {
1281 Pattern pattern = Pattern.compile("foo\\Z");
1282 // Positives
1283 check(pattern, "foo\u0085", true);
1284 check(pattern, "foo\u2028", true);
1285 check(pattern, "foo\u2029", true);
1286 check(pattern, "foo\n", true);
1287 check(pattern, "foo\r", true);
1288 check(pattern, "foo\r\n", true);
1289 // Negatives
1290 check(pattern, "fooo", false);
1291 check(pattern, "foo\n\r", false);
1292
1293 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1294 // Positives
1295 check(pattern, "foo", true);
1296 check(pattern, "foo\n", true);
1297 // Negatives
1298 check(pattern, "foo\r", false);
1299 check(pattern, "foo\u0085", false);
1300 check(pattern, "foo\u2028", false);
1301 check(pattern, "foo\u2029", false);
1302
1303 report("\\Z");
1304 }
1305
1306 private static void replaceFirstTest() {
1307 Pattern pattern = Pattern.compile("(ab)(c*)");
1308 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1309 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1310 failCount++;
1311
1312 matcher.reset("zzzabccczzzabcczzzabccczzz");
1313 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1314 failCount++;
1315
1316 matcher.reset("zzzabccczzzabcczzzabccczzz");
1317 String result = matcher.replaceFirst("$1");
1318 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1319 failCount++;
1320
1321 matcher.reset("zzzabccczzzabcczzzabccczzz");
1322 result = matcher.replaceFirst("$2");
1323 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1324 failCount++;
1325
1326 pattern = Pattern.compile("a*");
1327 matcher = pattern.matcher("aaaaaaaaaa");
1328 if (!matcher.replaceFirst("test").equals("test"))
1329 failCount++;
1330
1331 pattern = Pattern.compile("a+");
1332 matcher = pattern.matcher("zzzaaaaaaaaaa");
1333 if (!matcher.replaceFirst("test").equals("zzztest"))
1334 failCount++;
1335
1336 // Supplementary character test
1337 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1338 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1339 if (!matcher.replaceFirst(toSupplementaries("test"))
1340 .equals(toSupplementaries("testzzzabcczzzabccc")))
1341 failCount++;
1342
1343 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1344 if (!matcher.replaceFirst(toSupplementaries("test")).
1345 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1346 failCount++;
1347
1348 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1349 result = matcher.replaceFirst("$1");
1350 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1351 failCount++;
1352
1353 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1354 result = matcher.replaceFirst("$2");
1355 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1356 failCount++;
1357
1358 pattern = Pattern.compile(toSupplementaries("a*"));
1359 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1360 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1361 failCount++;
1362
1363 pattern = Pattern.compile(toSupplementaries("a+"));
1364 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1365 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1366 failCount++;
1367
1368 report("Replace First");
1369 }
1370
1371 private static void unixLinesTest() {
1372 Pattern pattern = Pattern.compile(".*");
1373 Matcher matcher = pattern.matcher("aa\u2028blah");
1374 matcher.find();
1375 if (!matcher.group(0).equals("aa"))
1376 failCount++;
1377
1378 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1379 matcher = pattern.matcher("aa\u2028blah");
1380 matcher.find();
1381 if (!matcher.group(0).equals("aa\u2028blah"))
1382 failCount++;
1383
1384 pattern = Pattern.compile("[az]$",
1385 Pattern.MULTILINE | Pattern.UNIX_LINES);
1386 matcher = pattern.matcher("aa\u2028zz");
1387 check(matcher, "a\u2028", false);
1388
1389 // Supplementary character test
1390 pattern = Pattern.compile(".*");
1391 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1392 matcher.find();
1393 if (!matcher.group(0).equals(toSupplementaries("aa")))
1394 failCount++;
1395
1396 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1397 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1398 matcher.find();
1399 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1400 failCount++;
1401
1402 pattern = Pattern.compile(toSupplementaries("[az]$"),
1403 Pattern.MULTILINE | Pattern.UNIX_LINES);
1404 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1405 check(matcher, toSupplementaries("a\u2028"), false);
1406
1407 report("Unix Lines");
1408 }
1409
1410 private static void commentsTest() {
1411 int flags = Pattern.COMMENTS;
1412
1413 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1414 Matcher matcher = pattern.matcher("aa#aa");
1415 if (!matcher.matches())
1416 failCount++;
1417
1418 pattern = Pattern.compile("aa # blah", flags);
1419 matcher = pattern.matcher("aa");
1420 if (!matcher.matches())
1421 failCount++;
1422
1423 pattern = Pattern.compile("aa blah", flags);
1424 matcher = pattern.matcher("aablah");
1425 if (!matcher.matches())
1426 failCount++;
1427
1428 pattern = Pattern.compile("aa # blah blech ", flags);
1429 matcher = pattern.matcher("aa");
1430 if (!matcher.matches())
1431 failCount++;
1432
1433 pattern = Pattern.compile("aa # blah\n ", flags);
1434 matcher = pattern.matcher("aa");
1435 if (!matcher.matches())
1436 failCount++;
1437
1438 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1439 matcher = pattern.matcher("aabc");
1440 if (!matcher.matches())
1441 failCount++;
1442
1443 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1444 matcher = pattern.matcher("aabc");
1445 if (!matcher.matches())
1446 failCount++;
1447
1448 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1449 matcher = pattern.matcher("aabc#blech");
1450 if (!matcher.matches())
1451 failCount++;
1452
1453 // Supplementary character test
1454 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1455 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1456 if (!matcher.matches())
1457 failCount++;
1458
1459 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1460 matcher = pattern.matcher(toSupplementaries("aa"));
1461 if (!matcher.matches())
1462 failCount++;
1463
1464 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1465 matcher = pattern.matcher(toSupplementaries("aablah"));
1466 if (!matcher.matches())
1467 failCount++;
1468
1469 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1470 matcher = pattern.matcher(toSupplementaries("aa"));
1471 if (!matcher.matches())
1472 failCount++;
1473
1474 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1475 matcher = pattern.matcher(toSupplementaries("aa"));
1476 if (!matcher.matches())
1477 failCount++;
1478
1479 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1480 matcher = pattern.matcher(toSupplementaries("aabc"));
1481 if (!matcher.matches())
1482 failCount++;
1483
1484 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1485 matcher = pattern.matcher(toSupplementaries("aabc"));
1486 if (!matcher.matches())
1487 failCount++;
1488
1489 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1490 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1491 if (!matcher.matches())
1492 failCount++;
1493
1494 report("Comments");
1495 }
1496
1497 private static void caseFoldingTest() { // bug 4504687
1498 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1499 Pattern pattern = Pattern.compile("aa", flags);
1500 Matcher matcher = pattern.matcher("ab");
1501 if (matcher.matches())
1502 failCount++;
1503
1504 pattern = Pattern.compile("aA", flags);
1505 matcher = pattern.matcher("ab");
1506 if (matcher.matches())
1507 failCount++;
1508
1509 pattern = Pattern.compile("aa", flags);
1510 matcher = pattern.matcher("aB");
1511 if (matcher.matches())
1512 failCount++;
1513 matcher = pattern.matcher("Ab");
1514 if (matcher.matches())
1515 failCount++;
1516
1517 // ASCII "a"
1518 // Latin-1 Supplement "a" + grave
1519 // Cyrillic "a"
1520 String[] patterns = new String[] {
1521 //single
1522 "a", "\u00e0", "\u0430",
1523 //slice
1524 "ab", "\u00e0\u00e1", "\u0430\u0431",
1525 //class single
1526 "[a]", "[\u00e0]", "[\u0430]",
1527 //class range
1528 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1529 //back reference
1530 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1531 };
1532
1533 String[] texts = new String[] {
1534 "A", "\u00c0", "\u0410",
1535 "AB", "\u00c0\u00c1", "\u0410\u0411",
1536 "A", "\u00c0", "\u0410",
1537 "B", "\u00c2", "\u0411",
1538 "aA", "\u00e0\u00c0", "\u0430\u0410"
1539 };
1540
1541 boolean[] expected = new boolean[] {
1542 true, false, false,
1543 true, false, false,
1544 true, false, false,
1545 true, false, false,
1546 true, false, false
1547 };
1548
1549 flags = Pattern.CASE_INSENSITIVE;
1550 for (int i = 0; i < patterns.length; i++) {
1551 pattern = Pattern.compile(patterns[i], flags);
1552 matcher = pattern.matcher(texts[i]);
1553 if (matcher.matches() != expected[i]) {
1554 System.out.println("<1> Failed at " + i);
1555 failCount++;
1556 }
1557 }
1558
1559 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1560 for (int i = 0; i < patterns.length; i++) {
1561 pattern = Pattern.compile(patterns[i], flags);
1562 matcher = pattern.matcher(texts[i]);
1563 if (!matcher.matches()) {
1564 System.out.println("<2> Failed at " + i);
1565 failCount++;
1566 }
1567 }
1568 // flag unicode_case alone should do nothing
1569 flags = Pattern.UNICODE_CASE;
1570 for (int i = 0; i < patterns.length; i++) {
1571 pattern = Pattern.compile(patterns[i], flags);
1572 matcher = pattern.matcher(texts[i]);
1573 if (matcher.matches()) {
1574 System.out.println("<3> Failed at " + i);
1575 failCount++;
1576 }
1577 }
1578
1579 // Special cases: i, I, u+0131 and u+0130
1580 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1581 pattern = Pattern.compile("[h-j]+", flags);
1582 if (!pattern.matcher("\u0131\u0130").matches())
1583 failCount++;
1584 report("Case Folding");
1585 }
1586
1587 private static void appendTest() {
1588 Pattern pattern = Pattern.compile("(ab)(cd)");
1589 Matcher matcher = pattern.matcher("abcd");
1590 String result = matcher.replaceAll("$2$1");
1591 if (!result.equals("cdab"))
1592 failCount++;
1593
1594 String s1 = "Swap all: first = 123, second = 456";
1595 String s2 = "Swap one: first = 123, second = 456";
1596 String r = "$3$2$1";
1597 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1598 matcher = pattern.matcher(s1);
1599
1600 result = matcher.replaceAll(r);
1601 if (!result.equals("Swap all: 123 = first, 456 = second"))
1602 failCount++;
1603
1604 matcher = pattern.matcher(s2);
1605
1606 if (matcher.find()) {
1607 StringBuffer sb = new StringBuffer();
1608 matcher.appendReplacement(sb, r);
1609 matcher.appendTail(sb);
1610 result = sb.toString();
1611 if (!result.equals("Swap one: 123 = first, second = 456"))
1612 failCount++;
1613 }
1614
1615 // Supplementary character test
1616 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1617 matcher = pattern.matcher(toSupplementaries("abcd"));
1618 result = matcher.replaceAll("$2$1");
1619 if (!result.equals(toSupplementaries("cdab")))
1620 failCount++;
1621
1622 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1623 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1624 r = toSupplementaries("$3$2$1");
1625 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1626 matcher = pattern.matcher(s1);
1627
1628 result = matcher.replaceAll(r);
1629 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1630 failCount++;
1631
1632 matcher = pattern.matcher(s2);
1633
1634 if (matcher.find()) {
1635 StringBuffer sb = new StringBuffer();
1636 matcher.appendReplacement(sb, r);
1637 matcher.appendTail(sb);
1638 result = sb.toString();
1639 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1640 failCount++;
1641 }
1642 report("Append");
1643 }
1644
1645 private static void splitTest() {
1646 Pattern pattern = Pattern.compile(":");
1647 String[] result = pattern.split("foo:and:boo", 2);
1648 if (!result[0].equals("foo"))
1649 failCount++;
1650 if (!result[1].equals("and:boo"))
1651 failCount++;
1652 // Supplementary character test
1653 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1654 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1655 if (!result[0].equals(toSupplementaries("foo")))
1656 failCount++;
1657 if (!result[1].equals(toSupplementaries("andXboo")))
1658 failCount++;
1659
1660 CharBuffer cb = CharBuffer.allocate(100);
1661 cb.put("foo:and:boo");
1662 cb.flip();
1663 result = pattern.split(cb);
1664 if (!result[0].equals("foo"))
1665 failCount++;
1666 if (!result[1].equals("and"))
1667 failCount++;
1668 if (!result[2].equals("boo"))
1669 failCount++;
1670
1671 // Supplementary character test
1672 CharBuffer cbs = CharBuffer.allocate(100);
1673 cbs.put(toSupplementaries("fooXandXboo"));
1674 cbs.flip();
1675 result = patternX.split(cbs);
1676 if (!result[0].equals(toSupplementaries("foo")))
1677 failCount++;
1678 if (!result[1].equals(toSupplementaries("and")))
1679 failCount++;
1680 if (!result[2].equals(toSupplementaries("boo")))
1681 failCount++;
1682
1683 String source = "0123456789";
1684 for (int limit=-2; limit<3; limit++) {
1685 for (int x=0; x<10; x++) {
1686 result = source.split(Integer.toString(x), limit);
1687 int expectedLength = limit < 1 ? 2 : limit;
1688
1689 if ((limit == 0) && (x == 9)) {
1690 // expected dropping of ""
1691 if (result.length != 1)
1692 failCount++;
1693 if (!result[0].equals("012345678")) {
1694 failCount++;
1695 }
1696 } else {
1697 if (result.length != expectedLength) {
1698 failCount++;
1699 }
1700 if (!result[0].equals(source.substring(0,x))) {
1701 if (limit != 1) {
1702 failCount++;
1703 } else {
1704 if (!result[0].equals(source.substring(0,10))) {
1705 failCount++;
1706 }
1707 }
1708 }
1709 if (expectedLength > 1) { // Check segment 2
1710 if (!result[1].equals(source.substring(x+1,10)))
1711 failCount++;
1712 }
1713 }
1714 }
1715 }
1716 // Check the case for no match found
1717 for (int limit=-2; limit<3; limit++) {
1718 result = source.split("e", limit);
1719 if (result.length != 1)
1720 failCount++;
1721 if (!result[0].equals(source))
1722 failCount++;
1723 }
1724 // Check the case for limit == 0, source = "";
1725 source = "";
1726 result = source.split("e", 0);
1727 if (result.length != 1)
1728 failCount++;
1729 if (!result[0].equals(source))
1730 failCount++;
1731
1732 report("Split");
1733 }
1734
1735 private static void negationTest() {
1736 Pattern pattern = Pattern.compile("[\\[@^]+");
1737 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1738 if (!matcher.find())
1739 failCount++;
1740 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1741 failCount++;
1742 pattern = Pattern.compile("[@\\[^]+");
1743 matcher = pattern.matcher("@@@@[[[[^^^^");
1744 if (!matcher.find())
1745 failCount++;
1746 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1747 failCount++;
1748 pattern = Pattern.compile("[@\\[^@]+");
1749 matcher = pattern.matcher("@@@@[[[[^^^^");
1750 if (!matcher.find())
1751 failCount++;
1752 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1753 failCount++;
1754
1755 pattern = Pattern.compile("\\)");
1756 matcher = pattern.matcher("xxx)xxx");
1757 if (!matcher.find())
1758 failCount++;
1759
1760 report("Negation");
1761 }
1762
1763 private static void ampersandTest() {
1764 Pattern pattern = Pattern.compile("[&@]+");
1765 check(pattern, "@@@@&&&&", true);
1766
1767 pattern = Pattern.compile("[@&]+");
1768 check(pattern, "@@@@&&&&", true);
1769
1770 pattern = Pattern.compile("[@\\&]+");
1771 check(pattern, "@@@@&&&&", true);
1772
1773 report("Ampersand");
1774 }
1775
1776 private static void octalTest() throws Exception {
1777 Pattern pattern = Pattern.compile("\\u0007");
1778 Matcher matcher = pattern.matcher("\u0007");
1779 if (!matcher.matches())
1780 failCount++;
1781 pattern = Pattern.compile("\\07");
1782 matcher = pattern.matcher("\u0007");
1783 if (!matcher.matches())
1784 failCount++;
1785 pattern = Pattern.compile("\\007");
1786 matcher = pattern.matcher("\u0007");
1787 if (!matcher.matches())
1788 failCount++;
1789 pattern = Pattern.compile("\\0007");
1790 matcher = pattern.matcher("\u0007");
1791 if (!matcher.matches())
1792 failCount++;
1793 pattern = Pattern.compile("\\040");
1794 matcher = pattern.matcher("\u0020");
1795 if (!matcher.matches())
1796 failCount++;
1797 pattern = Pattern.compile("\\0403");
1798 matcher = pattern.matcher("\u00203");
1799 if (!matcher.matches())
1800 failCount++;
1801 pattern = Pattern.compile("\\0103");
1802 matcher = pattern.matcher("\u0043");
1803 if (!matcher.matches())
1804 failCount++;
1805
1806 report("Octal");
1807 }
1808
1809 private static void longPatternTest() throws Exception {
1810 try {
1811 Pattern pattern = Pattern.compile(
1812 "a 32-character-long pattern xxxx");
1813 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1814 pattern = Pattern.compile("a thirty four character long regex");
1815 StringBuffer patternToBe = new StringBuffer(101);
1816 for (int i=0; i<100; i++)
1817 patternToBe.append((char)(97 + i%26));
1818 pattern = Pattern.compile(patternToBe.toString());
1819 } catch (PatternSyntaxException e) {
1820 failCount++;
1821 }
1822
1823 // Supplementary character test
1824 try {
1825 Pattern pattern = Pattern.compile(
1826 toSupplementaries("a 32-character-long pattern xxxx"));
1827 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1828 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1829 StringBuffer patternToBe = new StringBuffer(101*2);
1830 for (int i=0; i<100; i++)
1831 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1832 + 97 + i%26));
1833 pattern = Pattern.compile(patternToBe.toString());
1834 } catch (PatternSyntaxException e) {
1835 failCount++;
1836 }
1837 report("LongPattern");
1838 }
1839
1840 private static void group0Test() throws Exception {
1841 Pattern pattern = Pattern.compile("(tes)ting");
1842 Matcher matcher = pattern.matcher("testing");
1843 check(matcher, "testing");
1844
1845 matcher.reset("testing");
1846 if (matcher.lookingAt()) {
1847 if (!matcher.group(0).equals("testing"))
1848 failCount++;
1849 } else {
1850 failCount++;
1851 }
1852
1853 matcher.reset("testing");
1854 if (matcher.matches()) {
1855 if (!matcher.group(0).equals("testing"))
1856 failCount++;
1857 } else {
1858 failCount++;
1859 }
1860
1861 pattern = Pattern.compile("(tes)ting");
1862 matcher = pattern.matcher("testing");
1863 if (matcher.lookingAt()) {
1864 if (!matcher.group(0).equals("testing"))
1865 failCount++;
1866 } else {
1867 failCount++;
1868 }
1869
1870 pattern = Pattern.compile("^(tes)ting");
1871 matcher = pattern.matcher("testing");
1872 if (matcher.matches()) {
1873 if (!matcher.group(0).equals("testing"))
1874 failCount++;
1875 } else {
1876 failCount++;
1877 }
1878
1879 // Supplementary character test
1880 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1881 matcher = pattern.matcher(toSupplementaries("testing"));
1882 check(matcher, toSupplementaries("testing"));
1883
1884 matcher.reset(toSupplementaries("testing"));
1885 if (matcher.lookingAt()) {
1886 if (!matcher.group(0).equals(toSupplementaries("testing")))
1887 failCount++;
1888 } else {
1889 failCount++;
1890 }
1891
1892 matcher.reset(toSupplementaries("testing"));
1893 if (matcher.matches()) {
1894 if (!matcher.group(0).equals(toSupplementaries("testing")))
1895 failCount++;
1896 } else {
1897 failCount++;
1898 }
1899
1900 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1901 matcher = pattern.matcher(toSupplementaries("testing"));
1902 if (matcher.lookingAt()) {
1903 if (!matcher.group(0).equals(toSupplementaries("testing")))
1904 failCount++;
1905 } else {
1906 failCount++;
1907 }
1908
1909 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1910 matcher = pattern.matcher(toSupplementaries("testing"));
1911 if (matcher.matches()) {
1912 if (!matcher.group(0).equals(toSupplementaries("testing")))
1913 failCount++;
1914 } else {
1915 failCount++;
1916 }
1917
1918 report("Group0");
1919 }
1920
1921 private static void findIntTest() throws Exception {
1922 Pattern p = Pattern.compile("blah");
1923 Matcher m = p.matcher("zzzzblahzzzzzblah");
1924 boolean result = m.find(2);
1925 if (!result)
1926 failCount++;
1927
1928 p = Pattern.compile("$");
1929 m = p.matcher("1234567890");
1930 result = m.find(10);
1931 if (!result)
1932 failCount++;
1933 try {
1934 result = m.find(11);
1935 failCount++;
1936 } catch (IndexOutOfBoundsException e) {
1937 // correct result
1938 }
1939
1940 // Supplementary character test
1941 p = Pattern.compile(toSupplementaries("blah"));
1942 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1943 result = m.find(2);
1944 if (!result)
1945 failCount++;
1946
1947 report("FindInt");
1948 }
1949
1950 private static void emptyPatternTest() throws Exception {
1951 Pattern p = Pattern.compile("");
1952 Matcher m = p.matcher("foo");
1953
1954 // Should find empty pattern at beginning of input
1955 boolean result = m.find();
1956 if (result != true)
1957 failCount++;
1958 if (m.start() != 0)
1959 failCount++;
1960
1961 // Should not match entire input if input is not empty
1962 m.reset();
1963 result = m.matches();
1964 if (result == true)
1965 failCount++;
1966
1967 try {
1968 m.start(0);
1969 failCount++;
1970 } catch (IllegalStateException e) {
1971 // Correct result
1972 }
1973
1974 // Should match entire input if input is empty
1975 m.reset("");
1976 result = m.matches();
1977 if (result != true)
1978 failCount++;
1979
1980 result = Pattern.matches("", "");
1981 if (result != true)
1982 failCount++;
1983
1984 result = Pattern.matches("", "foo");
1985 if (result == true)
1986 failCount++;
1987 report("EmptyPattern");
1988 }
1989
1990 private static void charClassTest() throws Exception {
1991 Pattern pattern = Pattern.compile("blah[ab]]blech");
1992 check(pattern, "blahb]blech", true);
1993
1994 pattern = Pattern.compile("[abc[def]]");
1995 check(pattern, "b", true);
1996
1997 // Supplementary character tests
1998 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
1999 check(pattern, toSupplementaries("blahb]blech"), true);
2000
2001 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2002 check(pattern, toSupplementaries("b"), true);
2003
2004 try {
2005 // u00ff when UNICODE_CASE
2006 pattern = Pattern.compile("[ab\u00ffcd]",
2007 Pattern.CASE_INSENSITIVE|
2008 Pattern.UNICODE_CASE);
2009 check(pattern, "ab\u00ffcd", true);
2010 check(pattern, "Ab\u0178Cd", true);
2011
2012 // u00b5 when UNICODE_CASE
2013 pattern = Pattern.compile("[ab\u00b5cd]",
2014 Pattern.CASE_INSENSITIVE|
2015 Pattern.UNICODE_CASE);
2016 check(pattern, "ab\u00b5cd", true);
2017 check(pattern, "Ab\u039cCd", true);
2018 } catch (Exception e) { failCount++; }
2019
2020 /* Special cases
2021 (1)LatinSmallLetterLongS u+017f
2022 (2)LatinSmallLetterDotlessI u+0131
2023 (3)LatineCapitalLetterIWithDotAbove u+0130
2024 (4)KelvinSign u+212a
2025 (5)AngstromSign u+212b
2026 */
2027 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2028 pattern = Pattern.compile("[sik\u00c5]+", flags);
2029 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2030 failCount++;
2031
2032 report("CharClass");
2033 }
2034
2035 private static void caretTest() throws Exception {
2036 Pattern pattern = Pattern.compile("\\w*");
2037 Matcher matcher = pattern.matcher("a#bc#def##g");
2038 check(matcher, "a");
2039 check(matcher, "");
2040 check(matcher, "bc");
2041 check(matcher, "");
2042 check(matcher, "def");
2043 check(matcher, "");
2044 check(matcher, "");
2045 check(matcher, "g");
2046 check(matcher, "");
2047 if (matcher.find())
2048 failCount++;
2049
2050 pattern = Pattern.compile("^\\w*");
2051 matcher = pattern.matcher("a#bc#def##g");
2052 check(matcher, "a");
2053 if (matcher.find())
2054 failCount++;
2055
2056 pattern = Pattern.compile("\\w");
2057 matcher = pattern.matcher("abc##x");
2058 check(matcher, "a");
2059 check(matcher, "b");
2060 check(matcher, "c");
2061 check(matcher, "x");
2062 if (matcher.find())
2063 failCount++;
2064
2065 pattern = Pattern.compile("^\\w");
2066 matcher = pattern.matcher("abc##x");
2067 check(matcher, "a");
2068 if (matcher.find())
2069 failCount++;
2070
2071 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2072 matcher = pattern.matcher("abcdef-ghi\njklmno");
2073 check(matcher, "abc");
2074 if (matcher.find())
2075 failCount++;
2076
2077 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2078 matcher = pattern.matcher("abcdef-ghi\njklmno");
2079 check(matcher, "abc");
2080 check(matcher, "jkl");
2081 if (matcher.find())
2082 failCount++;
2083
2084 pattern = Pattern.compile("^", Pattern.MULTILINE);
2085 matcher = pattern.matcher("this is some text");
2086 String result = matcher.replaceAll("X");
2087 if (!result.equals("Xthis is some text"))
2088 failCount++;
2089
2090 pattern = Pattern.compile("^");
2091 matcher = pattern.matcher("this is some text");
2092 result = matcher.replaceAll("X");
2093 if (!result.equals("Xthis is some text"))
2094 failCount++;
2095
2096 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2097 matcher = pattern.matcher("this is some text\n");
2098 result = matcher.replaceAll("X");
2099 if (!result.equals("Xthis is some text\n"))
2100 failCount++;
2101
2102 report("Caret");
2103 }
2104
2105 private static void groupCaptureTest() throws Exception {
2106 // Independent group
2107 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2108 Matcher matcher = pattern.matcher("xxxyyyzzz");
2109 matcher.find();
2110 try {
2111 String blah = matcher.group(1);
2112 failCount++;
2113 } catch (IndexOutOfBoundsException ioobe) {
2114 // Good result
2115 }
2116 // Pure group
2117 pattern = Pattern.compile("x+(?:y+)z+");
2118 matcher = pattern.matcher("xxxyyyzzz");
2119 matcher.find();
2120 try {
2121 String blah = matcher.group(1);
2122 failCount++;
2123 } catch (IndexOutOfBoundsException ioobe) {
2124 // Good result
2125 }
2126
2127 // Supplementary character tests
2128 // Independent group
2129 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2130 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2131 matcher.find();
2132 try {
2133 String blah = matcher.group(1);
2134 failCount++;
2135 } catch (IndexOutOfBoundsException ioobe) {
2136 // Good result
2137 }
2138 // Pure group
2139 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2140 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2141 matcher.find();
2142 try {
2143 String blah = matcher.group(1);
2144 failCount++;
2145 } catch (IndexOutOfBoundsException ioobe) {
2146 // Good result
2147 }
2148
2149 report("GroupCapture");
2150 }
2151
2152 private static void backRefTest() throws Exception {
2153 Pattern pattern = Pattern.compile("(a*)bc\\1");
2154 check(pattern, "zzzaabcazzz", true);
2155
2156 pattern = Pattern.compile("(a*)bc\\1");
2157 check(pattern, "zzzaabcaazzz", true);
2158
2159 pattern = Pattern.compile("(abc)(def)\\1");
2160 check(pattern, "abcdefabc", true);
2161
2162 pattern = Pattern.compile("(abc)(def)\\3");
2163 check(pattern, "abcdefabc", false);
2164
2165 try {
2166 for (int i = 1; i < 10; i++) {
2167 // Make sure backref 1-9 are always accepted
2168 pattern = Pattern.compile("abcdef\\" + i);
2169 // and fail to match if the target group does not exit
2170 check(pattern, "abcdef", false);
2171 }
2172 } catch(PatternSyntaxException e) {
2173 failCount++;
2174 }
2175
2176 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2177 check(pattern, "abcdefghija", false);
2178 check(pattern, "abcdefghija1", true);
2179
2180 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2181 check(pattern, "abcdefghijkk", true);
2182
2183 pattern = Pattern.compile("(a)bcdefghij\\11");
2184 check(pattern, "abcdefghija1", true);
2185
2186 // Supplementary character tests
2187 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2188 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2189
2190 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2191 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2192
2193 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2194 check(pattern, toSupplementaries("abcdefabc"), true);
2195
2196 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2197 check(pattern, toSupplementaries("abcdefabc"), false);
2198
2199 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2200 check(pattern, toSupplementaries("abcdefghija"), false);
2201 check(pattern, toSupplementaries("abcdefghija1"), true);
2202
2203 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2204 check(pattern, toSupplementaries("abcdefghijkk"), true);
2205
2206 report("BackRef");
2207 }
2208
2209 /**
2210 * Unicode Technical Report #18, section 2.6 End of Line
2211 * There is no empty line to be matched in the sequence \u000D\u000A
2212 * but there is an empty line in the sequence \u000A\u000D.
2213 */
2214 private static void anchorTest() throws Exception {
2215 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2216 Matcher m = p.matcher("blah1\r\nblah2");
2217 m.find();
2218 m.find();
2219 if (!m.group().equals("blah2"))
2220 failCount++;
2221
2222 m.reset("blah1\n\rblah2");
2223 m.find();
2224 m.find();
2225 m.find();
2226 if (!m.group().equals("blah2"))
2227 failCount++;
2228
2229 // Test behavior of $ with \r\n at end of input
2230 p = Pattern.compile(".+$");
2231 m = p.matcher("blah1\r\n");
2232 if (!m.find())
2233 failCount++;
2234 if (!m.group().equals("blah1"))
2235 failCount++;
2236 if (m.find())
2237 failCount++;
2238
2239 // Test behavior of $ with \r\n at end of input in multiline
2240 p = Pattern.compile(".+$", Pattern.MULTILINE);
2241 m = p.matcher("blah1\r\n");
2242 if (!m.find())
2243 failCount++;
2244 if (m.find())
2245 failCount++;
2246
2247 // Test for $ recognition of \u0085 for bug 4527731
2248 p = Pattern.compile(".+$", Pattern.MULTILINE);
2249 m = p.matcher("blah1\u0085");
2250 if (!m.find())
2251 failCount++;
2252
2253 // Supplementary character test
2254 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2255 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2256 m.find();
2257 m.find();
2258 if (!m.group().equals(toSupplementaries("blah2")))
2259 failCount++;
2260
2261 m.reset(toSupplementaries("blah1\n\rblah2"));
2262 m.find();
2263 m.find();
2264 m.find();
2265 if (!m.group().equals(toSupplementaries("blah2")))
2266 failCount++;
2267
2268 // Test behavior of $ with \r\n at end of input
2269 p = Pattern.compile(".+$");
2270 m = p.matcher(toSupplementaries("blah1\r\n"));
2271 if (!m.find())
2272 failCount++;
2273 if (!m.group().equals(toSupplementaries("blah1")))
2274 failCount++;
2275 if (m.find())
2276 failCount++;
2277
2278 // Test behavior of $ with \r\n at end of input in multiline
2279 p = Pattern.compile(".+$", Pattern.MULTILINE);
2280 m = p.matcher(toSupplementaries("blah1\r\n"));
2281 if (!m.find())
2282 failCount++;
2283 if (m.find())
2284 failCount++;
2285
2286 // Test for $ recognition of \u0085 for bug 4527731
2287 p = Pattern.compile(".+$", Pattern.MULTILINE);
2288 m = p.matcher(toSupplementaries("blah1\u0085"));
2289 if (!m.find())
2290 failCount++;
2291
2292 report("Anchors");
2293 }
2294
2295 /**
2296 * A basic sanity test of Matcher.lookingAt().
2297 */
2298 private static void lookingAtTest() throws Exception {
2299 Pattern p = Pattern.compile("(ab)(c*)");
2300 Matcher m = p.matcher("abccczzzabcczzzabccc");
2301
2302 if (!m.lookingAt())
2303 failCount++;
2304
2305 if (!m.group().equals(m.group(0)))
2306 failCount++;
2307
2308 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2309 if (m.lookingAt())
2310 failCount++;
2311
2312 // Supplementary character test
2313 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2314 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2315
2316 if (!m.lookingAt())
2317 failCount++;
2318
2319 if (!m.group().equals(m.group(0)))
2320 failCount++;
2321
2322 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2323 if (m.lookingAt())
2324 failCount++;
2325
2326 report("Looking At");
2327 }
2328
2329 /**
2330 * A basic sanity test of Matcher.matches().
2331 */
2332 private static void matchesTest() throws Exception {
2333 // matches()
2334 Pattern p = Pattern.compile("ulb(c*)");
2335 Matcher m = p.matcher("ulbcccccc");
2336 if (!m.matches())
2337 failCount++;
2338
2339 // find() but not matches()
2340 m.reset("zzzulbcccccc");
2341 if (m.matches())
2342 failCount++;
2343
2344 // lookingAt() but not matches()
2345 m.reset("ulbccccccdef");
2346 if (m.matches())
2347 failCount++;
2348
2349 // matches()
2350 p = Pattern.compile("a|ad");
2351 m = p.matcher("ad");
2352 if (!m.matches())
2353 failCount++;
2354
2355 // Supplementary character test
2356 // matches()
2357 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2358 m = p.matcher(toSupplementaries("ulbcccccc"));
2359 if (!m.matches())
2360 failCount++;
2361
2362 // find() but not matches()
2363 m.reset(toSupplementaries("zzzulbcccccc"));
2364 if (m.matches())
2365 failCount++;
2366
2367 // lookingAt() but not matches()
2368 m.reset(toSupplementaries("ulbccccccdef"));
2369 if (m.matches())
2370 failCount++;
2371
2372 // matches()
2373 p = Pattern.compile(toSupplementaries("a|ad"));
2374 m = p.matcher(toSupplementaries("ad"));
2375 if (!m.matches())
2376 failCount++;
2377
2378 report("Matches");
2379 }
2380
2381 /**
2382 * A basic sanity test of Pattern.matches().
2383 */
2384 private static void patternMatchesTest() throws Exception {
2385 // matches()
2386 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2387 toSupplementaries("ulbcccccc")))
2388 failCount++;
2389
2390 // find() but not matches()
2391 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2392 toSupplementaries("zzzulbcccccc")))
2393 failCount++;
2394
2395 // lookingAt() but not matches()
2396 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2397 toSupplementaries("ulbccccccdef")))
2398 failCount++;
2399
2400 // Supplementary character test
2401 // matches()
2402 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2403 toSupplementaries("ulbcccccc")))
2404 failCount++;
2405
2406 // find() but not matches()
2407 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2408 toSupplementaries("zzzulbcccccc")))
2409 failCount++;
2410
2411 // lookingAt() but not matches()
2412 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2413 toSupplementaries("ulbccccccdef")))
2414 failCount++;
2415
2416 report("Pattern Matches");
2417 }
2418
2419 /**
2420 * Canonical equivalence testing. Tests the ability of the engine
2421 * to match sequences that are not explicitly specified in the
2422 * pattern when they are considered equivalent by the Unicode Standard.
2423 */
2424 private static void ceTest() throws Exception {
2425 // Decomposed char outside char classes
2426 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2427 Matcher m = p.matcher("test\u00e5");
2428 if (!m.matches())
2429 failCount++;
2430
2431 m.reset("testa\u030a");
2432 if (!m.matches())
2433 failCount++;
2434
2435 // Composed char outside char classes
2436 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2437 m = p.matcher("test\u00e5");
2438 if (!m.matches())
2439 failCount++;
2440
2441 m.reset("testa\u030a");
2442 if (!m.find())
2443 failCount++;
2444
2445 // Decomposed char inside a char class
2446 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2447 m = p.matcher("test\u00e5");
2448 if (!m.find())
2449 failCount++;
2450
2451 m.reset("testa\u030a");
2452 if (!m.find())
2453 failCount++;
2454
2455 // Composed char inside a char class
2456 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2457 m = p.matcher("test\u00e5");
2458 if (!m.find())
2459 failCount++;
2460
2461 m.reset("testa\u0300");
2462 if (!m.find())
2463 failCount++;
2464
2465 m.reset("testa\u030a");
2466 if (!m.find())
2467 failCount++;
2468
2469 // Marks that cannot legally change order and be equivalent
2470 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2471 check(p, "testa\u0308\u0300", true);
2472 check(p, "testa\u0300\u0308", false);
2473
2474 // Marks that can legally change order and be equivalent
2475 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2476 check(p, "testa\u0308\u0323", true);
2477 check(p, "testa\u0323\u0308", true);
2478
2479 // Test all equivalences of the sequence a\u0308\u0323\u0300
2480 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2481 check(p, "testa\u0308\u0323\u0300", true);
2482 check(p, "testa\u0323\u0308\u0300", true);
2483 check(p, "testa\u0308\u0300\u0323", true);
2484 check(p, "test\u00e4\u0323\u0300", true);
2485 check(p, "test\u00e4\u0300\u0323", true);
2486
2487 /*
2488 * The following canonical equivalence tests don't work. Bug id: 4916384.
2489 *
2490 // Decomposed hangul (jamos)
2491 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2492 m = p.matcher("\u1100\u1161");
2493 if (!m.matches())
2494 failCount++;
2495
2496 m.reset("\uac00");
2497 if (!m.matches())
2498 failCount++;
2499
2500 // Composed hangul
2501 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2502 m = p.matcher("\u1100\u1161");
2503 if (!m.matches())
2504 failCount++;
2505
2506 m.reset("\uac00");
2507 if (!m.matches())
2508 failCount++;
2509
2510 // Decomposed supplementary outside char classes
2511 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2512 m = p.matcher("test\ud834\uddc0");
2513 if (!m.matches())
2514 failCount++;
2515
2516 m.reset("test\ud834\uddbc\ud834\udd6f");
2517 if (!m.matches())
2518 failCount++;
2519
2520 // Composed supplementary outside char classes
2521 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2522 m.reset("test\ud834\uddbc\ud834\udd6f");
2523 if (!m.matches())
2524 failCount++;
2525
2526 m = p.matcher("test\ud834\uddc0");
2527 if (!m.matches())
2528 failCount++;
2529
2530 */
2531
2532 report("Canonical Equivalence");
2533 }
2534
2535 /**
2536 * A basic sanity test of Matcher.replaceAll().
2537 */
2538 private static void globalSubstitute() throws Exception {
2539 // Global substitution with a literal
2540 Pattern p = Pattern.compile("(ab)(c*)");
2541 Matcher m = p.matcher("abccczzzabcczzzabccc");
2542 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2543 failCount++;
2544
2545 m.reset("zzzabccczzzabcczzzabccczzz");
2546 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2547 failCount++;
2548
2549 // Global substitution with groups
2550 m.reset("zzzabccczzzabcczzzabccczzz");
2551 String result = m.replaceAll("$1");
2552 if (!result.equals("zzzabzzzabzzzabzzz"))
2553 failCount++;
2554
2555 // Supplementary character test
2556 // Global substitution with a literal
2557 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2558 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2559 if (!m.replaceAll(toSupplementaries("test")).
2560 equals(toSupplementaries("testzzztestzzztest")))
2561 failCount++;
2562
2563 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2564 if (!m.replaceAll(toSupplementaries("test")).
2565 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2566 failCount++;
2567
2568 // Global substitution with groups
2569 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2570 result = m.replaceAll("$1");
2571 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2572 failCount++;
2573
2574 report("Global Substitution");
2575 }
2576
2577 /**
2578 * Tests the usage of Matcher.appendReplacement() with literal
2579 * and group substitutions.
2580 */
2581 private static void stringbufferSubstitute() throws Exception {
2582 // SB substitution with literal
2583 String blah = "zzzblahzzz";
2584 Pattern p = Pattern.compile("blah");
2585 Matcher m = p.matcher(blah);
2586 StringBuffer result = new StringBuffer();
2587 try {
2588 m.appendReplacement(result, "blech");
2589 failCount++;
2590 } catch (IllegalStateException e) {
2591 }
2592 m.find();
2593 m.appendReplacement(result, "blech");
2594 if (!result.toString().equals("zzzblech"))
2595 failCount++;
2596
2597 m.appendTail(result);
2598 if (!result.toString().equals("zzzblechzzz"))
2599 failCount++;
2600
2601 // SB substitution with groups
2602 blah = "zzzabcdzzz";
2603 p = Pattern.compile("(ab)(cd)*");
2604 m = p.matcher(blah);
2605 result = new StringBuffer();
2606 try {
2607 m.appendReplacement(result, "$1");
2608 failCount++;
2609 } catch (IllegalStateException e) {
2610 }
2611 m.find();
2612 m.appendReplacement(result, "$1");
2613 if (!result.toString().equals("zzzab"))
2614 failCount++;
2615
2616 m.appendTail(result);
2617 if (!result.toString().equals("zzzabzzz"))
2618 failCount++;
2619
2620 // SB substitution with 3 groups
2621 blah = "zzzabcdcdefzzz";
2622 p = Pattern.compile("(ab)(cd)*(ef)");
2623 m = p.matcher(blah);
2624 result = new StringBuffer();
2625 try {
2626 m.appendReplacement(result, "$1w$2w$3");
2627 failCount++;
2628 } catch (IllegalStateException e) {
2629 }
2630 m.find();
2631 m.appendReplacement(result, "$1w$2w$3");
2632 if (!result.toString().equals("zzzabwcdwef"))
2633 failCount++;
2634
2635 m.appendTail(result);
2636 if (!result.toString().equals("zzzabwcdwefzzz"))
2637 failCount++;
2638
2639 // SB substitution with groups and three matches
2640 // skipping middle match
2641 blah = "zzzabcdzzzabcddzzzabcdzzz";
2642 p = Pattern.compile("(ab)(cd*)");
2643 m = p.matcher(blah);
2644 result = new StringBuffer();
2645 try {
2646 m.appendReplacement(result, "$1");
2647 failCount++;
2648 } catch (IllegalStateException e) {
2649 }
2650 m.find();
2651 m.appendReplacement(result, "$1");
2652 if (!result.toString().equals("zzzab"))
2653 failCount++;
2654
2655 m.find();
2656 m.find();
2657 m.appendReplacement(result, "$2");
2658 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2659 failCount++;
2660
2661 m.appendTail(result);
2662 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2663 failCount++;
2664
2665 // Check to make sure escaped $ is ignored
2666 blah = "zzzabcdcdefzzz";
2667 p = Pattern.compile("(ab)(cd)*(ef)");
2668 m = p.matcher(blah);
2669 result = new StringBuffer();
2670 m.find();
2671 m.appendReplacement(result, "$1w\\$2w$3");
2672 if (!result.toString().equals("zzzabw$2wef"))
2673 failCount++;
2674
2675 m.appendTail(result);
2676 if (!result.toString().equals("zzzabw$2wefzzz"))
2677 failCount++;
2678
2679 // Check to make sure a reference to nonexistent group causes error
2680 blah = "zzzabcdcdefzzz";
2681 p = Pattern.compile("(ab)(cd)*(ef)");
2682 m = p.matcher(blah);
2683 result = new StringBuffer();
2684 m.find();
2685 try {
2686 m.appendReplacement(result, "$1w$5w$3");
2687 failCount++;
2688 } catch (IndexOutOfBoundsException ioobe) {
2689 // Correct result
2690 }
2691
2692 // Check double digit group references
2693 blah = "zzz123456789101112zzz";
2694 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2695 m = p.matcher(blah);
2696 result = new StringBuffer();
2697 m.find();
2698 m.appendReplacement(result, "$1w$11w$3");
2699 if (!result.toString().equals("zzz1w11w3"))
2700 failCount++;
2701
2702 // Check to make sure it backs off $15 to $1 if only three groups
2703 blah = "zzzabcdcdefzzz";
2704 p = Pattern.compile("(ab)(cd)*(ef)");
2705 m = p.matcher(blah);
2706 result = new StringBuffer();
2707 m.find();
2708 m.appendReplacement(result, "$1w$15w$3");
2709 if (!result.toString().equals("zzzabwab5wef"))
2710 failCount++;
2711
2712
2713 // Supplementary character test
2714 // SB substitution with literal
2715 blah = toSupplementaries("zzzblahzzz");
2716 p = Pattern.compile(toSupplementaries("blah"));
2717 m = p.matcher(blah);
2718 result = new StringBuffer();
2719 try {
2720 m.appendReplacement(result, toSupplementaries("blech"));
2721 failCount++;
2722 } catch (IllegalStateException e) {
2723 }
2724 m.find();
2725 m.appendReplacement(result, toSupplementaries("blech"));
2726 if (!result.toString().equals(toSupplementaries("zzzblech")))
2727 failCount++;
2728
2729 m.appendTail(result);
2730 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2731 failCount++;
2732
2733 // SB substitution with groups
2734 blah = toSupplementaries("zzzabcdzzz");
2735 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2736 m = p.matcher(blah);
2737 result = new StringBuffer();
2738 try {
2739 m.appendReplacement(result, "$1");
2740 failCount++;
2741 } catch (IllegalStateException e) {
2742 }
2743 m.find();
2744 m.appendReplacement(result, "$1");
2745 if (!result.toString().equals(toSupplementaries("zzzab")))
2746 failCount++;
2747
2748 m.appendTail(result);
2749 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2750 failCount++;
2751
2752 // SB substitution with 3 groups
2753 blah = toSupplementaries("zzzabcdcdefzzz");
2754 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2755 m = p.matcher(blah);
2756 result = new StringBuffer();
2757 try {
2758 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2759 failCount++;
2760 } catch (IllegalStateException e) {
2761 }
2762 m.find();
2763 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2764 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2765 failCount++;
2766
2767 m.appendTail(result);
2768 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2769 failCount++;
2770
2771 // SB substitution with groups and three matches
2772 // skipping middle match
2773 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2774 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2775 m = p.matcher(blah);
2776 result = new StringBuffer();
2777 try {
2778 m.appendReplacement(result, "$1");
2779 failCount++;
2780 } catch (IllegalStateException e) {
2781 }
2782 m.find();
2783 m.appendReplacement(result, "$1");
2784 if (!result.toString().equals(toSupplementaries("zzzab")))
2785 failCount++;
2786
2787 m.find();
2788 m.find();
2789 m.appendReplacement(result, "$2");
2790 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2791 failCount++;
2792
2793 m.appendTail(result);
2794 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2795 failCount++;
2796
2797 // Check to make sure escaped $ is ignored
2798 blah = toSupplementaries("zzzabcdcdefzzz");
2799 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2800 m = p.matcher(blah);
2801 result = new StringBuffer();
2802 m.find();
2803 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2804 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2805 failCount++;
2806
2807 m.appendTail(result);
2808 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2809 failCount++;
2810
2811 // Check to make sure a reference to nonexistent group causes error
2812 blah = toSupplementaries("zzzabcdcdefzzz");
2813 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2814 m = p.matcher(blah);
2815 result = new StringBuffer();
2816 m.find();
2817 try {
2818 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2819 failCount++;
2820 } catch (IndexOutOfBoundsException ioobe) {
2821 // Correct result
2822 }
2823
2824 // Check double digit group references
2825 blah = toSupplementaries("zzz123456789101112zzz");
2826 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2827 m = p.matcher(blah);
2828 result = new StringBuffer();
2829 m.find();
2830 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2831 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2832 failCount++;
2833
2834 // Check to make sure it backs off $15 to $1 if only three groups
2835 blah = toSupplementaries("zzzabcdcdefzzz");
2836 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2837 m = p.matcher(blah);
2838 result = new StringBuffer();
2839 m.find();
2840 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2841 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2842 failCount++;
2843
2844 // Check nothing has been appended into the output buffer if
2845 // the replacement string triggers IllegalArgumentException.
2846 p = Pattern.compile("(abc)");
2847 m = p.matcher("abcd");
2848 result = new StringBuffer();
2849 m.find();
2850 try {
2851 m.appendReplacement(result, ("xyz$g"));
2852 failCount++;
2853 } catch (IllegalArgumentException iae) {
2854 if (result.length() != 0)
2855 failCount++;
2856 }
2857
2858 report("SB Substitution");
2859 }
2860
2861 /*
2862 * 5 groups of characters are created to make a substitution string.
2863 * A base string will be created including random lead chars, the
2864 * substitution string, and random trailing chars.
2865 * A pattern containing the 5 groups is searched for and replaced with:
2866 * random group + random string + random group.
2867 * The results are checked for correctness.
2868 */
2869 private static void substitutionBasher() {
2870 for (int runs = 0; runs<1000; runs++) {
2871 // Create a base string to work in
2872 int leadingChars = generator.nextInt(10);
2873 StringBuffer baseBuffer = new StringBuffer(100);
2874 String leadingString = getRandomAlphaString(leadingChars);
2875 baseBuffer.append(leadingString);
2876
2877 // Create 5 groups of random number of random chars
2878 // Create the string to substitute
2879 // Create the pattern string to search for
2880 StringBuffer bufferToSub = new StringBuffer(25);
2881 StringBuffer bufferToPat = new StringBuffer(50);
2882 String[] groups = new String[5];
2883 for(int i=0; i<5; i++) {
2884 int aGroupSize = generator.nextInt(5)+1;
2885 groups[i] = getRandomAlphaString(aGroupSize);
2886 bufferToSub.append(groups[i]);
2887 bufferToPat.append('(');
2888 bufferToPat.append(groups[i]);
2889 bufferToPat.append(')');
2890 }
2891 String stringToSub = bufferToSub.toString();
2892 String pattern = bufferToPat.toString();
2893
2894 // Place sub string into working string at random index
2895 baseBuffer.append(stringToSub);
2896
2897 // Append random chars to end
2898 int trailingChars = generator.nextInt(10);
2899 String trailingString = getRandomAlphaString(trailingChars);
2900 baseBuffer.append(trailingString);
2901 String baseString = baseBuffer.toString();
2902
2903 // Create test pattern and matcher
2904 Pattern p = Pattern.compile(pattern);
2905 Matcher m = p.matcher(baseString);
2906
2907 // Reject candidate if pattern happens to start early
2908 m.find();
2909 if (m.start() < leadingChars)
2910 continue;
2911
2912 // Reject candidate if more than one match
2913 if (m.find())
2914 continue;
2915
2916 // Construct a replacement string with :
2917 // random group + random string + random group
2918 StringBuffer bufferToRep = new StringBuffer();
2919 int groupIndex1 = generator.nextInt(5);
2920 bufferToRep.append("$" + (groupIndex1 + 1));
2921 String randomMidString = getRandomAlphaString(5);
2922 bufferToRep.append(randomMidString);
2923 int groupIndex2 = generator.nextInt(5);
2924 bufferToRep.append("$" + (groupIndex2 + 1));
2925 String replacement = bufferToRep.toString();
2926
2927 // Do the replacement
2928 String result = m.replaceAll(replacement);
2929
2930 // Construct expected result
2931 StringBuffer bufferToRes = new StringBuffer();
2932 bufferToRes.append(leadingString);
2933 bufferToRes.append(groups[groupIndex1]);
2934 bufferToRes.append(randomMidString);
2935 bufferToRes.append(groups[groupIndex2]);
2936 bufferToRes.append(trailingString);
2937 String expectedResult = bufferToRes.toString();
2938
2939 // Check results
2940 if (!result.equals(expectedResult))
2941 failCount++;
2942 }
2943
2944 report("Substitution Basher");
2945 }
2946
2947 /**
2948 * Checks the handling of some escape sequences that the Pattern
2949 * class should process instead of the java compiler. These are
2950 * not in the file because the escapes should be be processed
2951 * by the Pattern class when the regex is compiled.
2952 */
2953 private static void escapes() throws Exception {
2954 Pattern p = Pattern.compile("\\043");
2955 Matcher m = p.matcher("#");
2956 if (!m.find())
2957 failCount++;
2958
2959 p = Pattern.compile("\\x23");
2960 m = p.matcher("#");
2961 if (!m.find())
2962 failCount++;
2963
2964 p = Pattern.compile("\\u0023");
2965 m = p.matcher("#");
2966 if (!m.find())
2967 failCount++;
2968
2969 report("Escape sequences");
2970 }
2971
2972 /**
2973 * Checks the handling of blank input situations. These
2974 * tests are incompatible with my test file format.
2975 */
2976 private static void blankInput() throws Exception {
2977 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
2978 Matcher m = p.matcher("");
2979 if (m.find())
2980 failCount++;
2981
2982 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
2983 m = p.matcher("");
2984 if (!m.find())
2985 failCount++;
2986
2987 p = Pattern.compile("abc");
2988 m = p.matcher("");
2989 if (m.find())
2990 failCount++;
2991
2992 p = Pattern.compile("a*");
2993 m = p.matcher("");
2994 if (!m.find())
2995 failCount++;
2996
2997 report("Blank input");
2998 }
2999
3000 /**
3001 * Tests the Boyer-Moore pattern matching of a character sequence
3002 * on randomly generated patterns.
3003 */
3004 private static void bm() throws Exception {
3005 doBnM('a');
3006 report("Boyer Moore (ASCII)");
3007
3008 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3009 report("Boyer Moore (Supplementary)");
3010 }
3011
3012 private static void doBnM(int baseCharacter) throws Exception {
3013 int achar=0;
3014
3015 for (int i=0; i<100; i++) {
3016 // Create a short pattern to search for
3017 int patternLength = generator.nextInt(7) + 4;
3018 StringBuffer patternBuffer = new StringBuffer(patternLength);
3019 for (int x=0; x<patternLength; x++) {
3020 int ch = baseCharacter + generator.nextInt(26);
3021 if (Character.isSupplementaryCodePoint(ch)) {
3022 patternBuffer.append(Character.toChars(ch));
3023 } else {
3024 patternBuffer.append((char)ch);
3025 }
3026 }
3027 String pattern = patternBuffer.toString();
3028 Pattern p = Pattern.compile(pattern);
3029
3030 // Create a buffer with random ASCII chars that does
3031 // not match the sample
3032 String toSearch = null;
3033 StringBuffer s = null;
3034 Matcher m = p.matcher("");
3035 do {
3036 s = new StringBuffer(100);
3037 for (int x=0; x<100; x++) {
3038 int ch = baseCharacter + generator.nextInt(26);
3039 if (Character.isSupplementaryCodePoint(ch)) {
3040 s.append(Character.toChars(ch));
3041 } else {
3042 s.append((char)ch);
3043 }
3044 }
3045 toSearch = s.toString();
3046 m.reset(toSearch);
3047 } while (m.find());
3048
3049 // Insert the pattern at a random spot
3050 int insertIndex = generator.nextInt(99);
3051 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3052 insertIndex++;
3053 s = s.insert(insertIndex, pattern);
3054 toSearch = s.toString();
3055
3056 // Make sure that the pattern is found
3057 m.reset(toSearch);
3058 if (!m.find())
3059 failCount++;
3060
3061 // Make sure that the match text is the pattern
3062 if (!m.group().equals(pattern))
3063 failCount++;
3064
3065 // Make sure match occured at insertion point
3066 if (m.start() != insertIndex)
3067 failCount++;
3068 }
3069 }
3070
3071 /**
3072 * Tests the matching of slices on randomly generated patterns.
3073 * The Boyer-Moore optimization is not done on these patterns
3074 * because it uses unicode case folding.
3075 */
3076 private static void slice() throws Exception {
3077 doSlice(Character.MAX_VALUE);
3078 report("Slice");
3079
3080 doSlice(Character.MAX_CODE_POINT);
3081 report("Slice (Supplementary)");
3082 }
3083
3084 private static void doSlice(int maxCharacter) throws Exception {
3085 Random generator = new Random();
3086 int achar=0;
3087
3088 for (int i=0; i<100; i++) {
3089 // Create a short pattern to search for
3090 int patternLength = generator.nextInt(7) + 4;
3091 StringBuffer patternBuffer = new StringBuffer(patternLength);
3092 for (int x=0; x<patternLength; x++) {
3093 int randomChar = 0;
3094 while (!Character.isLetterOrDigit(randomChar))
3095 randomChar = generator.nextInt(maxCharacter);
3096 if (Character.isSupplementaryCodePoint(randomChar)) {
3097 patternBuffer.append(Character.toChars(randomChar));
3098 } else {
3099 patternBuffer.append((char) randomChar);
3100 }
3101 }
3102 String pattern = patternBuffer.toString();
3103 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3104
3105 // Create a buffer with random chars that does not match the sample
3106 String toSearch = null;
3107 StringBuffer s = null;
3108 Matcher m = p.matcher("");
3109 do {
3110 s = new StringBuffer(100);
3111 for (int x=0; x<100; x++) {
3112 int randomChar = 0;
3113 while (!Character.isLetterOrDigit(randomChar))
3114 randomChar = generator.nextInt(maxCharacter);
3115 if (Character.isSupplementaryCodePoint(randomChar)) {
3116 s.append(Character.toChars(randomChar));
3117 } else {
3118 s.append((char) randomChar);
3119 }
3120 }
3121 toSearch = s.toString();
3122 m.reset(toSearch);
3123 } while (m.find());
3124
3125 // Insert the pattern at a random spot
3126 int insertIndex = generator.nextInt(99);
3127 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3128 insertIndex++;
3129 s = s.insert(insertIndex, pattern);
3130 toSearch = s.toString();
3131
3132 // Make sure that the pattern is found
3133 m.reset(toSearch);
3134 if (!m.find())
3135 failCount++;
3136
3137 // Make sure that the match text is the pattern
3138 if (!m.group().equals(pattern))
3139 failCount++;
3140
3141 // Make sure match occured at insertion point
3142 if (m.start() != insertIndex)
3143 failCount++;
3144 }
3145 }
3146
3147 private static void explainFailure(String pattern, String data,
3148 String expected, String actual) {
3149 System.err.println("----------------------------------------");
3150 System.err.println("Pattern = "+pattern);
3151 System.err.println("Data = "+data);
3152 System.err.println("Expected = " + expected);
3153 System.err.println("Actual = " + actual);
3154 }
3155
3156 private static void explainFailure(String pattern, String data,
3157 Throwable t) {
3158 System.err.println("----------------------------------------");
3159 System.err.println("Pattern = "+pattern);
3160 System.err.println("Data = "+data);
3161 t.printStackTrace(System.err);
3162 }
3163
3164 // Testing examples from a file
3165
3166 /**
3167 * Goes through the file "TestCases.txt" and creates many patterns
3168 * described in the file, matching the patterns against input lines in
3169 * the file, and comparing the results against the correct results
3170 * also found in the file. The file format is described in comments
3171 * at the head of the file.
3172 */
3173 private static void processFile(String fileName) throws Exception {
3174 File testCases = new File(System.getProperty("test.src", "."),
3175 fileName);
3176 FileInputStream in = new FileInputStream(testCases);
3177 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3178
3179 // Process next test case.
3180 String aLine;
3181 while((aLine = r.readLine()) != null) {
3182 // Read a line for pattern
3183 String patternString = grabLine(r);
3184 Pattern p = null;
3185 try {
3186 p = compileTestPattern(patternString);
3187 } catch (PatternSyntaxException e) {
3188 String dataString = grabLine(r);
3189 String expectedResult = grabLine(r);
3190 if (expectedResult.startsWith("error"))
3191 continue;
3192 explainFailure(patternString, dataString, e);
3193 failCount++;
3194 continue;
3195 }
3196
3197 // Read a line for input string
3198 String dataString = grabLine(r);
3199 Matcher m = p.matcher(dataString);
3200 StringBuffer result = new StringBuffer();
3201
3202 // Check for IllegalStateExceptions before a match
3203 failCount += preMatchInvariants(m);
3204
3205 boolean found = m.find();
3206
3207 if (found)
3208 failCount += postTrueMatchInvariants(m);
3209 else
3210 failCount += postFalseMatchInvariants(m);
3211
3212 if (found) {
3213 result.append("true ");
3214 result.append(m.group(0) + " ");
3215 } else {
3216 result.append("false ");
3217 }
3218
3219 result.append(m.groupCount());
3220
3221 if (found) {
3222 for (int i=1; i<m.groupCount()+1; i++)
3223 if (m.group(i) != null)
3224 result.append(" " +m.group(i));
3225 }
3226
3227 // Read a line for the expected result
3228 String expectedResult = grabLine(r);
3229
3230 if (!result.toString().equals(expectedResult)) {
3231 explainFailure(patternString, dataString, expectedResult, result.toString());
3232 failCount++;
3233 }
3234 }
3235
3236 report(fileName);
3237 }
3238
3239 private static int preMatchInvariants(Matcher m) {
3240 int failCount = 0;
3241 try {
3242 m.start();
3243 failCount++;
3244 } catch (IllegalStateException ise) {}
3245 try {
3246 m.end();
3247 failCount++;
3248 } catch (IllegalStateException ise) {}
3249 try {
3250 m.group();
3251 failCount++;
3252 } catch (IllegalStateException ise) {}
3253 return failCount;
3254 }
3255
3256 private static int postFalseMatchInvariants(Matcher m) {
3257 int failCount = 0;
3258 try {
3259 m.group();
3260 failCount++;
3261 } catch (IllegalStateException ise) {}
3262 try {
3263 m.start();
3264 failCount++;
3265 } catch (IllegalStateException ise) {}
3266 try {
3267 m.end();
3268 failCount++;
3269 } catch (IllegalStateException ise) {}
3270 return failCount;
3271 }
3272
3273 private static int postTrueMatchInvariants(Matcher m) {
3274 int failCount = 0;
3275 //assert(m.start() = m.start(0);
3276 if (m.start() != m.start(0))
3277 failCount++;
3278 //assert(m.end() = m.end(0);
3279 if (m.start() != m.start(0))
3280 failCount++;
3281 //assert(m.group() = m.group(0);
3282 if (!m.group().equals(m.group(0)))
3283 failCount++;
3284 try {
3285 m.group(50);
3286 failCount++;
3287 } catch (IndexOutOfBoundsException ise) {}
3288
3289 return failCount;
3290 }
3291
3292 private static Pattern compileTestPattern(String patternString) {
3293 if (!patternString.startsWith("'")) {
3294 return Pattern.compile(patternString);
3295 }
3296
3297 int break1 = patternString.lastIndexOf("'");
3298 String flagString = patternString.substring(
3299 break1+1, patternString.length());
3300 patternString = patternString.substring(1, break1);
3301
3302 if (flagString.equals("i"))
3303 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3304
3305 if (flagString.equals("m"))
3306 return Pattern.compile(patternString, Pattern.MULTILINE);
3307
3308 return Pattern.compile(patternString);
3309 }
3310
3311 /**
3312 * Reads a line from the input file. Keeps reading lines until a non
3313 * empty non comment line is read. If the line contains a \n then
3314 * these two characters are replaced by a newline char. If a \\uxxxx
3315 * sequence is read then the sequence is replaced by the unicode char.
3316 */
3317 private static String grabLine(BufferedReader r) throws Exception {
3318 int index = 0;
3319 String line = r.readLine();
3320 while (line.startsWith("//") || line.length() < 1)
3321 line = r.readLine();
3322 while ((index = line.indexOf("\\n")) != -1) {
3323 StringBuffer temp = new StringBuffer(line);
3324 temp.replace(index, index+2, "\n");
3325 line = temp.toString();
3326 }
3327 while ((index = line.indexOf("\\u")) != -1) {
3328 StringBuffer temp = new StringBuffer(line);
3329 String value = temp.substring(index+2, index+6);
3330 char aChar = (char)Integer.parseInt(value, 16);
3331 String unicodeChar = "" + aChar;
3332 temp.replace(index, index+6, unicodeChar);
3333 line = temp.toString();
3334 }
3335
3336 return line;
3337 }
3338
3339 private static void check(Pattern p, String s, String g, String expected) {
3340 Matcher m = p.matcher(s);
3341 m.find();
3342 if (!m.group(g).equals(expected))
3343 failCount++;
3344 }
3345
3346 private static void checkReplaceFirst(String p, String s, String r, String expected)
3347 {
3348 if (!expected.equals(Pattern.compile(p)
3349 .matcher(s)
3350 .replaceFirst(r)))
3351 failCount++;
3352 }
3353
3354 private static void checkReplaceAll(String p, String s, String r, String expected)
3355 {
3356 if (!expected.equals(Pattern.compile(p)
3357 .matcher(s)
3358 .replaceAll(r)))
3359 failCount++;
3360 }
3361
3362 private static void checkExpectedFail(String p) {
3363 try {
3364 Pattern.compile(p);
3365 } catch (PatternSyntaxException pse) {
3366 //pse.printStackTrace();
3367 return;
3368 }
3369 failCount++;
3370 }
3371
3372 private static void checkExpectedFail(Matcher m, String g) {
3373 m.find();
3374 try {
3375 m.group(g);
3376 } catch (IllegalArgumentException iae) {
3377 //iae.printStackTrace();
3378 return;
3379 } catch (NullPointerException npe) {
3380 return;
3381 }
3382 failCount++;
3383 }
3384
3385
3386 private static void namedGroupCaptureTest() throws Exception {
3387 check(Pattern.compile("x+(?<gname>y+)z+"),
3388 "xxxyyyzzz",
3389 "gname",
3390 "yyy");
3391
shermand9337e02009-10-21 11:40:40 -07003392 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003393 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003394 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003395 "yyy");
3396
sherman0b4d42d2009-02-23 21:06:15 -08003397 //backref
3398 Pattern pattern = Pattern.compile("(a*)bc\\1");
3399 check(pattern, "zzzaabcazzz", true); // found "abca"
3400
3401 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3402 "zzzaabcaazzz", true);
3403
3404 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3405 "abcdefabc", true);
3406
3407 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3408 "abcdefghijkk", true);
3409
3410 // Supplementary character tests
3411 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3412 toSupplementaries("zzzaabcazzz"), true);
3413
3414 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3415 toSupplementaries("zzzaabcaazzz"), true);
3416
3417 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3418 toSupplementaries("abcdefabc"), true);
3419
3420 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3421 "(?<gname>" +
3422 toSupplementaries("k)") + "\\k<gname>"),
3423 toSupplementaries("abcdefghijkk"), true);
3424
3425 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3426 "xxxyyyzzzyyy",
3427 "gname",
3428 "yyy");
3429
3430 //replaceFirst/All
3431 checkReplaceFirst("(?<gn>ab)(c*)",
3432 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003433 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003434 "abzzzabcczzzabccc");
3435
3436 checkReplaceAll("(?<gn>ab)(c*)",
3437 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003438 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003439 "abzzzabzzzab");
3440
3441
3442 checkReplaceFirst("(?<gn>ab)(c*)",
3443 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003444 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003445 "zzzabzzzabcczzzabccczzz");
3446
3447 checkReplaceAll("(?<gn>ab)(c*)",
3448 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003449 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003450 "zzzabzzzabzzzabzzz");
3451
3452 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3453 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003454 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003455 "zzzccczzzabcczzzabccczzz");
3456
3457 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3458 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003459 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003460 "zzzccczzzcczzzccczzz");
3461
3462 //toSupplementaries("(ab)(c*)"));
3463 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3464 ")(?<gn2>" + toSupplementaries("c") + "*)",
3465 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003466 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003467 toSupplementaries("abzzzabcczzzabccc"));
3468
3469
3470 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3471 ")(?<gn2>" + toSupplementaries("c") + "*)",
3472 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003473 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003474 toSupplementaries("abzzzabzzzab"));
3475
3476 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3477 ")(?<gn2>" + toSupplementaries("c") + "*)",
3478 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003479 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003480 toSupplementaries("ccczzzabcczzzabccc"));
3481
3482
3483 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3484 ")(?<gn2>" + toSupplementaries("c") + "*)",
3485 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003486 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003487 toSupplementaries("ccczzzcczzzccc"));
3488
3489 checkReplaceFirst("(?<dog>Dog)AndCat",
3490 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003491 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003492 "zzzDogzzzDogAndCatzzz");
3493
3494
3495 checkReplaceAll("(?<dog>Dog)AndCat",
3496 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003497 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003498 "zzzDogzzzDogzzz");
3499
3500 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003501 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3502 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003503 failCount++;
3504
3505 // negative
3506 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3507 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003508 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003509 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3510 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3511 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3512 "gnameX");
3513 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514 null);
3515 report("NamedGroupCapture");
3516 }
sherman6782c962010-02-05 00:10:42 -08003517
// This is for bug 6919132
sherman6782c962010-02-05 00:10:42 -08003519 private static void nonBmpClassComplementTest() throws Exception {
3520 Pattern p = Pattern.compile("\\P{Lu}");
3521 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3522 if (m.find() && m.start() == 1)
3523 failCount++;
3524
3525 // from a unicode category
3526 p = Pattern.compile("\\P{Lu}");
3527 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3528 if (m.find())
3529 failCount++;
3530 if (!m.hitEnd())
3531 failCount++;
3532
3533 // block
3534 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3535 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3536 if (m.find() && m.start() == 1)
3537 failCount++;
3538
3539 report("NonBmpClassComplement");
3540 }
3541
shermancc01ef52010-05-18 15:36:47 -07003542 private static void unicodePropertiesTest() throws Exception {
3543 // different forms
3544 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3545 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3546 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3547 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3548 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3549 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3550 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3551 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3552 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3553 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3554 failCount++;
3555
3556 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3557 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3558 Matcher lastSM = common;
3559 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3560
3561 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3562 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3563 Matcher lastBM = latin;
3564 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3565
3566 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3567 if (cp >= 0x30000 && (cp & 0x70) == 0){
3568 continue; // only pick couple code points, they are the same
3569 }
3570
3571 // Unicode Script
3572 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3573 Matcher m;
3574 String str = new String(Character.toChars(cp));
3575 if (script == lastScript) {
3576 m = lastSM;
3577 m.reset(str);
3578 } else {
3579 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3580 }
3581 if (!m.matches()) {
3582 failCount++;
3583 }
3584 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3585 other.reset(str);
3586 if (other.matches()) {
3587 failCount++;
3588 }
3589 lastSM = m;
3590 lastScript = script;
3591
3592 // Unicode Block
3593 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3594 if (block == null) {
3595 //System.out.printf("Not a Block: cp=%x%n", cp);
3596 continue;
3597 }
3598 if (block == lastBlock) {
3599 m = lastBM;
3600 m.reset(str);
3601 } else {
3602 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3603 }
3604 if (!m.matches()) {
3605 failCount++;
3606 }
3607 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3608 other.reset(str);
3609 if (other.matches()) {
3610 failCount++;
3611 }
3612 lastBM = m;
3613 lastBlock = block;
3614 }
3615 report("unicodeProperties");
3616 }
sherman0b4d42d2009-02-23 21:06:15 -08003617}