blob: e323066c3a277ab14ebb50e884bb20917cfce5d2 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
ohairf5857212010-12-28 15:53:50 -08002 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanf03c78b2011-02-03 13:49:25 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
43
44/**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
53
54 /**
55 * Main to interpret arguments and run several tests.
56 *
57 */
58 public static void main(String[] args) throws Exception {
59 // Most of the tests are in a file
60 processFile("TestCases.txt");
61 //processFile("PerlCases.txt");
62 processFile("BMPTestCases.txt");
63 processFile("SupplementaryTestCases.txt");
64
65 // These test many randomly generated char patterns
66 bm();
67 slice();
68
69 // These are hard to put into the file
70 escapes();
71 blankInput();
72
73 // Substitition tests on randomly generated sequences
74 globalSubstitute();
75 stringbufferSubstitute();
76 substitutionBasher();
77
78 // Canonical Equivalence
79 ceTest();
80
81 // Anchors
82 anchorTest();
83
84 // boolean match calls
85 matchesTest();
86 lookingAtTest();
87
88 // Pattern API
89 patternMatchesTest();
90
91 // Misc
92 lookbehindTest();
93 nullArgumentTest();
94 backRefTest();
95 groupCaptureTest();
96 caretTest();
97 charClassTest();
98 emptyPatternTest();
99 findIntTest();
100 group0Test();
101 longPatternTest();
102 octalTest();
103 ampersandTest();
104 negationTest();
105 splitTest();
106 appendTest();
107 caseFoldingTest();
108 commentsTest();
109 unixLinesTest();
110 replaceFirstTest();
111 gTest();
112 zTest();
113 serializeTest();
114 reluctantRepetitionTest();
115 multilineDollarTest();
116 dollarAtEndTest();
117 caretBetweenTerminatorsTest();
118 // This RFE rejected in Tiger numOccurrencesTest();
119 javaCharClassTest();
120 nonCaptureRepetitionTest();
121 notCapturedGroupCurlyMatchTest();
122 escapedSegmentTest();
123 literalPatternTest();
124 literalReplacementTest();
125 regionTest();
126 toStringTest();
127 negatedCharClassTest();
128 findFromTest();
129 boundsTest();
130 unicodeWordBoundsTest();
131 caretAtEndTest();
132 wordSearchTest();
133 hitEndTest();
134 toMatchResultTest();
135 surrogatesInClassTest();
136 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800137 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700138 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800139 unicodeHexNotationTest();
sherman0b4d42d2009-02-23 21:06:15 -0800140 if (failure)
141 throw new RuntimeException("Failure in the RE handling.");
142 else
143 System.err.println("OKAY: All tests passed.");
144 }
145
146 // Utility functions
147
148 private static String getRandomAlphaString(int length) {
149 StringBuffer buf = new StringBuffer(length);
150 for (int i=0; i<length; i++) {
151 char randChar = (char)(97 + generator.nextInt(26));
152 buf.append(randChar);
153 }
154 return buf.toString();
155 }
156
157 private static void check(Matcher m, String expected) {
158 m.find();
159 if (!m.group().equals(expected))
160 failCount++;
161 }
162
163 private static void check(Matcher m, String result, boolean expected) {
164 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800165 if (m.group().equals(result) != expected)
166 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800167 }
168
169 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800170 if (p.matcher(s).find() != expected)
171 failCount++;
172 }
173
174 private static void check(String p, String s, boolean expected) {
175 Matcher matcher = Pattern.compile(p).matcher(s);
176 if (matcher.find() != expected)
177 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800178 }
179
180 private static void check(String p, char c, boolean expected) {
181 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
182 Pattern pattern = Pattern.compile(propertyPattern);
183 char[] ca = new char[1]; ca[0] = c;
184 Matcher matcher = pattern.matcher(new String(ca));
185 if (!matcher.find())
186 failCount++;
187 }
188
189 private static void check(String p, int codePoint, boolean expected) {
190 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
191 Pattern pattern = Pattern.compile(propertyPattern);
192 char[] ca = Character.toChars(codePoint);
193 Matcher matcher = pattern.matcher(new String(ca));
194 if (!matcher.find())
195 failCount++;
196 }
197
198 private static void check(String p, int flag, String input, String s,
199 boolean expected)
200 {
201 Pattern pattern = Pattern.compile(p, flag);
202 Matcher matcher = pattern.matcher(input);
203 if (expected)
204 check(matcher, s, expected);
205 else
206 check(pattern, input, false);
207 }
208
209 private static void report(String testName) {
210 int spacesToAdd = 30 - testName.length();
211 StringBuffer paddedNameBuffer = new StringBuffer(testName);
212 for (int i=0; i<spacesToAdd; i++)
213 paddedNameBuffer.append(" ");
214 String paddedName = paddedNameBuffer.toString();
215 System.err.println(paddedName + ": " +
216 (failCount==0 ? "Passed":"Failed("+failCount+")"));
217 if (failCount > 0)
218 failure = true;
219 failCount = 0;
220 }
221
222 /**
223 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
224 * supplementary characters. This method does NOT fully take care
225 * of the regex syntax.
226 */
227 private static String toSupplementaries(String s) {
228 int length = s.length();
229 StringBuffer sb = new StringBuffer(length * 2);
230
231 for (int i = 0; i < length; ) {
232 char c = s.charAt(i++);
233 if (c == '\\') {
234 sb.append(c);
235 if (i < length) {
236 c = s.charAt(i++);
237 sb.append(c);
238 if (c == 'u') {
239 // assume no syntax error
240 sb.append(s.charAt(i++));
241 sb.append(s.charAt(i++));
242 sb.append(s.charAt(i++));
243 sb.append(s.charAt(i++));
244 }
245 }
246 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
247 sb.append('\ud800').append((char)('\udc00'+c));
248 } else {
249 sb.append(c);
250 }
251 }
252 return sb.toString();
253 }
254
255 // Regular expression tests
256
257 // This is for bug 6178785
258 // Test if an expected NPE gets thrown when passing in a null argument
259 private static boolean check(Runnable test) {
260 try {
261 test.run();
262 failCount++;
263 return false;
264 } catch (NullPointerException npe) {
265 return true;
266 }
267 }
268
269 private static void nullArgumentTest() {
270 check(new Runnable() { public void run() { Pattern.compile(null); }});
271 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
272 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
273 check(new Runnable() { public void run() { Pattern.quote(null);}});
274 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
275 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
276
277 final Matcher m = Pattern.compile("xyz").matcher("xyz");
278 m.matches();
279 check(new Runnable() { public void run() { m.appendTail(null);}});
280 check(new Runnable() { public void run() { m.replaceAll(null);}});
281 check(new Runnable() { public void run() { m.replaceFirst(null);}});
282 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
283 check(new Runnable() { public void run() { m.reset(null);}});
284 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
285 //check(new Runnable() { public void run() { m.usePattern(null);}});
286
287 report("Null Argument");
288 }
289
290 // This is for bug6635133
291 // Test if surrogate pair in Unicode escapes can be handled correctly.
292 private static void surrogatesInClassTest() throws Exception {
293 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
294 Matcher matcher = pattern.matcher("\ud834\udd22");
295 if (!matcher.find())
296 failCount++;
297 }
298
299 // This is for bug 4988891
300 // Test toMatchResult to see that it is a copy of the Matcher
301 // that is not affected by subsequent operations on the original
302 private static void toMatchResultTest() throws Exception {
303 Pattern pattern = Pattern.compile("squid");
304 Matcher matcher = pattern.matcher(
305 "agiantsquidofdestinyasmallsquidoffate");
306 matcher.find();
307 int matcherStart1 = matcher.start();
308 MatchResult mr = matcher.toMatchResult();
309 if (mr == matcher)
310 failCount++;
311 int resultStart1 = mr.start();
312 if (matcherStart1 != resultStart1)
313 failCount++;
314 matcher.find();
315 int matcherStart2 = matcher.start();
316 int resultStart2 = mr.start();
317 if (matcherStart2 == resultStart2)
318 failCount++;
319 if (resultStart1 != resultStart2)
320 failCount++;
321 MatchResult mr2 = matcher.toMatchResult();
322 if (mr == mr2)
323 failCount++;
324 if (mr2.start() != matcherStart2)
325 failCount++;
326 report("toMatchResult is a copy");
327 }
328
329 // This is for bug 5013885
330 // Must test a slice to see if it reports hitEnd correctly
331 private static void hitEndTest() throws Exception {
332 // Basic test of Slice node
333 Pattern p = Pattern.compile("^squidattack");
334 Matcher m = p.matcher("squack");
335 m.find();
336 if (m.hitEnd())
337 failCount++;
338 m.reset("squid");
339 m.find();
340 if (!m.hitEnd())
341 failCount++;
342
343 // Test Slice, SliceA and SliceU nodes
344 for (int i=0; i<3; i++) {
345 int flags = 0;
346 if (i==1) flags = Pattern.CASE_INSENSITIVE;
347 if (i==2) flags = Pattern.UNICODE_CASE;
348 p = Pattern.compile("^abc", flags);
349 m = p.matcher("ad");
350 m.find();
351 if (m.hitEnd())
352 failCount++;
353 m.reset("ab");
354 m.find();
355 if (!m.hitEnd())
356 failCount++;
357 }
358
359 // Test Boyer-Moore node
360 p = Pattern.compile("catattack");
361 m = p.matcher("attack");
362 m.find();
363 if (!m.hitEnd())
364 failCount++;
365
366 p = Pattern.compile("catattack");
367 m = p.matcher("attackattackattackcatatta");
368 m.find();
369 if (!m.hitEnd())
370 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800371 report("hitEnd from a Slice");
372 }
373
374 // This is for bug 4997476
375 // It is weird code submitted by customer demonstrating a regression
376 private static void wordSearchTest() throws Exception {
377 String testString = new String("word1 word2 word3");
378 Pattern p = Pattern.compile("\\b");
379 Matcher m = p.matcher(testString);
380 int position = 0;
381 int start = 0;
382 while (m.find(position)) {
383 start = m.start();
384 if (start == testString.length())
385 break;
386 if (m.find(start+1)) {
387 position = m.start();
388 } else {
389 position = testString.length();
390 }
391 if (testString.substring(start, position).equals(" "))
392 continue;
393 if (!testString.substring(start, position-1).startsWith("word"))
394 failCount++;
395 }
396 report("Customer word search");
397 }
398
399 // This is for bug 4994840
400 private static void caretAtEndTest() throws Exception {
401 // Problem only occurs with multiline patterns
402 // containing a beginning-of-line caret "^" followed
403 // by an expression that also matches the empty string.
404 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
405 Matcher matcher = pattern.matcher("\r");
406 matcher.find();
407 matcher.find();
408 report("Caret at end");
409 }
410
411 // This test is for 4979006
412 // Check to see if word boundary construct properly handles unicode
413 // non spacing marks
414 private static void unicodeWordBoundsTest() throws Exception {
415 String spaces = " ";
416 String wordChar = "a";
417 String nsm = "\u030a";
418
419 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
420
421 Pattern pattern = Pattern.compile("\\b");
422 Matcher matcher = pattern.matcher("");
423 // S=other B=word character N=non spacing mark .=word boundary
424 // SS.BB.SS
425 String input = spaces + wordChar + wordChar + spaces;
426 twoFindIndexes(input, matcher, 2, 4);
427 // SS.BBN.SS
428 input = spaces + wordChar +wordChar + nsm + spaces;
429 twoFindIndexes(input, matcher, 2, 5);
430 // SS.BN.SS
431 input = spaces + wordChar + nsm + spaces;
432 twoFindIndexes(input, matcher, 2, 4);
433 // SS.BNN.SS
434 input = spaces + wordChar + nsm + nsm + spaces;
435 twoFindIndexes(input, matcher, 2, 5);
436 // SSN.BB.SS
437 input = spaces + nsm + wordChar + wordChar + spaces;
438 twoFindIndexes(input, matcher, 3, 5);
439 // SS.BNB.SS
440 input = spaces + wordChar + nsm + wordChar + spaces;
441 twoFindIndexes(input, matcher, 2, 5);
442 // SSNNSS
443 input = spaces + nsm + nsm + spaces;
444 matcher.reset(input);
445 if (matcher.find())
446 failCount++;
447 // SSN.BBN.SS
448 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
449 twoFindIndexes(input, matcher, 3, 6);
450
451 report("Unicode word boundary");
452 }
453
454 private static void twoFindIndexes(String input, Matcher matcher, int a,
455 int b) throws Exception
456 {
457 matcher.reset(input);
458 matcher.find();
459 if (matcher.start() != a)
460 failCount++;
461 matcher.find();
462 if (matcher.start() != b)
463 failCount++;
464 }
465
466 // This test is for 6284152
467 static void check(String regex, String input, String[] expected) {
468 List<String> result = new ArrayList<String>();
469 Pattern p = Pattern.compile(regex);
470 Matcher m = p.matcher(input);
471 while (m.find()) {
472 result.add(m.group());
473 }
474 if (!Arrays.asList(expected).equals(result))
475 failCount++;
476 }
477
478 private static void lookbehindTest() throws Exception {
479 //Positive
480 check("(?<=%.{0,5})foo\\d",
481 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
482 new String[]{"foo1", "foo2", "foo3"});
483
484 //boundary at end of the lookbehind sub-regex should work consistently
485 //with the boundary just after the lookbehind sub-regex
486 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
487 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
488 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
489 check("(?<!abc \\b)foo", "abc foo", new String[0]);
490
491 //Negative
492 check("(?<!%.{0,5})foo\\d",
493 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
494 new String[] {"foo4", "foo5"});
495
496 //Positive greedy
497 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
498
499 //Positive reluctant
500 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
501
502 //supplementary
503 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
504 new String[] {"fo\ud800\udc00o"});
505 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
506 new String[] {"fo\ud800\udc00o"});
507 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
508 new String[] {"fo\ud800\udc00o"});
509 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
510 new String[] {"fo\ud800\udc00o"});
511 report("Lookbehind");
512 }
513
514 // This test is for 4938995
515 // Check to see if weak region boundaries are transparent to
516 // lookahead and lookbehind constructs
517 private static void boundsTest() throws Exception {
518 String fullMessage = "catdogcat";
519 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
520 Matcher matcher = pattern.matcher("catdogca");
521 matcher.useTransparentBounds(true);
522 if (matcher.find())
523 failCount++;
524 matcher.reset("atdogcat");
525 if (matcher.find())
526 failCount++;
527 matcher.reset(fullMessage);
528 if (!matcher.find())
529 failCount++;
530 matcher.reset(fullMessage);
531 matcher.region(0,9);
532 if (!matcher.find())
533 failCount++;
534 matcher.reset(fullMessage);
535 matcher.region(0,6);
536 if (!matcher.find())
537 failCount++;
538 matcher.reset(fullMessage);
539 matcher.region(3,6);
540 if (!matcher.find())
541 failCount++;
542 matcher.useTransparentBounds(false);
543 if (matcher.find())
544 failCount++;
545
546 // Negative lookahead/lookbehind
547 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
548 matcher = pattern.matcher("dogcat");
549 matcher.useTransparentBounds(true);
550 matcher.region(0,3);
551 if (matcher.find())
552 failCount++;
553 matcher.reset("catdog");
554 matcher.region(3,6);
555 if (matcher.find())
556 failCount++;
557 matcher.useTransparentBounds(false);
558 matcher.reset("dogcat");
559 matcher.region(0,3);
560 if (!matcher.find())
561 failCount++;
562 matcher.reset("catdog");
563 matcher.region(3,6);
564 if (!matcher.find())
565 failCount++;
566
567 report("Region bounds transparency");
568 }
569
570 // This test is for 4945394
571 private static void findFromTest() throws Exception {
572 String message = "This is 40 $0 message.";
573 Pattern pat = Pattern.compile("\\$0");
574 Matcher match = pat.matcher(message);
575 if (!match.find())
576 failCount++;
577 if (match.find())
578 failCount++;
579 if (match.find())
580 failCount++;
581 report("Check for alternating find");
582 }
583
584 // This test is for 4872664 and 4892980
585 private static void negatedCharClassTest() throws Exception {
586 Pattern pattern = Pattern.compile("[^>]");
587 Matcher matcher = pattern.matcher("\u203A");
588 if (!matcher.matches())
589 failCount++;
590 pattern = Pattern.compile("[^fr]");
591 matcher = pattern.matcher("a");
592 if (!matcher.find())
593 failCount++;
594 matcher.reset("\u203A");
595 if (!matcher.find())
596 failCount++;
597 String s = "for";
598 String result[] = s.split("[^fr]");
599 if (!result[0].equals("f"))
600 failCount++;
601 if (!result[1].equals("r"))
602 failCount++;
603 s = "f\u203Ar";
604 result = s.split("[^fr]");
605 if (!result[0].equals("f"))
606 failCount++;
607 if (!result[1].equals("r"))
608 failCount++;
609
610 // Test adding to bits, subtracting a node, then adding to bits again
611 pattern = Pattern.compile("[^f\u203Ar]");
612 matcher = pattern.matcher("a");
613 if (!matcher.find())
614 failCount++;
615 matcher.reset("f");
616 if (matcher.find())
617 failCount++;
618 matcher.reset("\u203A");
619 if (matcher.find())
620 failCount++;
621 matcher.reset("r");
622 if (matcher.find())
623 failCount++;
624 matcher.reset("\u203B");
625 if (!matcher.find())
626 failCount++;
627
628 // Test subtracting a node, adding to bits, subtracting again
629 pattern = Pattern.compile("[^\u203Ar\u203B]");
630 matcher = pattern.matcher("a");
631 if (!matcher.find())
632 failCount++;
633 matcher.reset("\u203A");
634 if (matcher.find())
635 failCount++;
636 matcher.reset("r");
637 if (matcher.find())
638 failCount++;
639 matcher.reset("\u203B");
640 if (matcher.find())
641 failCount++;
642 matcher.reset("\u203C");
643 if (!matcher.find())
644 failCount++;
645
646 report("Negated Character Class");
647 }
648
649 // This test is for 4628291
650 private static void toStringTest() throws Exception {
651 Pattern pattern = Pattern.compile("b+");
652 if (pattern.toString() != "b+")
653 failCount++;
654 Matcher matcher = pattern.matcher("aaabbbccc");
655 String matcherString = matcher.toString(); // unspecified
656 matcher.find();
657 matcherString = matcher.toString(); // unspecified
658 matcher.region(0,3);
659 matcherString = matcher.toString(); // unspecified
660 matcher.reset();
661 matcherString = matcher.toString(); // unspecified
662 report("toString");
663 }
664
665 // This test is for 4808962
666 private static void literalPatternTest() throws Exception {
667 int flags = Pattern.LITERAL;
668
669 Pattern pattern = Pattern.compile("abc\\t$^", flags);
670 check(pattern, "abc\\t$^", true);
671
672 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
673 check(pattern, "abc\\t$^", true);
674
675 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
676 check(pattern, "\\Qa^$bcabc\\E", true);
677 check(pattern, "a^$bcabc", false);
678
679 pattern = Pattern.compile("\\\\Q\\\\E");
680 check(pattern, "\\Q\\E", true);
681
682 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
683 check(pattern, "abcefg\\Q\\Ehij", true);
684
685 pattern = Pattern.compile("\\\\\\Q\\\\E");
686 check(pattern, "\\\\\\\\", true);
687
688 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
689 check(pattern, "\\Qa^$bcabc\\E", true);
690 check(pattern, "a^$bcabc", false);
691
692 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
693 check(pattern, "\\Qabc\\Edef", true);
694 check(pattern, "abcdef", false);
695
696 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
697 check(pattern, "abc\\Edef", true);
698 check(pattern, "abcdef", false);
699
700 pattern = Pattern.compile(Pattern.quote("\\E"));
701 check(pattern, "\\E", true);
702
703 pattern = Pattern.compile("((((abc.+?:)", flags);
704 check(pattern, "((((abc.+?:)", true);
705
706 flags |= Pattern.MULTILINE;
707
708 pattern = Pattern.compile("^cat$", flags);
709 check(pattern, "abc^cat$def", true);
710 check(pattern, "cat", false);
711
712 flags |= Pattern.CASE_INSENSITIVE;
713
714 pattern = Pattern.compile("abcdef", flags);
715 check(pattern, "ABCDEF", true);
716 check(pattern, "AbCdEf", true);
717
718 flags |= Pattern.DOTALL;
719
720 pattern = Pattern.compile("a...b", flags);
721 check(pattern, "A...b", true);
722 check(pattern, "Axxxb", false);
723
724 flags |= Pattern.CANON_EQ;
725
726 Pattern p = Pattern.compile("testa\u030a", flags);
727 check(pattern, "testa\u030a", false);
728 check(pattern, "test\u00e5", false);
729
730 // Supplementary character test
731 flags = Pattern.LITERAL;
732
733 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
734 check(pattern, toSupplementaries("abc\\t$^"), true);
735
736 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
737 check(pattern, toSupplementaries("abc\\t$^"), true);
738
739 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
740 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
741 check(pattern, toSupplementaries("a^$bcabc"), false);
742
743 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
744 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
745 check(pattern, toSupplementaries("a^$bcabc"), false);
746
747 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
748 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
749 check(pattern, toSupplementaries("abcdef"), false);
750
751 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
752 check(pattern, toSupplementaries("abc\\Edef"), true);
753 check(pattern, toSupplementaries("abcdef"), false);
754
755 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
756 check(pattern, toSupplementaries("((((abc.+?:)"), true);
757
758 flags |= Pattern.MULTILINE;
759
760 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
761 check(pattern, toSupplementaries("abc^cat$def"), true);
762 check(pattern, toSupplementaries("cat"), false);
763
764 flags |= Pattern.DOTALL;
765
766 // note: this is case-sensitive.
767 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
768 check(pattern, toSupplementaries("a...b"), true);
769 check(pattern, toSupplementaries("axxxb"), false);
770
771 flags |= Pattern.CANON_EQ;
772
773 String t = toSupplementaries("test");
774 p = Pattern.compile(t + "a\u030a", flags);
775 check(pattern, t + "a\u030a", false);
776 check(pattern, t + "\u00e5", false);
777
778 report("Literal pattern");
779 }
780
781 // This test is for 4803179
782 // This test is also for 4808962, replacement parts
783 private static void literalReplacementTest() throws Exception {
784 int flags = Pattern.LITERAL;
785
786 Pattern pattern = Pattern.compile("abc", flags);
787 Matcher matcher = pattern.matcher("zzzabczzz");
788 String replaceTest = "$0";
789 String result = matcher.replaceAll(replaceTest);
790 if (!result.equals("zzzabczzz"))
791 failCount++;
792
793 matcher.reset();
794 String literalReplacement = matcher.quoteReplacement(replaceTest);
795 result = matcher.replaceAll(literalReplacement);
796 if (!result.equals("zzz$0zzz"))
797 failCount++;
798
799 matcher.reset();
800 replaceTest = "\\t$\\$";
801 literalReplacement = matcher.quoteReplacement(replaceTest);
802 result = matcher.replaceAll(literalReplacement);
803 if (!result.equals("zzz\\t$\\$zzz"))
804 failCount++;
805
806 // Supplementary character test
807 pattern = Pattern.compile(toSupplementaries("abc"), flags);
808 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
809 replaceTest = "$0";
810 result = matcher.replaceAll(replaceTest);
811 if (!result.equals(toSupplementaries("zzzabczzz")))
812 failCount++;
813
814 matcher.reset();
815 literalReplacement = matcher.quoteReplacement(replaceTest);
816 result = matcher.replaceAll(literalReplacement);
817 if (!result.equals(toSupplementaries("zzz$0zzz")))
818 failCount++;
819
820 matcher.reset();
821 replaceTest = "\\t$\\$";
822 literalReplacement = matcher.quoteReplacement(replaceTest);
823 result = matcher.replaceAll(literalReplacement);
824 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
825 failCount++;
826
827 report("Literal replacement");
828 }
829
830 // This test is for 4757029
831 private static void regionTest() throws Exception {
832 Pattern pattern = Pattern.compile("abc");
833 Matcher matcher = pattern.matcher("abcdefabc");
834
835 matcher.region(0,9);
836 if (!matcher.find())
837 failCount++;
838 if (!matcher.find())
839 failCount++;
840 matcher.region(0,3);
841 if (!matcher.find())
842 failCount++;
843 matcher.region(3,6);
844 if (matcher.find())
845 failCount++;
846 matcher.region(0,2);
847 if (matcher.find())
848 failCount++;
849
850 expectRegionFail(matcher, 1, -1);
851 expectRegionFail(matcher, -1, -1);
852 expectRegionFail(matcher, -1, 1);
853 expectRegionFail(matcher, 5, 3);
854 expectRegionFail(matcher, 5, 12);
855 expectRegionFail(matcher, 12, 12);
856
857 pattern = Pattern.compile("^abc$");
858 matcher = pattern.matcher("zzzabczzz");
859 matcher.region(0,9);
860 if (matcher.find())
861 failCount++;
862 matcher.region(3,6);
863 if (!matcher.find())
864 failCount++;
865 matcher.region(3,6);
866 matcher.useAnchoringBounds(false);
867 if (matcher.find())
868 failCount++;
869
870 // Supplementary character test
871 pattern = Pattern.compile(toSupplementaries("abc"));
872 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
873 matcher.region(0,9*2);
874 if (!matcher.find())
875 failCount++;
876 if (!matcher.find())
877 failCount++;
878 matcher.region(0,3*2);
879 if (!matcher.find())
880 failCount++;
881 matcher.region(1,3*2);
882 if (matcher.find())
883 failCount++;
884 matcher.region(3*2,6*2);
885 if (matcher.find())
886 failCount++;
887 matcher.region(0,2*2);
888 if (matcher.find())
889 failCount++;
890 matcher.region(0,2*2+1);
891 if (matcher.find())
892 failCount++;
893
894 expectRegionFail(matcher, 1*2, -1);
895 expectRegionFail(matcher, -1, -1);
896 expectRegionFail(matcher, -1, 1*2);
897 expectRegionFail(matcher, 5*2, 3*2);
898 expectRegionFail(matcher, 5*2, 12*2);
899 expectRegionFail(matcher, 12*2, 12*2);
900
901 pattern = Pattern.compile(toSupplementaries("^abc$"));
902 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
903 matcher.region(0,9*2);
904 if (matcher.find())
905 failCount++;
906 matcher.region(3*2,6*2);
907 if (!matcher.find())
908 failCount++;
909 matcher.region(3*2+1,6*2);
910 if (matcher.find())
911 failCount++;
912 matcher.region(3*2,6*2-1);
913 if (matcher.find())
914 failCount++;
915 matcher.region(3*2,6*2);
916 matcher.useAnchoringBounds(false);
917 if (matcher.find())
918 failCount++;
919 report("Regions");
920 }
921
922 private static void expectRegionFail(Matcher matcher, int index1,
923 int index2)
924 {
925 try {
926 matcher.region(index1, index2);
927 failCount++;
928 } catch (IndexOutOfBoundsException ioobe) {
929 // Correct result
930 } catch (IllegalStateException ise) {
931 // Correct result
932 }
933 }
934
935 // This test is for 4803197
936 private static void escapedSegmentTest() throws Exception {
937
938 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
939 check(pattern, "dir1\\dir2", true);
940
941 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
942 check(pattern, "dir1\\dir2\\", true);
943
944 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
945 check(pattern, "dir1\\dir2\\", true);
946
947 // Supplementary character test
948 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
949 check(pattern, toSupplementaries("dir1\\dir2"), true);
950
951 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
952 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
953
954 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
955 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
956
957 report("Escaped segment");
958 }
959
960 // This test is for 4792284
961 private static void nonCaptureRepetitionTest() throws Exception {
962 String input = "abcdefgh;";
963
964 String[] patterns = new String[] {
965 "(?:\\w{4})+;",
966 "(?:\\w{8})*;",
967 "(?:\\w{2}){2,4};",
968 "(?:\\w{4}){2,};", // only matches the
969 ".*?(?:\\w{5})+;", // specified minimum
970 ".*?(?:\\w{9})*;", // number of reps - OK
971 "(?:\\w{4})+?;", // lazy repetition - OK
972 "(?:\\w{4})++;", // possessive repetition - OK
973 "(?:\\w{2,}?)+;", // non-deterministic - OK
974 "(\\w{4})+;", // capturing group - OK
975 };
976
977 for (int i = 0; i < patterns.length; i++) {
978 // Check find()
979 check(patterns[i], 0, input, input, true);
980 // Check matches()
981 Pattern p = Pattern.compile(patterns[i]);
982 Matcher m = p.matcher(input);
983
984 if (m.matches()) {
985 if (!m.group(0).equals(input))
986 failCount++;
987 } else {
988 failCount++;
989 }
990 }
991
992 report("Non capturing repetition");
993 }
994
995 // This test is for 6358731
996 private static void notCapturedGroupCurlyMatchTest() throws Exception {
997 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
998 Matcher matcher = pattern.matcher("abcd");
999 if (!matcher.matches() ||
1000 matcher.group(1) != null ||
1001 !matcher.group(2).equals("abcd")) {
1002 failCount++;
1003 }
1004 report("Not captured GroupCurly");
1005 }
1006
1007 // This test is for 4706545
1008 private static void javaCharClassTest() throws Exception {
1009 for (int i=0; i<1000; i++) {
1010 char c = (char)generator.nextInt();
1011 check("{javaLowerCase}", c, Character.isLowerCase(c));
1012 check("{javaUpperCase}", c, Character.isUpperCase(c));
1013 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1014 check("{javaTitleCase}", c, Character.isTitleCase(c));
1015 check("{javaDigit}", c, Character.isDigit(c));
1016 check("{javaDefined}", c, Character.isDefined(c));
1017 check("{javaLetter}", c, Character.isLetter(c));
1018 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1019 check("{javaJavaIdentifierStart}", c,
1020 Character.isJavaIdentifierStart(c));
1021 check("{javaJavaIdentifierPart}", c,
1022 Character.isJavaIdentifierPart(c));
1023 check("{javaUnicodeIdentifierStart}", c,
1024 Character.isUnicodeIdentifierStart(c));
1025 check("{javaUnicodeIdentifierPart}", c,
1026 Character.isUnicodeIdentifierPart(c));
1027 check("{javaIdentifierIgnorable}", c,
1028 Character.isIdentifierIgnorable(c));
1029 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1030 check("{javaWhitespace}", c, Character.isWhitespace(c));
1031 check("{javaISOControl}", c, Character.isISOControl(c));
1032 check("{javaMirrored}", c, Character.isMirrored(c));
1033
1034 }
1035
1036 // Supplementary character test
1037 for (int i=0; i<1000; i++) {
1038 int c = generator.nextInt(Character.MAX_CODE_POINT
1039 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1040 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1041 check("{javaLowerCase}", c, Character.isLowerCase(c));
1042 check("{javaUpperCase}", c, Character.isUpperCase(c));
1043 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1044 check("{javaTitleCase}", c, Character.isTitleCase(c));
1045 check("{javaDigit}", c, Character.isDigit(c));
1046 check("{javaDefined}", c, Character.isDefined(c));
1047 check("{javaLetter}", c, Character.isLetter(c));
1048 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1049 check("{javaJavaIdentifierStart}", c,
1050 Character.isJavaIdentifierStart(c));
1051 check("{javaJavaIdentifierPart}", c,
1052 Character.isJavaIdentifierPart(c));
1053 check("{javaUnicodeIdentifierStart}", c,
1054 Character.isUnicodeIdentifierStart(c));
1055 check("{javaUnicodeIdentifierPart}", c,
1056 Character.isUnicodeIdentifierPart(c));
1057 check("{javaIdentifierIgnorable}", c,
1058 Character.isIdentifierIgnorable(c));
1059 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1060 check("{javaWhitespace}", c, Character.isWhitespace(c));
1061 check("{javaISOControl}", c, Character.isISOControl(c));
1062 check("{javaMirrored}", c, Character.isMirrored(c));
1063 }
1064
1065 report("Java character classes");
1066 }
1067
1068 // This test is for 4523620
1069 /*
1070 private static void numOccurrencesTest() throws Exception {
1071 Pattern pattern = Pattern.compile("aaa");
1072
1073 if (pattern.numOccurrences("aaaaaa", false) != 2)
1074 failCount++;
1075 if (pattern.numOccurrences("aaaaaa", true) != 4)
1076 failCount++;
1077
1078 pattern = Pattern.compile("^");
1079 if (pattern.numOccurrences("aaaaaa", false) != 1)
1080 failCount++;
1081 if (pattern.numOccurrences("aaaaaa", true) != 1)
1082 failCount++;
1083
1084 report("Number of Occurrences");
1085 }
1086 */
1087
1088 // This test is for 4776374
1089 private static void caretBetweenTerminatorsTest() throws Exception {
1090 int flags1 = Pattern.DOTALL;
1091 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1092 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1093 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1094
1095 check("^....", flags1, "test\ntest", "test", true);
1096 check(".....^", flags1, "test\ntest", "test", false);
1097 check(".....^", flags1, "test\n", "test", false);
1098 check("....^", flags1, "test\r\n", "test", false);
1099
1100 check("^....", flags2, "test\ntest", "test", true);
1101 check("....^", flags2, "test\ntest", "test", false);
1102 check(".....^", flags2, "test\n", "test", false);
1103 check("....^", flags2, "test\r\n", "test", false);
1104
1105 check("^....", flags3, "test\ntest", "test", true);
1106 check(".....^", flags3, "test\ntest", "test\n", true);
1107 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1108 check(".....^", flags3, "test\n", "test", false);
1109 check(".....^", flags3, "test\r\n", "test", false);
1110 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1111
1112 check("^....", flags4, "test\ntest", "test", true);
1113 check(".....^", flags3, "test\ntest", "test\n", true);
1114 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1115 check(".....^", flags4, "test\n", "test\n", false);
1116 check(".....^", flags4, "test\r\n", "test\r", false);
1117
1118 // Supplementary character test
1119 String t = toSupplementaries("test");
1120 check("^....", flags1, t+"\n"+t, t, true);
1121 check(".....^", flags1, t+"\n"+t, t, false);
1122 check(".....^", flags1, t+"\n", t, false);
1123 check("....^", flags1, t+"\r\n", t, false);
1124
1125 check("^....", flags2, t+"\n"+t, t, true);
1126 check("....^", flags2, t+"\n"+t, t, false);
1127 check(".....^", flags2, t+"\n", t, false);
1128 check("....^", flags2, t+"\r\n", t, false);
1129
1130 check("^....", flags3, t+"\n"+t, t, true);
1131 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1132 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1133 check(".....^", flags3, t+"\n", t, false);
1134 check(".....^", flags3, t+"\r\n", t, false);
1135 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1136
1137 check("^....", flags4, t+"\n"+t, t, true);
1138 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1139 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1140 check(".....^", flags4, t+"\n", t+"\n", false);
1141 check(".....^", flags4, t+"\r\n", t+"\r", false);
1142
1143 report("Caret between terminators");
1144 }
1145
1146 // This test is for 4727935
1147 private static void dollarAtEndTest() throws Exception {
1148 int flags1 = Pattern.DOTALL;
1149 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1150 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1151
1152 check("....$", flags1, "test\n", "test", true);
1153 check("....$", flags1, "test\r\n", "test", true);
1154 check(".....$", flags1, "test\n", "test\n", true);
1155 check(".....$", flags1, "test\u0085", "test\u0085", true);
1156 check("....$", flags1, "test\u0085", "test", true);
1157
1158 check("....$", flags2, "test\n", "test", true);
1159 check(".....$", flags2, "test\n", "test\n", true);
1160 check(".....$", flags2, "test\u0085", "test\u0085", true);
1161 check("....$", flags2, "test\u0085", "est\u0085", true);
1162
1163 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1164 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1165 check("....$blah", flags3, "test\nblah", "!!!!", false);
1166 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1167
1168 // Supplementary character test
1169 String t = toSupplementaries("test");
1170 String b = toSupplementaries("blah");
1171 check("....$", flags1, t+"\n", t, true);
1172 check("....$", flags1, t+"\r\n", t, true);
1173 check(".....$", flags1, t+"\n", t+"\n", true);
1174 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1175 check("....$", flags1, t+"\u0085", t, true);
1176
1177 check("....$", flags2, t+"\n", t, true);
1178 check(".....$", flags2, t+"\n", t+"\n", true);
1179 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1180 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1181
1182 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1183 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1184 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1185 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1186
1187 report("Dollar at End");
1188 }
1189
1190 // This test is for 4711773
1191 private static void multilineDollarTest() throws Exception {
1192 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1193 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1194 matcher.find();
1195 if (matcher.start(0) != 9)
1196 failCount++;
1197 matcher.find();
1198 if (matcher.start(0) != 20)
1199 failCount++;
1200
1201 // Supplementary character test
1202 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1203 matcher.find();
1204 if (matcher.start(0) != 9*2)
1205 failCount++;
1206 matcher.find();
1207 if (matcher.start(0) != 20*2)
1208 failCount++;
1209
1210 report("Multiline Dollar");
1211 }
1212
1213 private static void reluctantRepetitionTest() throws Exception {
1214 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1215 check(p, "1 word word word 2", true);
1216 check(p, "1 wor wo w 2", true);
1217 check(p, "1 word word 2", true);
1218 check(p, "1 word 2", true);
1219 check(p, "1 wo w w 2", true);
1220 check(p, "1 wo w 2", true);
1221 check(p, "1 wor w 2", true);
1222
1223 p = Pattern.compile("([a-z])+?c");
1224 Matcher m = p.matcher("ababcdefdec");
1225 check(m, "ababc");
1226
1227 // Supplementary character test
1228 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1229 m = p.matcher(toSupplementaries("ababcdefdec"));
1230 check(m, toSupplementaries("ababc"));
1231
1232 report("Reluctant Repetition");
1233 }
1234
1235 private static void serializeTest() throws Exception {
1236 String patternStr = "(b)";
1237 String matchStr = "b";
1238 Pattern pattern = Pattern.compile(patternStr);
1239 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1240 ObjectOutputStream oos = new ObjectOutputStream(baos);
1241 oos.writeObject(pattern);
1242 oos.close();
1243 ObjectInputStream ois = new ObjectInputStream(
1244 new ByteArrayInputStream(baos.toByteArray()));
1245 Pattern serializedPattern = (Pattern)ois.readObject();
1246 ois.close();
1247 Matcher matcher = serializedPattern.matcher(matchStr);
1248 if (!matcher.matches())
1249 failCount++;
1250 if (matcher.groupCount() != 1)
1251 failCount++;
1252
1253 report("Serialization");
1254 }
1255
1256 private static void gTest() {
1257 Pattern pattern = Pattern.compile("\\G\\w");
1258 Matcher matcher = pattern.matcher("abc#x#x");
1259 matcher.find();
1260 matcher.find();
1261 matcher.find();
1262 if (matcher.find())
1263 failCount++;
1264
1265 pattern = Pattern.compile("\\GA*");
1266 matcher = pattern.matcher("1A2AA3");
1267 matcher.find();
1268 if (matcher.find())
1269 failCount++;
1270
1271 pattern = Pattern.compile("\\GA*");
1272 matcher = pattern.matcher("1A2AA3");
1273 if (!matcher.find(1))
1274 failCount++;
1275 matcher.find();
1276 if (matcher.find())
1277 failCount++;
1278
1279 report("\\G");
1280 }
1281
1282 private static void zTest() {
1283 Pattern pattern = Pattern.compile("foo\\Z");
1284 // Positives
1285 check(pattern, "foo\u0085", true);
1286 check(pattern, "foo\u2028", true);
1287 check(pattern, "foo\u2029", true);
1288 check(pattern, "foo\n", true);
1289 check(pattern, "foo\r", true);
1290 check(pattern, "foo\r\n", true);
1291 // Negatives
1292 check(pattern, "fooo", false);
1293 check(pattern, "foo\n\r", false);
1294
1295 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1296 // Positives
1297 check(pattern, "foo", true);
1298 check(pattern, "foo\n", true);
1299 // Negatives
1300 check(pattern, "foo\r", false);
1301 check(pattern, "foo\u0085", false);
1302 check(pattern, "foo\u2028", false);
1303 check(pattern, "foo\u2029", false);
1304
1305 report("\\Z");
1306 }
1307
1308 private static void replaceFirstTest() {
1309 Pattern pattern = Pattern.compile("(ab)(c*)");
1310 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1311 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1312 failCount++;
1313
1314 matcher.reset("zzzabccczzzabcczzzabccczzz");
1315 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1316 failCount++;
1317
1318 matcher.reset("zzzabccczzzabcczzzabccczzz");
1319 String result = matcher.replaceFirst("$1");
1320 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1321 failCount++;
1322
1323 matcher.reset("zzzabccczzzabcczzzabccczzz");
1324 result = matcher.replaceFirst("$2");
1325 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1326 failCount++;
1327
1328 pattern = Pattern.compile("a*");
1329 matcher = pattern.matcher("aaaaaaaaaa");
1330 if (!matcher.replaceFirst("test").equals("test"))
1331 failCount++;
1332
1333 pattern = Pattern.compile("a+");
1334 matcher = pattern.matcher("zzzaaaaaaaaaa");
1335 if (!matcher.replaceFirst("test").equals("zzztest"))
1336 failCount++;
1337
1338 // Supplementary character test
1339 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1340 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1341 if (!matcher.replaceFirst(toSupplementaries("test"))
1342 .equals(toSupplementaries("testzzzabcczzzabccc")))
1343 failCount++;
1344
1345 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1346 if (!matcher.replaceFirst(toSupplementaries("test")).
1347 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1348 failCount++;
1349
1350 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1351 result = matcher.replaceFirst("$1");
1352 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1353 failCount++;
1354
1355 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1356 result = matcher.replaceFirst("$2");
1357 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1358 failCount++;
1359
1360 pattern = Pattern.compile(toSupplementaries("a*"));
1361 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1362 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1363 failCount++;
1364
1365 pattern = Pattern.compile(toSupplementaries("a+"));
1366 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1367 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1368 failCount++;
1369
1370 report("Replace First");
1371 }
1372
1373 private static void unixLinesTest() {
1374 Pattern pattern = Pattern.compile(".*");
1375 Matcher matcher = pattern.matcher("aa\u2028blah");
1376 matcher.find();
1377 if (!matcher.group(0).equals("aa"))
1378 failCount++;
1379
1380 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1381 matcher = pattern.matcher("aa\u2028blah");
1382 matcher.find();
1383 if (!matcher.group(0).equals("aa\u2028blah"))
1384 failCount++;
1385
1386 pattern = Pattern.compile("[az]$",
1387 Pattern.MULTILINE | Pattern.UNIX_LINES);
1388 matcher = pattern.matcher("aa\u2028zz");
1389 check(matcher, "a\u2028", false);
1390
1391 // Supplementary character test
1392 pattern = Pattern.compile(".*");
1393 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1394 matcher.find();
1395 if (!matcher.group(0).equals(toSupplementaries("aa")))
1396 failCount++;
1397
1398 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1399 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1400 matcher.find();
1401 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1402 failCount++;
1403
1404 pattern = Pattern.compile(toSupplementaries("[az]$"),
1405 Pattern.MULTILINE | Pattern.UNIX_LINES);
1406 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1407 check(matcher, toSupplementaries("a\u2028"), false);
1408
1409 report("Unix Lines");
1410 }
1411
1412 private static void commentsTest() {
1413 int flags = Pattern.COMMENTS;
1414
1415 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1416 Matcher matcher = pattern.matcher("aa#aa");
1417 if (!matcher.matches())
1418 failCount++;
1419
1420 pattern = Pattern.compile("aa # blah", flags);
1421 matcher = pattern.matcher("aa");
1422 if (!matcher.matches())
1423 failCount++;
1424
1425 pattern = Pattern.compile("aa blah", flags);
1426 matcher = pattern.matcher("aablah");
1427 if (!matcher.matches())
1428 failCount++;
1429
1430 pattern = Pattern.compile("aa # blah blech ", flags);
1431 matcher = pattern.matcher("aa");
1432 if (!matcher.matches())
1433 failCount++;
1434
1435 pattern = Pattern.compile("aa # blah\n ", flags);
1436 matcher = pattern.matcher("aa");
1437 if (!matcher.matches())
1438 failCount++;
1439
1440 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1441 matcher = pattern.matcher("aabc");
1442 if (!matcher.matches())
1443 failCount++;
1444
1445 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1446 matcher = pattern.matcher("aabc");
1447 if (!matcher.matches())
1448 failCount++;
1449
1450 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1451 matcher = pattern.matcher("aabc#blech");
1452 if (!matcher.matches())
1453 failCount++;
1454
1455 // Supplementary character test
1456 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1457 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1458 if (!matcher.matches())
1459 failCount++;
1460
1461 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1462 matcher = pattern.matcher(toSupplementaries("aa"));
1463 if (!matcher.matches())
1464 failCount++;
1465
1466 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1467 matcher = pattern.matcher(toSupplementaries("aablah"));
1468 if (!matcher.matches())
1469 failCount++;
1470
1471 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1472 matcher = pattern.matcher(toSupplementaries("aa"));
1473 if (!matcher.matches())
1474 failCount++;
1475
1476 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1477 matcher = pattern.matcher(toSupplementaries("aa"));
1478 if (!matcher.matches())
1479 failCount++;
1480
1481 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1482 matcher = pattern.matcher(toSupplementaries("aabc"));
1483 if (!matcher.matches())
1484 failCount++;
1485
1486 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1487 matcher = pattern.matcher(toSupplementaries("aabc"));
1488 if (!matcher.matches())
1489 failCount++;
1490
1491 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1492 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1493 if (!matcher.matches())
1494 failCount++;
1495
1496 report("Comments");
1497 }
1498
1499 private static void caseFoldingTest() { // bug 4504687
1500 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1501 Pattern pattern = Pattern.compile("aa", flags);
1502 Matcher matcher = pattern.matcher("ab");
1503 if (matcher.matches())
1504 failCount++;
1505
1506 pattern = Pattern.compile("aA", flags);
1507 matcher = pattern.matcher("ab");
1508 if (matcher.matches())
1509 failCount++;
1510
1511 pattern = Pattern.compile("aa", flags);
1512 matcher = pattern.matcher("aB");
1513 if (matcher.matches())
1514 failCount++;
1515 matcher = pattern.matcher("Ab");
1516 if (matcher.matches())
1517 failCount++;
1518
1519 // ASCII "a"
1520 // Latin-1 Supplement "a" + grave
1521 // Cyrillic "a"
1522 String[] patterns = new String[] {
1523 //single
1524 "a", "\u00e0", "\u0430",
1525 //slice
1526 "ab", "\u00e0\u00e1", "\u0430\u0431",
1527 //class single
1528 "[a]", "[\u00e0]", "[\u0430]",
1529 //class range
1530 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1531 //back reference
1532 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1533 };
1534
1535 String[] texts = new String[] {
1536 "A", "\u00c0", "\u0410",
1537 "AB", "\u00c0\u00c1", "\u0410\u0411",
1538 "A", "\u00c0", "\u0410",
1539 "B", "\u00c2", "\u0411",
1540 "aA", "\u00e0\u00c0", "\u0430\u0410"
1541 };
1542
1543 boolean[] expected = new boolean[] {
1544 true, false, false,
1545 true, false, false,
1546 true, false, false,
1547 true, false, false,
1548 true, false, false
1549 };
1550
1551 flags = Pattern.CASE_INSENSITIVE;
1552 for (int i = 0; i < patterns.length; i++) {
1553 pattern = Pattern.compile(patterns[i], flags);
1554 matcher = pattern.matcher(texts[i]);
1555 if (matcher.matches() != expected[i]) {
1556 System.out.println("<1> Failed at " + i);
1557 failCount++;
1558 }
1559 }
1560
1561 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1562 for (int i = 0; i < patterns.length; i++) {
1563 pattern = Pattern.compile(patterns[i], flags);
1564 matcher = pattern.matcher(texts[i]);
1565 if (!matcher.matches()) {
1566 System.out.println("<2> Failed at " + i);
1567 failCount++;
1568 }
1569 }
1570 // flag unicode_case alone should do nothing
1571 flags = Pattern.UNICODE_CASE;
1572 for (int i = 0; i < patterns.length; i++) {
1573 pattern = Pattern.compile(patterns[i], flags);
1574 matcher = pattern.matcher(texts[i]);
1575 if (matcher.matches()) {
1576 System.out.println("<3> Failed at " + i);
1577 failCount++;
1578 }
1579 }
1580
1581 // Special cases: i, I, u+0131 and u+0130
1582 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1583 pattern = Pattern.compile("[h-j]+", flags);
1584 if (!pattern.matcher("\u0131\u0130").matches())
1585 failCount++;
1586 report("Case Folding");
1587 }
1588
1589 private static void appendTest() {
1590 Pattern pattern = Pattern.compile("(ab)(cd)");
1591 Matcher matcher = pattern.matcher("abcd");
1592 String result = matcher.replaceAll("$2$1");
1593 if (!result.equals("cdab"))
1594 failCount++;
1595
1596 String s1 = "Swap all: first = 123, second = 456";
1597 String s2 = "Swap one: first = 123, second = 456";
1598 String r = "$3$2$1";
1599 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1600 matcher = pattern.matcher(s1);
1601
1602 result = matcher.replaceAll(r);
1603 if (!result.equals("Swap all: 123 = first, 456 = second"))
1604 failCount++;
1605
1606 matcher = pattern.matcher(s2);
1607
1608 if (matcher.find()) {
1609 StringBuffer sb = new StringBuffer();
1610 matcher.appendReplacement(sb, r);
1611 matcher.appendTail(sb);
1612 result = sb.toString();
1613 if (!result.equals("Swap one: 123 = first, second = 456"))
1614 failCount++;
1615 }
1616
1617 // Supplementary character test
1618 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1619 matcher = pattern.matcher(toSupplementaries("abcd"));
1620 result = matcher.replaceAll("$2$1");
1621 if (!result.equals(toSupplementaries("cdab")))
1622 failCount++;
1623
1624 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1625 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1626 r = toSupplementaries("$3$2$1");
1627 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1628 matcher = pattern.matcher(s1);
1629
1630 result = matcher.replaceAll(r);
1631 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1632 failCount++;
1633
1634 matcher = pattern.matcher(s2);
1635
1636 if (matcher.find()) {
1637 StringBuffer sb = new StringBuffer();
1638 matcher.appendReplacement(sb, r);
1639 matcher.appendTail(sb);
1640 result = sb.toString();
1641 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1642 failCount++;
1643 }
1644 report("Append");
1645 }
1646
1647 private static void splitTest() {
1648 Pattern pattern = Pattern.compile(":");
1649 String[] result = pattern.split("foo:and:boo", 2);
1650 if (!result[0].equals("foo"))
1651 failCount++;
1652 if (!result[1].equals("and:boo"))
1653 failCount++;
1654 // Supplementary character test
1655 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1656 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1657 if (!result[0].equals(toSupplementaries("foo")))
1658 failCount++;
1659 if (!result[1].equals(toSupplementaries("andXboo")))
1660 failCount++;
1661
1662 CharBuffer cb = CharBuffer.allocate(100);
1663 cb.put("foo:and:boo");
1664 cb.flip();
1665 result = pattern.split(cb);
1666 if (!result[0].equals("foo"))
1667 failCount++;
1668 if (!result[1].equals("and"))
1669 failCount++;
1670 if (!result[2].equals("boo"))
1671 failCount++;
1672
1673 // Supplementary character test
1674 CharBuffer cbs = CharBuffer.allocate(100);
1675 cbs.put(toSupplementaries("fooXandXboo"));
1676 cbs.flip();
1677 result = patternX.split(cbs);
1678 if (!result[0].equals(toSupplementaries("foo")))
1679 failCount++;
1680 if (!result[1].equals(toSupplementaries("and")))
1681 failCount++;
1682 if (!result[2].equals(toSupplementaries("boo")))
1683 failCount++;
1684
1685 String source = "0123456789";
1686 for (int limit=-2; limit<3; limit++) {
1687 for (int x=0; x<10; x++) {
1688 result = source.split(Integer.toString(x), limit);
1689 int expectedLength = limit < 1 ? 2 : limit;
1690
1691 if ((limit == 0) && (x == 9)) {
1692 // expected dropping of ""
1693 if (result.length != 1)
1694 failCount++;
1695 if (!result[0].equals("012345678")) {
1696 failCount++;
1697 }
1698 } else {
1699 if (result.length != expectedLength) {
1700 failCount++;
1701 }
1702 if (!result[0].equals(source.substring(0,x))) {
1703 if (limit != 1) {
1704 failCount++;
1705 } else {
1706 if (!result[0].equals(source.substring(0,10))) {
1707 failCount++;
1708 }
1709 }
1710 }
1711 if (expectedLength > 1) { // Check segment 2
1712 if (!result[1].equals(source.substring(x+1,10)))
1713 failCount++;
1714 }
1715 }
1716 }
1717 }
1718 // Check the case for no match found
1719 for (int limit=-2; limit<3; limit++) {
1720 result = source.split("e", limit);
1721 if (result.length != 1)
1722 failCount++;
1723 if (!result[0].equals(source))
1724 failCount++;
1725 }
1726 // Check the case for limit == 0, source = "";
1727 source = "";
1728 result = source.split("e", 0);
1729 if (result.length != 1)
1730 failCount++;
1731 if (!result[0].equals(source))
1732 failCount++;
1733
1734 report("Split");
1735 }
1736
1737 private static void negationTest() {
1738 Pattern pattern = Pattern.compile("[\\[@^]+");
1739 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1740 if (!matcher.find())
1741 failCount++;
1742 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1743 failCount++;
1744 pattern = Pattern.compile("[@\\[^]+");
1745 matcher = pattern.matcher("@@@@[[[[^^^^");
1746 if (!matcher.find())
1747 failCount++;
1748 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1749 failCount++;
1750 pattern = Pattern.compile("[@\\[^@]+");
1751 matcher = pattern.matcher("@@@@[[[[^^^^");
1752 if (!matcher.find())
1753 failCount++;
1754 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1755 failCount++;
1756
1757 pattern = Pattern.compile("\\)");
1758 matcher = pattern.matcher("xxx)xxx");
1759 if (!matcher.find())
1760 failCount++;
1761
1762 report("Negation");
1763 }
1764
1765 private static void ampersandTest() {
1766 Pattern pattern = Pattern.compile("[&@]+");
1767 check(pattern, "@@@@&&&&", true);
1768
1769 pattern = Pattern.compile("[@&]+");
1770 check(pattern, "@@@@&&&&", true);
1771
1772 pattern = Pattern.compile("[@\\&]+");
1773 check(pattern, "@@@@&&&&", true);
1774
1775 report("Ampersand");
1776 }
1777
1778 private static void octalTest() throws Exception {
1779 Pattern pattern = Pattern.compile("\\u0007");
1780 Matcher matcher = pattern.matcher("\u0007");
1781 if (!matcher.matches())
1782 failCount++;
1783 pattern = Pattern.compile("\\07");
1784 matcher = pattern.matcher("\u0007");
1785 if (!matcher.matches())
1786 failCount++;
1787 pattern = Pattern.compile("\\007");
1788 matcher = pattern.matcher("\u0007");
1789 if (!matcher.matches())
1790 failCount++;
1791 pattern = Pattern.compile("\\0007");
1792 matcher = pattern.matcher("\u0007");
1793 if (!matcher.matches())
1794 failCount++;
1795 pattern = Pattern.compile("\\040");
1796 matcher = pattern.matcher("\u0020");
1797 if (!matcher.matches())
1798 failCount++;
1799 pattern = Pattern.compile("\\0403");
1800 matcher = pattern.matcher("\u00203");
1801 if (!matcher.matches())
1802 failCount++;
1803 pattern = Pattern.compile("\\0103");
1804 matcher = pattern.matcher("\u0043");
1805 if (!matcher.matches())
1806 failCount++;
1807
1808 report("Octal");
1809 }
1810
1811 private static void longPatternTest() throws Exception {
1812 try {
1813 Pattern pattern = Pattern.compile(
1814 "a 32-character-long pattern xxxx");
1815 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1816 pattern = Pattern.compile("a thirty four character long regex");
1817 StringBuffer patternToBe = new StringBuffer(101);
1818 for (int i=0; i<100; i++)
1819 patternToBe.append((char)(97 + i%26));
1820 pattern = Pattern.compile(patternToBe.toString());
1821 } catch (PatternSyntaxException e) {
1822 failCount++;
1823 }
1824
1825 // Supplementary character test
1826 try {
1827 Pattern pattern = Pattern.compile(
1828 toSupplementaries("a 32-character-long pattern xxxx"));
1829 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1830 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1831 StringBuffer patternToBe = new StringBuffer(101*2);
1832 for (int i=0; i<100; i++)
1833 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1834 + 97 + i%26));
1835 pattern = Pattern.compile(patternToBe.toString());
1836 } catch (PatternSyntaxException e) {
1837 failCount++;
1838 }
1839 report("LongPattern");
1840 }
1841
1842 private static void group0Test() throws Exception {
1843 Pattern pattern = Pattern.compile("(tes)ting");
1844 Matcher matcher = pattern.matcher("testing");
1845 check(matcher, "testing");
1846
1847 matcher.reset("testing");
1848 if (matcher.lookingAt()) {
1849 if (!matcher.group(0).equals("testing"))
1850 failCount++;
1851 } else {
1852 failCount++;
1853 }
1854
1855 matcher.reset("testing");
1856 if (matcher.matches()) {
1857 if (!matcher.group(0).equals("testing"))
1858 failCount++;
1859 } else {
1860 failCount++;
1861 }
1862
1863 pattern = Pattern.compile("(tes)ting");
1864 matcher = pattern.matcher("testing");
1865 if (matcher.lookingAt()) {
1866 if (!matcher.group(0).equals("testing"))
1867 failCount++;
1868 } else {
1869 failCount++;
1870 }
1871
1872 pattern = Pattern.compile("^(tes)ting");
1873 matcher = pattern.matcher("testing");
1874 if (matcher.matches()) {
1875 if (!matcher.group(0).equals("testing"))
1876 failCount++;
1877 } else {
1878 failCount++;
1879 }
1880
1881 // Supplementary character test
1882 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1883 matcher = pattern.matcher(toSupplementaries("testing"));
1884 check(matcher, toSupplementaries("testing"));
1885
1886 matcher.reset(toSupplementaries("testing"));
1887 if (matcher.lookingAt()) {
1888 if (!matcher.group(0).equals(toSupplementaries("testing")))
1889 failCount++;
1890 } else {
1891 failCount++;
1892 }
1893
1894 matcher.reset(toSupplementaries("testing"));
1895 if (matcher.matches()) {
1896 if (!matcher.group(0).equals(toSupplementaries("testing")))
1897 failCount++;
1898 } else {
1899 failCount++;
1900 }
1901
1902 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1903 matcher = pattern.matcher(toSupplementaries("testing"));
1904 if (matcher.lookingAt()) {
1905 if (!matcher.group(0).equals(toSupplementaries("testing")))
1906 failCount++;
1907 } else {
1908 failCount++;
1909 }
1910
1911 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1912 matcher = pattern.matcher(toSupplementaries("testing"));
1913 if (matcher.matches()) {
1914 if (!matcher.group(0).equals(toSupplementaries("testing")))
1915 failCount++;
1916 } else {
1917 failCount++;
1918 }
1919
1920 report("Group0");
1921 }
1922
1923 private static void findIntTest() throws Exception {
1924 Pattern p = Pattern.compile("blah");
1925 Matcher m = p.matcher("zzzzblahzzzzzblah");
1926 boolean result = m.find(2);
1927 if (!result)
1928 failCount++;
1929
1930 p = Pattern.compile("$");
1931 m = p.matcher("1234567890");
1932 result = m.find(10);
1933 if (!result)
1934 failCount++;
1935 try {
1936 result = m.find(11);
1937 failCount++;
1938 } catch (IndexOutOfBoundsException e) {
1939 // correct result
1940 }
1941
1942 // Supplementary character test
1943 p = Pattern.compile(toSupplementaries("blah"));
1944 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1945 result = m.find(2);
1946 if (!result)
1947 failCount++;
1948
1949 report("FindInt");
1950 }
1951
1952 private static void emptyPatternTest() throws Exception {
1953 Pattern p = Pattern.compile("");
1954 Matcher m = p.matcher("foo");
1955
1956 // Should find empty pattern at beginning of input
1957 boolean result = m.find();
1958 if (result != true)
1959 failCount++;
1960 if (m.start() != 0)
1961 failCount++;
1962
1963 // Should not match entire input if input is not empty
1964 m.reset();
1965 result = m.matches();
1966 if (result == true)
1967 failCount++;
1968
1969 try {
1970 m.start(0);
1971 failCount++;
1972 } catch (IllegalStateException e) {
1973 // Correct result
1974 }
1975
1976 // Should match entire input if input is empty
1977 m.reset("");
1978 result = m.matches();
1979 if (result != true)
1980 failCount++;
1981
1982 result = Pattern.matches("", "");
1983 if (result != true)
1984 failCount++;
1985
1986 result = Pattern.matches("", "foo");
1987 if (result == true)
1988 failCount++;
1989 report("EmptyPattern");
1990 }
1991
1992 private static void charClassTest() throws Exception {
1993 Pattern pattern = Pattern.compile("blah[ab]]blech");
1994 check(pattern, "blahb]blech", true);
1995
1996 pattern = Pattern.compile("[abc[def]]");
1997 check(pattern, "b", true);
1998
1999 // Supplementary character tests
2000 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2001 check(pattern, toSupplementaries("blahb]blech"), true);
2002
2003 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2004 check(pattern, toSupplementaries("b"), true);
2005
2006 try {
2007 // u00ff when UNICODE_CASE
2008 pattern = Pattern.compile("[ab\u00ffcd]",
2009 Pattern.CASE_INSENSITIVE|
2010 Pattern.UNICODE_CASE);
2011 check(pattern, "ab\u00ffcd", true);
2012 check(pattern, "Ab\u0178Cd", true);
2013
2014 // u00b5 when UNICODE_CASE
2015 pattern = Pattern.compile("[ab\u00b5cd]",
2016 Pattern.CASE_INSENSITIVE|
2017 Pattern.UNICODE_CASE);
2018 check(pattern, "ab\u00b5cd", true);
2019 check(pattern, "Ab\u039cCd", true);
2020 } catch (Exception e) { failCount++; }
2021
2022 /* Special cases
2023 (1)LatinSmallLetterLongS u+017f
2024 (2)LatinSmallLetterDotlessI u+0131
2025 (3)LatineCapitalLetterIWithDotAbove u+0130
2026 (4)KelvinSign u+212a
2027 (5)AngstromSign u+212b
2028 */
2029 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2030 pattern = Pattern.compile("[sik\u00c5]+", flags);
2031 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2032 failCount++;
2033
2034 report("CharClass");
2035 }
2036
2037 private static void caretTest() throws Exception {
2038 Pattern pattern = Pattern.compile("\\w*");
2039 Matcher matcher = pattern.matcher("a#bc#def##g");
2040 check(matcher, "a");
2041 check(matcher, "");
2042 check(matcher, "bc");
2043 check(matcher, "");
2044 check(matcher, "def");
2045 check(matcher, "");
2046 check(matcher, "");
2047 check(matcher, "g");
2048 check(matcher, "");
2049 if (matcher.find())
2050 failCount++;
2051
2052 pattern = Pattern.compile("^\\w*");
2053 matcher = pattern.matcher("a#bc#def##g");
2054 check(matcher, "a");
2055 if (matcher.find())
2056 failCount++;
2057
2058 pattern = Pattern.compile("\\w");
2059 matcher = pattern.matcher("abc##x");
2060 check(matcher, "a");
2061 check(matcher, "b");
2062 check(matcher, "c");
2063 check(matcher, "x");
2064 if (matcher.find())
2065 failCount++;
2066
2067 pattern = Pattern.compile("^\\w");
2068 matcher = pattern.matcher("abc##x");
2069 check(matcher, "a");
2070 if (matcher.find())
2071 failCount++;
2072
2073 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2074 matcher = pattern.matcher("abcdef-ghi\njklmno");
2075 check(matcher, "abc");
2076 if (matcher.find())
2077 failCount++;
2078
2079 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2080 matcher = pattern.matcher("abcdef-ghi\njklmno");
2081 check(matcher, "abc");
2082 check(matcher, "jkl");
2083 if (matcher.find())
2084 failCount++;
2085
2086 pattern = Pattern.compile("^", Pattern.MULTILINE);
2087 matcher = pattern.matcher("this is some text");
2088 String result = matcher.replaceAll("X");
2089 if (!result.equals("Xthis is some text"))
2090 failCount++;
2091
2092 pattern = Pattern.compile("^");
2093 matcher = pattern.matcher("this is some text");
2094 result = matcher.replaceAll("X");
2095 if (!result.equals("Xthis is some text"))
2096 failCount++;
2097
2098 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2099 matcher = pattern.matcher("this is some text\n");
2100 result = matcher.replaceAll("X");
2101 if (!result.equals("Xthis is some text\n"))
2102 failCount++;
2103
2104 report("Caret");
2105 }
2106
2107 private static void groupCaptureTest() throws Exception {
2108 // Independent group
2109 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2110 Matcher matcher = pattern.matcher("xxxyyyzzz");
2111 matcher.find();
2112 try {
2113 String blah = matcher.group(1);
2114 failCount++;
2115 } catch (IndexOutOfBoundsException ioobe) {
2116 // Good result
2117 }
2118 // Pure group
2119 pattern = Pattern.compile("x+(?:y+)z+");
2120 matcher = pattern.matcher("xxxyyyzzz");
2121 matcher.find();
2122 try {
2123 String blah = matcher.group(1);
2124 failCount++;
2125 } catch (IndexOutOfBoundsException ioobe) {
2126 // Good result
2127 }
2128
2129 // Supplementary character tests
2130 // Independent group
2131 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2132 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2133 matcher.find();
2134 try {
2135 String blah = matcher.group(1);
2136 failCount++;
2137 } catch (IndexOutOfBoundsException ioobe) {
2138 // Good result
2139 }
2140 // Pure group
2141 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2142 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2143 matcher.find();
2144 try {
2145 String blah = matcher.group(1);
2146 failCount++;
2147 } catch (IndexOutOfBoundsException ioobe) {
2148 // Good result
2149 }
2150
2151 report("GroupCapture");
2152 }
2153
2154 private static void backRefTest() throws Exception {
2155 Pattern pattern = Pattern.compile("(a*)bc\\1");
2156 check(pattern, "zzzaabcazzz", true);
2157
2158 pattern = Pattern.compile("(a*)bc\\1");
2159 check(pattern, "zzzaabcaazzz", true);
2160
2161 pattern = Pattern.compile("(abc)(def)\\1");
2162 check(pattern, "abcdefabc", true);
2163
2164 pattern = Pattern.compile("(abc)(def)\\3");
2165 check(pattern, "abcdefabc", false);
2166
2167 try {
2168 for (int i = 1; i < 10; i++) {
2169 // Make sure backref 1-9 are always accepted
2170 pattern = Pattern.compile("abcdef\\" + i);
2171 // and fail to match if the target group does not exit
2172 check(pattern, "abcdef", false);
2173 }
2174 } catch(PatternSyntaxException e) {
2175 failCount++;
2176 }
2177
2178 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2179 check(pattern, "abcdefghija", false);
2180 check(pattern, "abcdefghija1", true);
2181
2182 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2183 check(pattern, "abcdefghijkk", true);
2184
2185 pattern = Pattern.compile("(a)bcdefghij\\11");
2186 check(pattern, "abcdefghija1", true);
2187
2188 // Supplementary character tests
2189 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2190 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2191
2192 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2193 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2194
2195 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2196 check(pattern, toSupplementaries("abcdefabc"), true);
2197
2198 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2199 check(pattern, toSupplementaries("abcdefabc"), false);
2200
2201 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2202 check(pattern, toSupplementaries("abcdefghija"), false);
2203 check(pattern, toSupplementaries("abcdefghija1"), true);
2204
2205 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2206 check(pattern, toSupplementaries("abcdefghijkk"), true);
2207
2208 report("BackRef");
2209 }
2210
2211 /**
2212 * Unicode Technical Report #18, section 2.6 End of Line
2213 * There is no empty line to be matched in the sequence \u000D\u000A
2214 * but there is an empty line in the sequence \u000A\u000D.
2215 */
2216 private static void anchorTest() throws Exception {
2217 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2218 Matcher m = p.matcher("blah1\r\nblah2");
2219 m.find();
2220 m.find();
2221 if (!m.group().equals("blah2"))
2222 failCount++;
2223
2224 m.reset("blah1\n\rblah2");
2225 m.find();
2226 m.find();
2227 m.find();
2228 if (!m.group().equals("blah2"))
2229 failCount++;
2230
2231 // Test behavior of $ with \r\n at end of input
2232 p = Pattern.compile(".+$");
2233 m = p.matcher("blah1\r\n");
2234 if (!m.find())
2235 failCount++;
2236 if (!m.group().equals("blah1"))
2237 failCount++;
2238 if (m.find())
2239 failCount++;
2240
2241 // Test behavior of $ with \r\n at end of input in multiline
2242 p = Pattern.compile(".+$", Pattern.MULTILINE);
2243 m = p.matcher("blah1\r\n");
2244 if (!m.find())
2245 failCount++;
2246 if (m.find())
2247 failCount++;
2248
2249 // Test for $ recognition of \u0085 for bug 4527731
2250 p = Pattern.compile(".+$", Pattern.MULTILINE);
2251 m = p.matcher("blah1\u0085");
2252 if (!m.find())
2253 failCount++;
2254
2255 // Supplementary character test
2256 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2257 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2258 m.find();
2259 m.find();
2260 if (!m.group().equals(toSupplementaries("blah2")))
2261 failCount++;
2262
2263 m.reset(toSupplementaries("blah1\n\rblah2"));
2264 m.find();
2265 m.find();
2266 m.find();
2267 if (!m.group().equals(toSupplementaries("blah2")))
2268 failCount++;
2269
2270 // Test behavior of $ with \r\n at end of input
2271 p = Pattern.compile(".+$");
2272 m = p.matcher(toSupplementaries("blah1\r\n"));
2273 if (!m.find())
2274 failCount++;
2275 if (!m.group().equals(toSupplementaries("blah1")))
2276 failCount++;
2277 if (m.find())
2278 failCount++;
2279
2280 // Test behavior of $ with \r\n at end of input in multiline
2281 p = Pattern.compile(".+$", Pattern.MULTILINE);
2282 m = p.matcher(toSupplementaries("blah1\r\n"));
2283 if (!m.find())
2284 failCount++;
2285 if (m.find())
2286 failCount++;
2287
2288 // Test for $ recognition of \u0085 for bug 4527731
2289 p = Pattern.compile(".+$", Pattern.MULTILINE);
2290 m = p.matcher(toSupplementaries("blah1\u0085"));
2291 if (!m.find())
2292 failCount++;
2293
2294 report("Anchors");
2295 }
2296
2297 /**
2298 * A basic sanity test of Matcher.lookingAt().
2299 */
2300 private static void lookingAtTest() throws Exception {
2301 Pattern p = Pattern.compile("(ab)(c*)");
2302 Matcher m = p.matcher("abccczzzabcczzzabccc");
2303
2304 if (!m.lookingAt())
2305 failCount++;
2306
2307 if (!m.group().equals(m.group(0)))
2308 failCount++;
2309
2310 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2311 if (m.lookingAt())
2312 failCount++;
2313
2314 // Supplementary character test
2315 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2316 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2317
2318 if (!m.lookingAt())
2319 failCount++;
2320
2321 if (!m.group().equals(m.group(0)))
2322 failCount++;
2323
2324 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2325 if (m.lookingAt())
2326 failCount++;
2327
2328 report("Looking At");
2329 }
2330
2331 /**
2332 * A basic sanity test of Matcher.matches().
2333 */
2334 private static void matchesTest() throws Exception {
2335 // matches()
2336 Pattern p = Pattern.compile("ulb(c*)");
2337 Matcher m = p.matcher("ulbcccccc");
2338 if (!m.matches())
2339 failCount++;
2340
2341 // find() but not matches()
2342 m.reset("zzzulbcccccc");
2343 if (m.matches())
2344 failCount++;
2345
2346 // lookingAt() but not matches()
2347 m.reset("ulbccccccdef");
2348 if (m.matches())
2349 failCount++;
2350
2351 // matches()
2352 p = Pattern.compile("a|ad");
2353 m = p.matcher("ad");
2354 if (!m.matches())
2355 failCount++;
2356
2357 // Supplementary character test
2358 // matches()
2359 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2360 m = p.matcher(toSupplementaries("ulbcccccc"));
2361 if (!m.matches())
2362 failCount++;
2363
2364 // find() but not matches()
2365 m.reset(toSupplementaries("zzzulbcccccc"));
2366 if (m.matches())
2367 failCount++;
2368
2369 // lookingAt() but not matches()
2370 m.reset(toSupplementaries("ulbccccccdef"));
2371 if (m.matches())
2372 failCount++;
2373
2374 // matches()
2375 p = Pattern.compile(toSupplementaries("a|ad"));
2376 m = p.matcher(toSupplementaries("ad"));
2377 if (!m.matches())
2378 failCount++;
2379
2380 report("Matches");
2381 }
2382
2383 /**
2384 * A basic sanity test of Pattern.matches().
2385 */
2386 private static void patternMatchesTest() throws Exception {
2387 // matches()
2388 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2389 toSupplementaries("ulbcccccc")))
2390 failCount++;
2391
2392 // find() but not matches()
2393 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2394 toSupplementaries("zzzulbcccccc")))
2395 failCount++;
2396
2397 // lookingAt() but not matches()
2398 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2399 toSupplementaries("ulbccccccdef")))
2400 failCount++;
2401
2402 // Supplementary character test
2403 // matches()
2404 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2405 toSupplementaries("ulbcccccc")))
2406 failCount++;
2407
2408 // find() but not matches()
2409 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2410 toSupplementaries("zzzulbcccccc")))
2411 failCount++;
2412
2413 // lookingAt() but not matches()
2414 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2415 toSupplementaries("ulbccccccdef")))
2416 failCount++;
2417
2418 report("Pattern Matches");
2419 }
2420
2421 /**
2422 * Canonical equivalence testing. Tests the ability of the engine
2423 * to match sequences that are not explicitly specified in the
2424 * pattern when they are considered equivalent by the Unicode Standard.
2425 */
2426 private static void ceTest() throws Exception {
2427 // Decomposed char outside char classes
2428 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2429 Matcher m = p.matcher("test\u00e5");
2430 if (!m.matches())
2431 failCount++;
2432
2433 m.reset("testa\u030a");
2434 if (!m.matches())
2435 failCount++;
2436
2437 // Composed char outside char classes
2438 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2439 m = p.matcher("test\u00e5");
2440 if (!m.matches())
2441 failCount++;
2442
2443 m.reset("testa\u030a");
2444 if (!m.find())
2445 failCount++;
2446
2447 // Decomposed char inside a char class
2448 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2449 m = p.matcher("test\u00e5");
2450 if (!m.find())
2451 failCount++;
2452
2453 m.reset("testa\u030a");
2454 if (!m.find())
2455 failCount++;
2456
2457 // Composed char inside a char class
2458 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2459 m = p.matcher("test\u00e5");
2460 if (!m.find())
2461 failCount++;
2462
2463 m.reset("testa\u0300");
2464 if (!m.find())
2465 failCount++;
2466
2467 m.reset("testa\u030a");
2468 if (!m.find())
2469 failCount++;
2470
2471 // Marks that cannot legally change order and be equivalent
2472 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2473 check(p, "testa\u0308\u0300", true);
2474 check(p, "testa\u0300\u0308", false);
2475
2476 // Marks that can legally change order and be equivalent
2477 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2478 check(p, "testa\u0308\u0323", true);
2479 check(p, "testa\u0323\u0308", true);
2480
2481 // Test all equivalences of the sequence a\u0308\u0323\u0300
2482 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2483 check(p, "testa\u0308\u0323\u0300", true);
2484 check(p, "testa\u0323\u0308\u0300", true);
2485 check(p, "testa\u0308\u0300\u0323", true);
2486 check(p, "test\u00e4\u0323\u0300", true);
2487 check(p, "test\u00e4\u0300\u0323", true);
2488
2489 /*
2490 * The following canonical equivalence tests don't work. Bug id: 4916384.
2491 *
2492 // Decomposed hangul (jamos)
2493 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2494 m = p.matcher("\u1100\u1161");
2495 if (!m.matches())
2496 failCount++;
2497
2498 m.reset("\uac00");
2499 if (!m.matches())
2500 failCount++;
2501
2502 // Composed hangul
2503 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2504 m = p.matcher("\u1100\u1161");
2505 if (!m.matches())
2506 failCount++;
2507
2508 m.reset("\uac00");
2509 if (!m.matches())
2510 failCount++;
2511
2512 // Decomposed supplementary outside char classes
2513 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2514 m = p.matcher("test\ud834\uddc0");
2515 if (!m.matches())
2516 failCount++;
2517
2518 m.reset("test\ud834\uddbc\ud834\udd6f");
2519 if (!m.matches())
2520 failCount++;
2521
2522 // Composed supplementary outside char classes
2523 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2524 m.reset("test\ud834\uddbc\ud834\udd6f");
2525 if (!m.matches())
2526 failCount++;
2527
2528 m = p.matcher("test\ud834\uddc0");
2529 if (!m.matches())
2530 failCount++;
2531
2532 */
2533
2534 report("Canonical Equivalence");
2535 }
2536
2537 /**
2538 * A basic sanity test of Matcher.replaceAll().
2539 */
2540 private static void globalSubstitute() throws Exception {
2541 // Global substitution with a literal
2542 Pattern p = Pattern.compile("(ab)(c*)");
2543 Matcher m = p.matcher("abccczzzabcczzzabccc");
2544 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2545 failCount++;
2546
2547 m.reset("zzzabccczzzabcczzzabccczzz");
2548 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2549 failCount++;
2550
2551 // Global substitution with groups
2552 m.reset("zzzabccczzzabcczzzabccczzz");
2553 String result = m.replaceAll("$1");
2554 if (!result.equals("zzzabzzzabzzzabzzz"))
2555 failCount++;
2556
2557 // Supplementary character test
2558 // Global substitution with a literal
2559 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2560 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2561 if (!m.replaceAll(toSupplementaries("test")).
2562 equals(toSupplementaries("testzzztestzzztest")))
2563 failCount++;
2564
2565 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2566 if (!m.replaceAll(toSupplementaries("test")).
2567 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2568 failCount++;
2569
2570 // Global substitution with groups
2571 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2572 result = m.replaceAll("$1");
2573 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2574 failCount++;
2575
2576 report("Global Substitution");
2577 }
2578
2579 /**
2580 * Tests the usage of Matcher.appendReplacement() with literal
2581 * and group substitutions.
2582 */
2583 private static void stringbufferSubstitute() throws Exception {
2584 // SB substitution with literal
2585 String blah = "zzzblahzzz";
2586 Pattern p = Pattern.compile("blah");
2587 Matcher m = p.matcher(blah);
2588 StringBuffer result = new StringBuffer();
2589 try {
2590 m.appendReplacement(result, "blech");
2591 failCount++;
2592 } catch (IllegalStateException e) {
2593 }
2594 m.find();
2595 m.appendReplacement(result, "blech");
2596 if (!result.toString().equals("zzzblech"))
2597 failCount++;
2598
2599 m.appendTail(result);
2600 if (!result.toString().equals("zzzblechzzz"))
2601 failCount++;
2602
2603 // SB substitution with groups
2604 blah = "zzzabcdzzz";
2605 p = Pattern.compile("(ab)(cd)*");
2606 m = p.matcher(blah);
2607 result = new StringBuffer();
2608 try {
2609 m.appendReplacement(result, "$1");
2610 failCount++;
2611 } catch (IllegalStateException e) {
2612 }
2613 m.find();
2614 m.appendReplacement(result, "$1");
2615 if (!result.toString().equals("zzzab"))
2616 failCount++;
2617
2618 m.appendTail(result);
2619 if (!result.toString().equals("zzzabzzz"))
2620 failCount++;
2621
2622 // SB substitution with 3 groups
2623 blah = "zzzabcdcdefzzz";
2624 p = Pattern.compile("(ab)(cd)*(ef)");
2625 m = p.matcher(blah);
2626 result = new StringBuffer();
2627 try {
2628 m.appendReplacement(result, "$1w$2w$3");
2629 failCount++;
2630 } catch (IllegalStateException e) {
2631 }
2632 m.find();
2633 m.appendReplacement(result, "$1w$2w$3");
2634 if (!result.toString().equals("zzzabwcdwef"))
2635 failCount++;
2636
2637 m.appendTail(result);
2638 if (!result.toString().equals("zzzabwcdwefzzz"))
2639 failCount++;
2640
2641 // SB substitution with groups and three matches
2642 // skipping middle match
2643 blah = "zzzabcdzzzabcddzzzabcdzzz";
2644 p = Pattern.compile("(ab)(cd*)");
2645 m = p.matcher(blah);
2646 result = new StringBuffer();
2647 try {
2648 m.appendReplacement(result, "$1");
2649 failCount++;
2650 } catch (IllegalStateException e) {
2651 }
2652 m.find();
2653 m.appendReplacement(result, "$1");
2654 if (!result.toString().equals("zzzab"))
2655 failCount++;
2656
2657 m.find();
2658 m.find();
2659 m.appendReplacement(result, "$2");
2660 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2661 failCount++;
2662
2663 m.appendTail(result);
2664 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2665 failCount++;
2666
2667 // Check to make sure escaped $ is ignored
2668 blah = "zzzabcdcdefzzz";
2669 p = Pattern.compile("(ab)(cd)*(ef)");
2670 m = p.matcher(blah);
2671 result = new StringBuffer();
2672 m.find();
2673 m.appendReplacement(result, "$1w\\$2w$3");
2674 if (!result.toString().equals("zzzabw$2wef"))
2675 failCount++;
2676
2677 m.appendTail(result);
2678 if (!result.toString().equals("zzzabw$2wefzzz"))
2679 failCount++;
2680
2681 // Check to make sure a reference to nonexistent group causes error
2682 blah = "zzzabcdcdefzzz";
2683 p = Pattern.compile("(ab)(cd)*(ef)");
2684 m = p.matcher(blah);
2685 result = new StringBuffer();
2686 m.find();
2687 try {
2688 m.appendReplacement(result, "$1w$5w$3");
2689 failCount++;
2690 } catch (IndexOutOfBoundsException ioobe) {
2691 // Correct result
2692 }
2693
2694 // Check double digit group references
2695 blah = "zzz123456789101112zzz";
2696 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2697 m = p.matcher(blah);
2698 result = new StringBuffer();
2699 m.find();
2700 m.appendReplacement(result, "$1w$11w$3");
2701 if (!result.toString().equals("zzz1w11w3"))
2702 failCount++;
2703
2704 // Check to make sure it backs off $15 to $1 if only three groups
2705 blah = "zzzabcdcdefzzz";
2706 p = Pattern.compile("(ab)(cd)*(ef)");
2707 m = p.matcher(blah);
2708 result = new StringBuffer();
2709 m.find();
2710 m.appendReplacement(result, "$1w$15w$3");
2711 if (!result.toString().equals("zzzabwab5wef"))
2712 failCount++;
2713
2714
2715 // Supplementary character test
2716 // SB substitution with literal
2717 blah = toSupplementaries("zzzblahzzz");
2718 p = Pattern.compile(toSupplementaries("blah"));
2719 m = p.matcher(blah);
2720 result = new StringBuffer();
2721 try {
2722 m.appendReplacement(result, toSupplementaries("blech"));
2723 failCount++;
2724 } catch (IllegalStateException e) {
2725 }
2726 m.find();
2727 m.appendReplacement(result, toSupplementaries("blech"));
2728 if (!result.toString().equals(toSupplementaries("zzzblech")))
2729 failCount++;
2730
2731 m.appendTail(result);
2732 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2733 failCount++;
2734
2735 // SB substitution with groups
2736 blah = toSupplementaries("zzzabcdzzz");
2737 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2738 m = p.matcher(blah);
2739 result = new StringBuffer();
2740 try {
2741 m.appendReplacement(result, "$1");
2742 failCount++;
2743 } catch (IllegalStateException e) {
2744 }
2745 m.find();
2746 m.appendReplacement(result, "$1");
2747 if (!result.toString().equals(toSupplementaries("zzzab")))
2748 failCount++;
2749
2750 m.appendTail(result);
2751 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2752 failCount++;
2753
2754 // SB substitution with 3 groups
2755 blah = toSupplementaries("zzzabcdcdefzzz");
2756 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2757 m = p.matcher(blah);
2758 result = new StringBuffer();
2759 try {
2760 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2761 failCount++;
2762 } catch (IllegalStateException e) {
2763 }
2764 m.find();
2765 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2766 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2767 failCount++;
2768
2769 m.appendTail(result);
2770 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2771 failCount++;
2772
2773 // SB substitution with groups and three matches
2774 // skipping middle match
2775 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2776 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2777 m = p.matcher(blah);
2778 result = new StringBuffer();
2779 try {
2780 m.appendReplacement(result, "$1");
2781 failCount++;
2782 } catch (IllegalStateException e) {
2783 }
2784 m.find();
2785 m.appendReplacement(result, "$1");
2786 if (!result.toString().equals(toSupplementaries("zzzab")))
2787 failCount++;
2788
2789 m.find();
2790 m.find();
2791 m.appendReplacement(result, "$2");
2792 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2793 failCount++;
2794
2795 m.appendTail(result);
2796 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2797 failCount++;
2798
2799 // Check to make sure escaped $ is ignored
2800 blah = toSupplementaries("zzzabcdcdefzzz");
2801 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2802 m = p.matcher(blah);
2803 result = new StringBuffer();
2804 m.find();
2805 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2806 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2807 failCount++;
2808
2809 m.appendTail(result);
2810 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2811 failCount++;
2812
2813 // Check to make sure a reference to nonexistent group causes error
2814 blah = toSupplementaries("zzzabcdcdefzzz");
2815 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2816 m = p.matcher(blah);
2817 result = new StringBuffer();
2818 m.find();
2819 try {
2820 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2821 failCount++;
2822 } catch (IndexOutOfBoundsException ioobe) {
2823 // Correct result
2824 }
2825
2826 // Check double digit group references
2827 blah = toSupplementaries("zzz123456789101112zzz");
2828 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2829 m = p.matcher(blah);
2830 result = new StringBuffer();
2831 m.find();
2832 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2833 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2834 failCount++;
2835
2836 // Check to make sure it backs off $15 to $1 if only three groups
2837 blah = toSupplementaries("zzzabcdcdefzzz");
2838 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2839 m = p.matcher(blah);
2840 result = new StringBuffer();
2841 m.find();
2842 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2843 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2844 failCount++;
2845
2846 // Check nothing has been appended into the output buffer if
2847 // the replacement string triggers IllegalArgumentException.
2848 p = Pattern.compile("(abc)");
2849 m = p.matcher("abcd");
2850 result = new StringBuffer();
2851 m.find();
2852 try {
2853 m.appendReplacement(result, ("xyz$g"));
2854 failCount++;
2855 } catch (IllegalArgumentException iae) {
2856 if (result.length() != 0)
2857 failCount++;
2858 }
2859
2860 report("SB Substitution");
2861 }
2862
2863 /*
2864 * 5 groups of characters are created to make a substitution string.
2865 * A base string will be created including random lead chars, the
2866 * substitution string, and random trailing chars.
2867 * A pattern containing the 5 groups is searched for and replaced with:
2868 * random group + random string + random group.
2869 * The results are checked for correctness.
2870 */
2871 private static void substitutionBasher() {
2872 for (int runs = 0; runs<1000; runs++) {
2873 // Create a base string to work in
2874 int leadingChars = generator.nextInt(10);
2875 StringBuffer baseBuffer = new StringBuffer(100);
2876 String leadingString = getRandomAlphaString(leadingChars);
2877 baseBuffer.append(leadingString);
2878
2879 // Create 5 groups of random number of random chars
2880 // Create the string to substitute
2881 // Create the pattern string to search for
2882 StringBuffer bufferToSub = new StringBuffer(25);
2883 StringBuffer bufferToPat = new StringBuffer(50);
2884 String[] groups = new String[5];
2885 for(int i=0; i<5; i++) {
2886 int aGroupSize = generator.nextInt(5)+1;
2887 groups[i] = getRandomAlphaString(aGroupSize);
2888 bufferToSub.append(groups[i]);
2889 bufferToPat.append('(');
2890 bufferToPat.append(groups[i]);
2891 bufferToPat.append(')');
2892 }
2893 String stringToSub = bufferToSub.toString();
2894 String pattern = bufferToPat.toString();
2895
2896 // Place sub string into working string at random index
2897 baseBuffer.append(stringToSub);
2898
2899 // Append random chars to end
2900 int trailingChars = generator.nextInt(10);
2901 String trailingString = getRandomAlphaString(trailingChars);
2902 baseBuffer.append(trailingString);
2903 String baseString = baseBuffer.toString();
2904
2905 // Create test pattern and matcher
2906 Pattern p = Pattern.compile(pattern);
2907 Matcher m = p.matcher(baseString);
2908
2909 // Reject candidate if pattern happens to start early
2910 m.find();
2911 if (m.start() < leadingChars)
2912 continue;
2913
2914 // Reject candidate if more than one match
2915 if (m.find())
2916 continue;
2917
2918 // Construct a replacement string with :
2919 // random group + random string + random group
2920 StringBuffer bufferToRep = new StringBuffer();
2921 int groupIndex1 = generator.nextInt(5);
2922 bufferToRep.append("$" + (groupIndex1 + 1));
2923 String randomMidString = getRandomAlphaString(5);
2924 bufferToRep.append(randomMidString);
2925 int groupIndex2 = generator.nextInt(5);
2926 bufferToRep.append("$" + (groupIndex2 + 1));
2927 String replacement = bufferToRep.toString();
2928
2929 // Do the replacement
2930 String result = m.replaceAll(replacement);
2931
2932 // Construct expected result
2933 StringBuffer bufferToRes = new StringBuffer();
2934 bufferToRes.append(leadingString);
2935 bufferToRes.append(groups[groupIndex1]);
2936 bufferToRes.append(randomMidString);
2937 bufferToRes.append(groups[groupIndex2]);
2938 bufferToRes.append(trailingString);
2939 String expectedResult = bufferToRes.toString();
2940
2941 // Check results
2942 if (!result.equals(expectedResult))
2943 failCount++;
2944 }
2945
2946 report("Substitution Basher");
2947 }
2948
2949 /**
2950 * Checks the handling of some escape sequences that the Pattern
2951 * class should process instead of the java compiler. These are
2952 * not in the file because the escapes should be processed
2953 * by the Pattern class when the regex is compiled.
2954 */
2955 private static void escapes() throws Exception {
2956 Pattern p = Pattern.compile("\\043");
2957 Matcher m = p.matcher("#");
2958 if (!m.find())
2959 failCount++;
2960
2961 p = Pattern.compile("\\x23");
2962 m = p.matcher("#");
2963 if (!m.find())
2964 failCount++;
2965
2966 p = Pattern.compile("\\u0023");
2967 m = p.matcher("#");
2968 if (!m.find())
2969 failCount++;
2970
2971 report("Escape sequences");
2972 }
2973
2974 /**
2975 * Checks the handling of blank input situations. These
2976 * tests are incompatible with my test file format.
2977 */
2978 private static void blankInput() throws Exception {
2979 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
2980 Matcher m = p.matcher("");
2981 if (m.find())
2982 failCount++;
2983
2984 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
2985 m = p.matcher("");
2986 if (!m.find())
2987 failCount++;
2988
2989 p = Pattern.compile("abc");
2990 m = p.matcher("");
2991 if (m.find())
2992 failCount++;
2993
2994 p = Pattern.compile("a*");
2995 m = p.matcher("");
2996 if (!m.find())
2997 failCount++;
2998
2999 report("Blank input");
3000 }
3001
3002 /**
3003 * Tests the Boyer-Moore pattern matching of a character sequence
3004 * on randomly generated patterns.
3005 */
3006 private static void bm() throws Exception {
3007 doBnM('a');
3008 report("Boyer Moore (ASCII)");
3009
3010 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3011 report("Boyer Moore (Supplementary)");
3012 }
3013
3014 private static void doBnM(int baseCharacter) throws Exception {
3015 int achar=0;
3016
3017 for (int i=0; i<100; i++) {
3018 // Create a short pattern to search for
3019 int patternLength = generator.nextInt(7) + 4;
3020 StringBuffer patternBuffer = new StringBuffer(patternLength);
3021 for (int x=0; x<patternLength; x++) {
3022 int ch = baseCharacter + generator.nextInt(26);
3023 if (Character.isSupplementaryCodePoint(ch)) {
3024 patternBuffer.append(Character.toChars(ch));
3025 } else {
3026 patternBuffer.append((char)ch);
3027 }
3028 }
3029 String pattern = patternBuffer.toString();
3030 Pattern p = Pattern.compile(pattern);
3031
3032 // Create a buffer with random ASCII chars that does
3033 // not match the sample
3034 String toSearch = null;
3035 StringBuffer s = null;
3036 Matcher m = p.matcher("");
3037 do {
3038 s = new StringBuffer(100);
3039 for (int x=0; x<100; x++) {
3040 int ch = baseCharacter + generator.nextInt(26);
3041 if (Character.isSupplementaryCodePoint(ch)) {
3042 s.append(Character.toChars(ch));
3043 } else {
3044 s.append((char)ch);
3045 }
3046 }
3047 toSearch = s.toString();
3048 m.reset(toSearch);
3049 } while (m.find());
3050
3051 // Insert the pattern at a random spot
3052 int insertIndex = generator.nextInt(99);
3053 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3054 insertIndex++;
3055 s = s.insert(insertIndex, pattern);
3056 toSearch = s.toString();
3057
3058 // Make sure that the pattern is found
3059 m.reset(toSearch);
3060 if (!m.find())
3061 failCount++;
3062
3063 // Make sure that the match text is the pattern
3064 if (!m.group().equals(pattern))
3065 failCount++;
3066
3067 // Make sure match occured at insertion point
3068 if (m.start() != insertIndex)
3069 failCount++;
3070 }
3071 }
3072
3073 /**
3074 * Tests the matching of slices on randomly generated patterns.
3075 * The Boyer-Moore optimization is not done on these patterns
3076 * because it uses unicode case folding.
3077 */
3078 private static void slice() throws Exception {
3079 doSlice(Character.MAX_VALUE);
3080 report("Slice");
3081
3082 doSlice(Character.MAX_CODE_POINT);
3083 report("Slice (Supplementary)");
3084 }
3085
3086 private static void doSlice(int maxCharacter) throws Exception {
3087 Random generator = new Random();
3088 int achar=0;
3089
3090 for (int i=0; i<100; i++) {
3091 // Create a short pattern to search for
3092 int patternLength = generator.nextInt(7) + 4;
3093 StringBuffer patternBuffer = new StringBuffer(patternLength);
3094 for (int x=0; x<patternLength; x++) {
3095 int randomChar = 0;
3096 while (!Character.isLetterOrDigit(randomChar))
3097 randomChar = generator.nextInt(maxCharacter);
3098 if (Character.isSupplementaryCodePoint(randomChar)) {
3099 patternBuffer.append(Character.toChars(randomChar));
3100 } else {
3101 patternBuffer.append((char) randomChar);
3102 }
3103 }
3104 String pattern = patternBuffer.toString();
3105 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3106
3107 // Create a buffer with random chars that does not match the sample
3108 String toSearch = null;
3109 StringBuffer s = null;
3110 Matcher m = p.matcher("");
3111 do {
3112 s = new StringBuffer(100);
3113 for (int x=0; x<100; x++) {
3114 int randomChar = 0;
3115 while (!Character.isLetterOrDigit(randomChar))
3116 randomChar = generator.nextInt(maxCharacter);
3117 if (Character.isSupplementaryCodePoint(randomChar)) {
3118 s.append(Character.toChars(randomChar));
3119 } else {
3120 s.append((char) randomChar);
3121 }
3122 }
3123 toSearch = s.toString();
3124 m.reset(toSearch);
3125 } while (m.find());
3126
3127 // Insert the pattern at a random spot
3128 int insertIndex = generator.nextInt(99);
3129 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3130 insertIndex++;
3131 s = s.insert(insertIndex, pattern);
3132 toSearch = s.toString();
3133
3134 // Make sure that the pattern is found
3135 m.reset(toSearch);
3136 if (!m.find())
3137 failCount++;
3138
3139 // Make sure that the match text is the pattern
3140 if (!m.group().equals(pattern))
3141 failCount++;
3142
3143 // Make sure match occured at insertion point
3144 if (m.start() != insertIndex)
3145 failCount++;
3146 }
3147 }
3148
3149 private static void explainFailure(String pattern, String data,
3150 String expected, String actual) {
3151 System.err.println("----------------------------------------");
3152 System.err.println("Pattern = "+pattern);
3153 System.err.println("Data = "+data);
3154 System.err.println("Expected = " + expected);
3155 System.err.println("Actual = " + actual);
3156 }
3157
3158 private static void explainFailure(String pattern, String data,
3159 Throwable t) {
3160 System.err.println("----------------------------------------");
3161 System.err.println("Pattern = "+pattern);
3162 System.err.println("Data = "+data);
3163 t.printStackTrace(System.err);
3164 }
3165
3166 // Testing examples from a file
3167
3168 /**
3169 * Goes through the file "TestCases.txt" and creates many patterns
3170 * described in the file, matching the patterns against input lines in
3171 * the file, and comparing the results against the correct results
3172 * also found in the file. The file format is described in comments
3173 * at the head of the file.
3174 */
3175 private static void processFile(String fileName) throws Exception {
3176 File testCases = new File(System.getProperty("test.src", "."),
3177 fileName);
3178 FileInputStream in = new FileInputStream(testCases);
3179 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3180
3181 // Process next test case.
3182 String aLine;
3183 while((aLine = r.readLine()) != null) {
3184 // Read a line for pattern
3185 String patternString = grabLine(r);
3186 Pattern p = null;
3187 try {
3188 p = compileTestPattern(patternString);
3189 } catch (PatternSyntaxException e) {
3190 String dataString = grabLine(r);
3191 String expectedResult = grabLine(r);
3192 if (expectedResult.startsWith("error"))
3193 continue;
3194 explainFailure(patternString, dataString, e);
3195 failCount++;
3196 continue;
3197 }
3198
3199 // Read a line for input string
3200 String dataString = grabLine(r);
3201 Matcher m = p.matcher(dataString);
3202 StringBuffer result = new StringBuffer();
3203
3204 // Check for IllegalStateExceptions before a match
3205 failCount += preMatchInvariants(m);
3206
3207 boolean found = m.find();
3208
3209 if (found)
3210 failCount += postTrueMatchInvariants(m);
3211 else
3212 failCount += postFalseMatchInvariants(m);
3213
3214 if (found) {
3215 result.append("true ");
3216 result.append(m.group(0) + " ");
3217 } else {
3218 result.append("false ");
3219 }
3220
3221 result.append(m.groupCount());
3222
3223 if (found) {
3224 for (int i=1; i<m.groupCount()+1; i++)
3225 if (m.group(i) != null)
3226 result.append(" " +m.group(i));
3227 }
3228
3229 // Read a line for the expected result
3230 String expectedResult = grabLine(r);
3231
3232 if (!result.toString().equals(expectedResult)) {
3233 explainFailure(patternString, dataString, expectedResult, result.toString());
3234 failCount++;
3235 }
3236 }
3237
3238 report(fileName);
3239 }
3240
3241 private static int preMatchInvariants(Matcher m) {
3242 int failCount = 0;
3243 try {
3244 m.start();
3245 failCount++;
3246 } catch (IllegalStateException ise) {}
3247 try {
3248 m.end();
3249 failCount++;
3250 } catch (IllegalStateException ise) {}
3251 try {
3252 m.group();
3253 failCount++;
3254 } catch (IllegalStateException ise) {}
3255 return failCount;
3256 }
3257
3258 private static int postFalseMatchInvariants(Matcher m) {
3259 int failCount = 0;
3260 try {
3261 m.group();
3262 failCount++;
3263 } catch (IllegalStateException ise) {}
3264 try {
3265 m.start();
3266 failCount++;
3267 } catch (IllegalStateException ise) {}
3268 try {
3269 m.end();
3270 failCount++;
3271 } catch (IllegalStateException ise) {}
3272 return failCount;
3273 }
3274
3275 private static int postTrueMatchInvariants(Matcher m) {
3276 int failCount = 0;
3277 //assert(m.start() = m.start(0);
3278 if (m.start() != m.start(0))
3279 failCount++;
3280 //assert(m.end() = m.end(0);
3281 if (m.start() != m.start(0))
3282 failCount++;
3283 //assert(m.group() = m.group(0);
3284 if (!m.group().equals(m.group(0)))
3285 failCount++;
3286 try {
3287 m.group(50);
3288 failCount++;
3289 } catch (IndexOutOfBoundsException ise) {}
3290
3291 return failCount;
3292 }
3293
3294 private static Pattern compileTestPattern(String patternString) {
3295 if (!patternString.startsWith("'")) {
3296 return Pattern.compile(patternString);
3297 }
3298
3299 int break1 = patternString.lastIndexOf("'");
3300 String flagString = patternString.substring(
3301 break1+1, patternString.length());
3302 patternString = patternString.substring(1, break1);
3303
3304 if (flagString.equals("i"))
3305 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3306
3307 if (flagString.equals("m"))
3308 return Pattern.compile(patternString, Pattern.MULTILINE);
3309
3310 return Pattern.compile(patternString);
3311 }
3312
3313 /**
3314 * Reads a line from the input file. Keeps reading lines until a non
3315 * empty non comment line is read. If the line contains a \n then
3316 * these two characters are replaced by a newline char. If a \\uxxxx
3317 * sequence is read then the sequence is replaced by the unicode char.
3318 */
3319 private static String grabLine(BufferedReader r) throws Exception {
3320 int index = 0;
3321 String line = r.readLine();
3322 while (line.startsWith("//") || line.length() < 1)
3323 line = r.readLine();
3324 while ((index = line.indexOf("\\n")) != -1) {
3325 StringBuffer temp = new StringBuffer(line);
3326 temp.replace(index, index+2, "\n");
3327 line = temp.toString();
3328 }
3329 while ((index = line.indexOf("\\u")) != -1) {
3330 StringBuffer temp = new StringBuffer(line);
3331 String value = temp.substring(index+2, index+6);
3332 char aChar = (char)Integer.parseInt(value, 16);
3333 String unicodeChar = "" + aChar;
3334 temp.replace(index, index+6, unicodeChar);
3335 line = temp.toString();
3336 }
3337
3338 return line;
3339 }
3340
3341 private static void check(Pattern p, String s, String g, String expected) {
3342 Matcher m = p.matcher(s);
3343 m.find();
3344 if (!m.group(g).equals(expected))
3345 failCount++;
3346 }
3347
3348 private static void checkReplaceFirst(String p, String s, String r, String expected)
3349 {
3350 if (!expected.equals(Pattern.compile(p)
3351 .matcher(s)
3352 .replaceFirst(r)))
3353 failCount++;
3354 }
3355
3356 private static void checkReplaceAll(String p, String s, String r, String expected)
3357 {
3358 if (!expected.equals(Pattern.compile(p)
3359 .matcher(s)
3360 .replaceAll(r)))
3361 failCount++;
3362 }
3363
3364 private static void checkExpectedFail(String p) {
3365 try {
3366 Pattern.compile(p);
3367 } catch (PatternSyntaxException pse) {
3368 //pse.printStackTrace();
3369 return;
3370 }
3371 failCount++;
3372 }
3373
3374 private static void checkExpectedFail(Matcher m, String g) {
3375 m.find();
3376 try {
3377 m.group(g);
3378 } catch (IllegalArgumentException iae) {
3379 //iae.printStackTrace();
3380 return;
3381 } catch (NullPointerException npe) {
3382 return;
3383 }
3384 failCount++;
3385 }
3386
3387
3388 private static void namedGroupCaptureTest() throws Exception {
3389 check(Pattern.compile("x+(?<gname>y+)z+"),
3390 "xxxyyyzzz",
3391 "gname",
3392 "yyy");
3393
shermand9337e02009-10-21 11:40:40 -07003394 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003395 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003396 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003397 "yyy");
3398
sherman0b4d42d2009-02-23 21:06:15 -08003399 //backref
3400 Pattern pattern = Pattern.compile("(a*)bc\\1");
3401 check(pattern, "zzzaabcazzz", true); // found "abca"
3402
3403 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3404 "zzzaabcaazzz", true);
3405
3406 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3407 "abcdefabc", true);
3408
3409 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3410 "abcdefghijkk", true);
3411
3412 // Supplementary character tests
3413 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3414 toSupplementaries("zzzaabcazzz"), true);
3415
3416 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3417 toSupplementaries("zzzaabcaazzz"), true);
3418
3419 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3420 toSupplementaries("abcdefabc"), true);
3421
3422 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3423 "(?<gname>" +
3424 toSupplementaries("k)") + "\\k<gname>"),
3425 toSupplementaries("abcdefghijkk"), true);
3426
3427 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3428 "xxxyyyzzzyyy",
3429 "gname",
3430 "yyy");
3431
3432 //replaceFirst/All
3433 checkReplaceFirst("(?<gn>ab)(c*)",
3434 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003435 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003436 "abzzzabcczzzabccc");
3437
3438 checkReplaceAll("(?<gn>ab)(c*)",
3439 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003440 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003441 "abzzzabzzzab");
3442
3443
3444 checkReplaceFirst("(?<gn>ab)(c*)",
3445 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003446 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003447 "zzzabzzzabcczzzabccczzz");
3448
3449 checkReplaceAll("(?<gn>ab)(c*)",
3450 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003451 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003452 "zzzabzzzabzzzabzzz");
3453
3454 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3455 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003456 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003457 "zzzccczzzabcczzzabccczzz");
3458
3459 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3460 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003461 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003462 "zzzccczzzcczzzccczzz");
3463
3464 //toSupplementaries("(ab)(c*)"));
3465 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3466 ")(?<gn2>" + toSupplementaries("c") + "*)",
3467 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003468 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003469 toSupplementaries("abzzzabcczzzabccc"));
3470
3471
3472 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3473 ")(?<gn2>" + toSupplementaries("c") + "*)",
3474 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003475 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003476 toSupplementaries("abzzzabzzzab"));
3477
3478 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3479 ")(?<gn2>" + toSupplementaries("c") + "*)",
3480 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003481 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003482 toSupplementaries("ccczzzabcczzzabccc"));
3483
3484
3485 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3486 ")(?<gn2>" + toSupplementaries("c") + "*)",
3487 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003488 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003489 toSupplementaries("ccczzzcczzzccc"));
3490
3491 checkReplaceFirst("(?<dog>Dog)AndCat",
3492 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003493 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003494 "zzzDogzzzDogAndCatzzz");
3495
3496
3497 checkReplaceAll("(?<dog>Dog)AndCat",
3498 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003499 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003500 "zzzDogzzzDogzzz");
3501
3502 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003503 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3504 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003505 failCount++;
3506
3507 // negative
3508 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3509 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003510 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003511 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3512 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3513 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514 "gnameX");
3515 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3516 null);
3517 report("NamedGroupCapture");
3518 }
sherman6782c962010-02-05 00:10:42 -08003519
shermancc01ef52010-05-18 15:36:47 -07003520 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003521 private static void nonBmpClassComplementTest() throws Exception {
3522 Pattern p = Pattern.compile("\\P{Lu}");
3523 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3524 if (m.find() && m.start() == 1)
3525 failCount++;
3526
3527 // from a unicode category
3528 p = Pattern.compile("\\P{Lu}");
3529 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3530 if (m.find())
3531 failCount++;
3532 if (!m.hitEnd())
3533 failCount++;
3534
3535 // block
3536 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3537 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3538 if (m.find() && m.start() == 1)
3539 failCount++;
3540
3541 report("NonBmpClassComplement");
3542 }
3543
shermancc01ef52010-05-18 15:36:47 -07003544 private static void unicodePropertiesTest() throws Exception {
3545 // different forms
3546 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3547 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3548 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3549 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3550 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3551 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3552 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3553 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3554 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3555 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3556 failCount++;
3557
3558 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3559 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3560 Matcher lastSM = common;
3561 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3562
3563 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3564 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3565 Matcher lastBM = latin;
3566 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3567
3568 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3569 if (cp >= 0x30000 && (cp & 0x70) == 0){
3570 continue; // only pick couple code points, they are the same
3571 }
3572
3573 // Unicode Script
3574 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3575 Matcher m;
3576 String str = new String(Character.toChars(cp));
3577 if (script == lastScript) {
3578 m = lastSM;
3579 m.reset(str);
3580 } else {
3581 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3582 }
3583 if (!m.matches()) {
3584 failCount++;
3585 }
3586 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3587 other.reset(str);
3588 if (other.matches()) {
3589 failCount++;
3590 }
3591 lastSM = m;
3592 lastScript = script;
3593
3594 // Unicode Block
3595 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3596 if (block == null) {
3597 //System.out.printf("Not a Block: cp=%x%n", cp);
3598 continue;
3599 }
3600 if (block == lastBlock) {
3601 m = lastBM;
3602 m.reset(str);
3603 } else {
3604 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3605 }
3606 if (!m.matches()) {
3607 failCount++;
3608 }
3609 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3610 other.reset(str);
3611 if (other.matches()) {
3612 failCount++;
3613 }
3614 lastBM = m;
3615 lastBlock = block;
3616 }
3617 report("unicodeProperties");
3618 }
shermanf03c78b2011-02-03 13:49:25 -08003619
3620 private static void unicodeHexNotationTest() throws Exception {
3621
3622 // negative
3623 checkExpectedFail("\\x{-23}");
3624 checkExpectedFail("\\x{110000}");
3625 checkExpectedFail("\\x{}");
3626 checkExpectedFail("\\x{AB[ef]");
3627
3628 // codepoint
3629 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3630 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3631 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3632 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3633
3634 // in class
3635 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3636 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3637 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3638 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3639 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3640 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3641
3642 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3643 String s = "A" + new String(Character.toChars(cp)) + "B";
3644 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3645 : String.format("\\u%04x\\u%04x",
3646 (int) Character.toChars(cp)[0],
3647 (int) Character.toChars(cp)[1]);
3648 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3649 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3650 failCount++;
3651 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3652 failCount++;
3653 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3654 failCount++;
3655 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3656 failCount++;
3657 }
3658 report("unicodeHexNotation");
3659 }
sherman0b4d42d2009-02-23 21:06:15 -08003660}