blob: f0563d94e2e7ea8029a83867b0fb14dcf304c796 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
psandoze9d4ac92013-05-01 18:40:31 +02002 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
psandoze9d4ac92013-05-01 18:40:31 +020036 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646
sherman0b4d42d2009-02-23 21:06:15 -080037 */
38
39import java.util.regex.*;
40import java.util.Random;
41import java.io.*;
42import java.util.*;
43import java.nio.CharBuffer;
psandoze9d4ac92013-05-01 18:40:31 +020044import java.util.function.Predicate;
sherman0b4d42d2009-02-23 21:06:15 -080045
46/**
47 * This is a test class created to check the operation of
48 * the Pattern and Matcher classes.
49 */
50public class RegExTest {
51
52 private static Random generator = new Random();
53 private static boolean failure = false;
54 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080055 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080056
57 /**
58 * Main to interpret arguments and run several tests.
59 *
60 */
61 public static void main(String[] args) throws Exception {
62 // Most of the tests are in a file
63 processFile("TestCases.txt");
64 //processFile("PerlCases.txt");
65 processFile("BMPTestCases.txt");
66 processFile("SupplementaryTestCases.txt");
67
68 // These test many randomly generated char patterns
69 bm();
70 slice();
71
72 // These are hard to put into the file
73 escapes();
74 blankInput();
75
76 // Substitition tests on randomly generated sequences
77 globalSubstitute();
78 stringbufferSubstitute();
79 substitutionBasher();
80
81 // Canonical Equivalence
82 ceTest();
83
84 // Anchors
85 anchorTest();
86
87 // boolean match calls
88 matchesTest();
89 lookingAtTest();
90
91 // Pattern API
92 patternMatchesTest();
93
94 // Misc
95 lookbehindTest();
96 nullArgumentTest();
97 backRefTest();
98 groupCaptureTest();
99 caretTest();
100 charClassTest();
101 emptyPatternTest();
102 findIntTest();
103 group0Test();
104 longPatternTest();
105 octalTest();
106 ampersandTest();
107 negationTest();
108 splitTest();
109 appendTest();
110 caseFoldingTest();
111 commentsTest();
112 unixLinesTest();
113 replaceFirstTest();
114 gTest();
115 zTest();
116 serializeTest();
117 reluctantRepetitionTest();
118 multilineDollarTest();
119 dollarAtEndTest();
120 caretBetweenTerminatorsTest();
121 // This RFE rejected in Tiger numOccurrencesTest();
122 javaCharClassTest();
123 nonCaptureRepetitionTest();
124 notCapturedGroupCurlyMatchTest();
125 escapedSegmentTest();
126 literalPatternTest();
127 literalReplacementTest();
128 regionTest();
129 toStringTest();
130 negatedCharClassTest();
131 findFromTest();
132 boundsTest();
133 unicodeWordBoundsTest();
134 caretAtEndTest();
135 wordSearchTest();
136 hitEndTest();
137 toMatchResultTest();
138 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800139 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800140 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800141 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700142 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800143 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700144 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700145 horizontalAndVerticalWSTest();
146 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700147 branchTest();
shermanf6f35a12013-04-26 13:59:10 -0700148 groupCurlyNotFoundSuppTest();
psandoze9d4ac92013-05-01 18:40:31 +0200149 patternAsPredicate();
shermanb16229d2011-12-19 14:14:14 -0800150 if (failure) {
151 throw new
152 RuntimeException("RegExTest failed, 1st failure: " +
153 firstFailure);
154 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800155 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800156 }
sherman0b4d42d2009-02-23 21:06:15 -0800157 }
158
159 // Utility functions
160
161 private static String getRandomAlphaString(int length) {
162 StringBuffer buf = new StringBuffer(length);
163 for (int i=0; i<length; i++) {
164 char randChar = (char)(97 + generator.nextInt(26));
165 buf.append(randChar);
166 }
167 return buf.toString();
168 }
169
170 private static void check(Matcher m, String expected) {
171 m.find();
172 if (!m.group().equals(expected))
173 failCount++;
174 }
175
176 private static void check(Matcher m, String result, boolean expected) {
177 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800178 if (m.group().equals(result) != expected)
179 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800180 }
181
182 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800183 if (p.matcher(s).find() != expected)
184 failCount++;
185 }
186
187 private static void check(String p, String s, boolean expected) {
188 Matcher matcher = Pattern.compile(p).matcher(s);
189 if (matcher.find() != expected)
190 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800191 }
192
193 private static void check(String p, char c, boolean expected) {
194 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
195 Pattern pattern = Pattern.compile(propertyPattern);
196 char[] ca = new char[1]; ca[0] = c;
197 Matcher matcher = pattern.matcher(new String(ca));
198 if (!matcher.find())
199 failCount++;
200 }
201
202 private static void check(String p, int codePoint, boolean expected) {
203 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
204 Pattern pattern = Pattern.compile(propertyPattern);
205 char[] ca = Character.toChars(codePoint);
206 Matcher matcher = pattern.matcher(new String(ca));
207 if (!matcher.find())
208 failCount++;
209 }
210
211 private static void check(String p, int flag, String input, String s,
212 boolean expected)
213 {
214 Pattern pattern = Pattern.compile(p, flag);
215 Matcher matcher = pattern.matcher(input);
216 if (expected)
217 check(matcher, s, expected);
218 else
219 check(pattern, input, false);
220 }
221
222 private static void report(String testName) {
223 int spacesToAdd = 30 - testName.length();
224 StringBuffer paddedNameBuffer = new StringBuffer(testName);
225 for (int i=0; i<spacesToAdd; i++)
226 paddedNameBuffer.append(" ");
227 String paddedName = paddedNameBuffer.toString();
228 System.err.println(paddedName + ": " +
229 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800230 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800231 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800232
233 if (firstFailure == null) {
234 firstFailure = testName;
235 }
236 }
237
sherman0b4d42d2009-02-23 21:06:15 -0800238 failCount = 0;
239 }
240
241 /**
242 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
243 * supplementary characters. This method does NOT fully take care
244 * of the regex syntax.
245 */
246 private static String toSupplementaries(String s) {
247 int length = s.length();
248 StringBuffer sb = new StringBuffer(length * 2);
249
250 for (int i = 0; i < length; ) {
251 char c = s.charAt(i++);
252 if (c == '\\') {
253 sb.append(c);
254 if (i < length) {
255 c = s.charAt(i++);
256 sb.append(c);
257 if (c == 'u') {
258 // assume no syntax error
259 sb.append(s.charAt(i++));
260 sb.append(s.charAt(i++));
261 sb.append(s.charAt(i++));
262 sb.append(s.charAt(i++));
263 }
264 }
265 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
266 sb.append('\ud800').append((char)('\udc00'+c));
267 } else {
268 sb.append(c);
269 }
270 }
271 return sb.toString();
272 }
273
274 // Regular expression tests
275
276 // This is for bug 6178785
277 // Test if an expected NPE gets thrown when passing in a null argument
278 private static boolean check(Runnable test) {
279 try {
280 test.run();
281 failCount++;
282 return false;
283 } catch (NullPointerException npe) {
284 return true;
285 }
286 }
287
288 private static void nullArgumentTest() {
289 check(new Runnable() { public void run() { Pattern.compile(null); }});
290 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
291 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
292 check(new Runnable() { public void run() { Pattern.quote(null);}});
293 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
294 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
295
296 final Matcher m = Pattern.compile("xyz").matcher("xyz");
297 m.matches();
298 check(new Runnable() { public void run() { m.appendTail(null);}});
299 check(new Runnable() { public void run() { m.replaceAll(null);}});
300 check(new Runnable() { public void run() { m.replaceFirst(null);}});
301 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
302 check(new Runnable() { public void run() { m.reset(null);}});
303 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
304 //check(new Runnable() { public void run() { m.usePattern(null);}});
305
306 report("Null Argument");
307 }
308
309 // This is for bug6635133
310 // Test if surrogate pair in Unicode escapes can be handled correctly.
311 private static void surrogatesInClassTest() throws Exception {
312 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
313 Matcher matcher = pattern.matcher("\ud834\udd22");
314 if (!matcher.find())
315 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800316
317 report("Surrogate pair in Unicode escape");
318 }
319
320 // This is for bug6990617
321 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
322 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
323 // char is an octal digit.
324 private static void removeQEQuotingTest() throws Exception {
325 Pattern pattern =
326 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
327 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
328 if (!matcher.find())
329 failCount++;
330
331 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800332 }
333
334 // This is for bug 4988891
335 // Test toMatchResult to see that it is a copy of the Matcher
336 // that is not affected by subsequent operations on the original
337 private static void toMatchResultTest() throws Exception {
338 Pattern pattern = Pattern.compile("squid");
339 Matcher matcher = pattern.matcher(
340 "agiantsquidofdestinyasmallsquidoffate");
341 matcher.find();
342 int matcherStart1 = matcher.start();
343 MatchResult mr = matcher.toMatchResult();
344 if (mr == matcher)
345 failCount++;
346 int resultStart1 = mr.start();
347 if (matcherStart1 != resultStart1)
348 failCount++;
349 matcher.find();
350 int matcherStart2 = matcher.start();
351 int resultStart2 = mr.start();
352 if (matcherStart2 == resultStart2)
353 failCount++;
354 if (resultStart1 != resultStart2)
355 failCount++;
356 MatchResult mr2 = matcher.toMatchResult();
357 if (mr == mr2)
358 failCount++;
359 if (mr2.start() != matcherStart2)
360 failCount++;
361 report("toMatchResult is a copy");
362 }
363
364 // This is for bug 5013885
365 // Must test a slice to see if it reports hitEnd correctly
366 private static void hitEndTest() throws Exception {
367 // Basic test of Slice node
368 Pattern p = Pattern.compile("^squidattack");
369 Matcher m = p.matcher("squack");
370 m.find();
371 if (m.hitEnd())
372 failCount++;
373 m.reset("squid");
374 m.find();
375 if (!m.hitEnd())
376 failCount++;
377
378 // Test Slice, SliceA and SliceU nodes
379 for (int i=0; i<3; i++) {
380 int flags = 0;
381 if (i==1) flags = Pattern.CASE_INSENSITIVE;
382 if (i==2) flags = Pattern.UNICODE_CASE;
383 p = Pattern.compile("^abc", flags);
384 m = p.matcher("ad");
385 m.find();
386 if (m.hitEnd())
387 failCount++;
388 m.reset("ab");
389 m.find();
390 if (!m.hitEnd())
391 failCount++;
392 }
393
394 // Test Boyer-Moore node
395 p = Pattern.compile("catattack");
396 m = p.matcher("attack");
397 m.find();
398 if (!m.hitEnd())
399 failCount++;
400
401 p = Pattern.compile("catattack");
402 m = p.matcher("attackattackattackcatatta");
403 m.find();
404 if (!m.hitEnd())
405 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800406 report("hitEnd from a Slice");
407 }
408
409 // This is for bug 4997476
410 // It is weird code submitted by customer demonstrating a regression
411 private static void wordSearchTest() throws Exception {
412 String testString = new String("word1 word2 word3");
413 Pattern p = Pattern.compile("\\b");
414 Matcher m = p.matcher(testString);
415 int position = 0;
416 int start = 0;
417 while (m.find(position)) {
418 start = m.start();
419 if (start == testString.length())
420 break;
421 if (m.find(start+1)) {
422 position = m.start();
423 } else {
424 position = testString.length();
425 }
426 if (testString.substring(start, position).equals(" "))
427 continue;
428 if (!testString.substring(start, position-1).startsWith("word"))
429 failCount++;
430 }
431 report("Customer word search");
432 }
433
434 // This is for bug 4994840
435 private static void caretAtEndTest() throws Exception {
436 // Problem only occurs with multiline patterns
437 // containing a beginning-of-line caret "^" followed
438 // by an expression that also matches the empty string.
439 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
440 Matcher matcher = pattern.matcher("\r");
441 matcher.find();
442 matcher.find();
443 report("Caret at end");
444 }
445
446 // This test is for 4979006
447 // Check to see if word boundary construct properly handles unicode
448 // non spacing marks
449 private static void unicodeWordBoundsTest() throws Exception {
450 String spaces = " ";
451 String wordChar = "a";
452 String nsm = "\u030a";
453
454 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
455
456 Pattern pattern = Pattern.compile("\\b");
457 Matcher matcher = pattern.matcher("");
458 // S=other B=word character N=non spacing mark .=word boundary
459 // SS.BB.SS
460 String input = spaces + wordChar + wordChar + spaces;
461 twoFindIndexes(input, matcher, 2, 4);
462 // SS.BBN.SS
463 input = spaces + wordChar +wordChar + nsm + spaces;
464 twoFindIndexes(input, matcher, 2, 5);
465 // SS.BN.SS
466 input = spaces + wordChar + nsm + spaces;
467 twoFindIndexes(input, matcher, 2, 4);
468 // SS.BNN.SS
469 input = spaces + wordChar + nsm + nsm + spaces;
470 twoFindIndexes(input, matcher, 2, 5);
471 // SSN.BB.SS
472 input = spaces + nsm + wordChar + wordChar + spaces;
473 twoFindIndexes(input, matcher, 3, 5);
474 // SS.BNB.SS
475 input = spaces + wordChar + nsm + wordChar + spaces;
476 twoFindIndexes(input, matcher, 2, 5);
477 // SSNNSS
478 input = spaces + nsm + nsm + spaces;
479 matcher.reset(input);
480 if (matcher.find())
481 failCount++;
482 // SSN.BBN.SS
483 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
484 twoFindIndexes(input, matcher, 3, 6);
485
486 report("Unicode word boundary");
487 }
488
489 private static void twoFindIndexes(String input, Matcher matcher, int a,
490 int b) throws Exception
491 {
492 matcher.reset(input);
493 matcher.find();
494 if (matcher.start() != a)
495 failCount++;
496 matcher.find();
497 if (matcher.start() != b)
498 failCount++;
499 }
500
501 // This test is for 6284152
502 static void check(String regex, String input, String[] expected) {
503 List<String> result = new ArrayList<String>();
504 Pattern p = Pattern.compile(regex);
505 Matcher m = p.matcher(input);
506 while (m.find()) {
507 result.add(m.group());
508 }
509 if (!Arrays.asList(expected).equals(result))
510 failCount++;
511 }
512
513 private static void lookbehindTest() throws Exception {
514 //Positive
515 check("(?<=%.{0,5})foo\\d",
516 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
517 new String[]{"foo1", "foo2", "foo3"});
518
519 //boundary at end of the lookbehind sub-regex should work consistently
520 //with the boundary just after the lookbehind sub-regex
521 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
522 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
523 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
524 check("(?<!abc \\b)foo", "abc foo", new String[0]);
525
526 //Negative
527 check("(?<!%.{0,5})foo\\d",
528 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
529 new String[] {"foo4", "foo5"});
530
531 //Positive greedy
532 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
533
534 //Positive reluctant
535 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
536
537 //supplementary
538 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
539 new String[] {"fo\ud800\udc00o"});
540 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
541 new String[] {"fo\ud800\udc00o"});
542 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
543 new String[] {"fo\ud800\udc00o"});
544 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
545 new String[] {"fo\ud800\udc00o"});
546 report("Lookbehind");
547 }
548
549 // This test is for 4938995
550 // Check to see if weak region boundaries are transparent to
551 // lookahead and lookbehind constructs
552 private static void boundsTest() throws Exception {
553 String fullMessage = "catdogcat";
554 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
555 Matcher matcher = pattern.matcher("catdogca");
556 matcher.useTransparentBounds(true);
557 if (matcher.find())
558 failCount++;
559 matcher.reset("atdogcat");
560 if (matcher.find())
561 failCount++;
562 matcher.reset(fullMessage);
563 if (!matcher.find())
564 failCount++;
565 matcher.reset(fullMessage);
566 matcher.region(0,9);
567 if (!matcher.find())
568 failCount++;
569 matcher.reset(fullMessage);
570 matcher.region(0,6);
571 if (!matcher.find())
572 failCount++;
573 matcher.reset(fullMessage);
574 matcher.region(3,6);
575 if (!matcher.find())
576 failCount++;
577 matcher.useTransparentBounds(false);
578 if (matcher.find())
579 failCount++;
580
581 // Negative lookahead/lookbehind
582 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
583 matcher = pattern.matcher("dogcat");
584 matcher.useTransparentBounds(true);
585 matcher.region(0,3);
586 if (matcher.find())
587 failCount++;
588 matcher.reset("catdog");
589 matcher.region(3,6);
590 if (matcher.find())
591 failCount++;
592 matcher.useTransparentBounds(false);
593 matcher.reset("dogcat");
594 matcher.region(0,3);
595 if (!matcher.find())
596 failCount++;
597 matcher.reset("catdog");
598 matcher.region(3,6);
599 if (!matcher.find())
600 failCount++;
601
602 report("Region bounds transparency");
603 }
604
605 // This test is for 4945394
606 private static void findFromTest() throws Exception {
607 String message = "This is 40 $0 message.";
608 Pattern pat = Pattern.compile("\\$0");
609 Matcher match = pat.matcher(message);
610 if (!match.find())
611 failCount++;
612 if (match.find())
613 failCount++;
614 if (match.find())
615 failCount++;
616 report("Check for alternating find");
617 }
618
619 // This test is for 4872664 and 4892980
620 private static void negatedCharClassTest() throws Exception {
621 Pattern pattern = Pattern.compile("[^>]");
622 Matcher matcher = pattern.matcher("\u203A");
623 if (!matcher.matches())
624 failCount++;
625 pattern = Pattern.compile("[^fr]");
626 matcher = pattern.matcher("a");
627 if (!matcher.find())
628 failCount++;
629 matcher.reset("\u203A");
630 if (!matcher.find())
631 failCount++;
632 String s = "for";
633 String result[] = s.split("[^fr]");
634 if (!result[0].equals("f"))
635 failCount++;
636 if (!result[1].equals("r"))
637 failCount++;
638 s = "f\u203Ar";
639 result = s.split("[^fr]");
640 if (!result[0].equals("f"))
641 failCount++;
642 if (!result[1].equals("r"))
643 failCount++;
644
645 // Test adding to bits, subtracting a node, then adding to bits again
646 pattern = Pattern.compile("[^f\u203Ar]");
647 matcher = pattern.matcher("a");
648 if (!matcher.find())
649 failCount++;
650 matcher.reset("f");
651 if (matcher.find())
652 failCount++;
653 matcher.reset("\u203A");
654 if (matcher.find())
655 failCount++;
656 matcher.reset("r");
657 if (matcher.find())
658 failCount++;
659 matcher.reset("\u203B");
660 if (!matcher.find())
661 failCount++;
662
663 // Test subtracting a node, adding to bits, subtracting again
664 pattern = Pattern.compile("[^\u203Ar\u203B]");
665 matcher = pattern.matcher("a");
666 if (!matcher.find())
667 failCount++;
668 matcher.reset("\u203A");
669 if (matcher.find())
670 failCount++;
671 matcher.reset("r");
672 if (matcher.find())
673 failCount++;
674 matcher.reset("\u203B");
675 if (matcher.find())
676 failCount++;
677 matcher.reset("\u203C");
678 if (!matcher.find())
679 failCount++;
680
681 report("Negated Character Class");
682 }
683
684 // This test is for 4628291
685 private static void toStringTest() throws Exception {
686 Pattern pattern = Pattern.compile("b+");
687 if (pattern.toString() != "b+")
688 failCount++;
689 Matcher matcher = pattern.matcher("aaabbbccc");
690 String matcherString = matcher.toString(); // unspecified
691 matcher.find();
692 matcherString = matcher.toString(); // unspecified
693 matcher.region(0,3);
694 matcherString = matcher.toString(); // unspecified
695 matcher.reset();
696 matcherString = matcher.toString(); // unspecified
697 report("toString");
698 }
699
700 // This test is for 4808962
701 private static void literalPatternTest() throws Exception {
702 int flags = Pattern.LITERAL;
703
704 Pattern pattern = Pattern.compile("abc\\t$^", flags);
705 check(pattern, "abc\\t$^", true);
706
707 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
708 check(pattern, "abc\\t$^", true);
709
710 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
711 check(pattern, "\\Qa^$bcabc\\E", true);
712 check(pattern, "a^$bcabc", false);
713
714 pattern = Pattern.compile("\\\\Q\\\\E");
715 check(pattern, "\\Q\\E", true);
716
717 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
718 check(pattern, "abcefg\\Q\\Ehij", true);
719
720 pattern = Pattern.compile("\\\\\\Q\\\\E");
721 check(pattern, "\\\\\\\\", true);
722
723 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
724 check(pattern, "\\Qa^$bcabc\\E", true);
725 check(pattern, "a^$bcabc", false);
726
727 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
728 check(pattern, "\\Qabc\\Edef", true);
729 check(pattern, "abcdef", false);
730
731 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
732 check(pattern, "abc\\Edef", true);
733 check(pattern, "abcdef", false);
734
735 pattern = Pattern.compile(Pattern.quote("\\E"));
736 check(pattern, "\\E", true);
737
738 pattern = Pattern.compile("((((abc.+?:)", flags);
739 check(pattern, "((((abc.+?:)", true);
740
741 flags |= Pattern.MULTILINE;
742
743 pattern = Pattern.compile("^cat$", flags);
744 check(pattern, "abc^cat$def", true);
745 check(pattern, "cat", false);
746
747 flags |= Pattern.CASE_INSENSITIVE;
748
749 pattern = Pattern.compile("abcdef", flags);
750 check(pattern, "ABCDEF", true);
751 check(pattern, "AbCdEf", true);
752
753 flags |= Pattern.DOTALL;
754
755 pattern = Pattern.compile("a...b", flags);
756 check(pattern, "A...b", true);
757 check(pattern, "Axxxb", false);
758
759 flags |= Pattern.CANON_EQ;
760
761 Pattern p = Pattern.compile("testa\u030a", flags);
762 check(pattern, "testa\u030a", false);
763 check(pattern, "test\u00e5", false);
764
765 // Supplementary character test
766 flags = Pattern.LITERAL;
767
768 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
769 check(pattern, toSupplementaries("abc\\t$^"), true);
770
771 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
772 check(pattern, toSupplementaries("abc\\t$^"), true);
773
774 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
775 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
776 check(pattern, toSupplementaries("a^$bcabc"), false);
777
778 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
779 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
780 check(pattern, toSupplementaries("a^$bcabc"), false);
781
782 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
783 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
784 check(pattern, toSupplementaries("abcdef"), false);
785
786 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
787 check(pattern, toSupplementaries("abc\\Edef"), true);
788 check(pattern, toSupplementaries("abcdef"), false);
789
790 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
791 check(pattern, toSupplementaries("((((abc.+?:)"), true);
792
793 flags |= Pattern.MULTILINE;
794
795 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
796 check(pattern, toSupplementaries("abc^cat$def"), true);
797 check(pattern, toSupplementaries("cat"), false);
798
799 flags |= Pattern.DOTALL;
800
801 // note: this is case-sensitive.
802 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
803 check(pattern, toSupplementaries("a...b"), true);
804 check(pattern, toSupplementaries("axxxb"), false);
805
806 flags |= Pattern.CANON_EQ;
807
808 String t = toSupplementaries("test");
809 p = Pattern.compile(t + "a\u030a", flags);
810 check(pattern, t + "a\u030a", false);
811 check(pattern, t + "\u00e5", false);
812
813 report("Literal pattern");
814 }
815
816 // This test is for 4803179
817 // This test is also for 4808962, replacement parts
818 private static void literalReplacementTest() throws Exception {
819 int flags = Pattern.LITERAL;
820
821 Pattern pattern = Pattern.compile("abc", flags);
822 Matcher matcher = pattern.matcher("zzzabczzz");
823 String replaceTest = "$0";
824 String result = matcher.replaceAll(replaceTest);
825 if (!result.equals("zzzabczzz"))
826 failCount++;
827
828 matcher.reset();
829 String literalReplacement = matcher.quoteReplacement(replaceTest);
830 result = matcher.replaceAll(literalReplacement);
831 if (!result.equals("zzz$0zzz"))
832 failCount++;
833
834 matcher.reset();
835 replaceTest = "\\t$\\$";
836 literalReplacement = matcher.quoteReplacement(replaceTest);
837 result = matcher.replaceAll(literalReplacement);
838 if (!result.equals("zzz\\t$\\$zzz"))
839 failCount++;
840
841 // Supplementary character test
842 pattern = Pattern.compile(toSupplementaries("abc"), flags);
843 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
844 replaceTest = "$0";
845 result = matcher.replaceAll(replaceTest);
846 if (!result.equals(toSupplementaries("zzzabczzz")))
847 failCount++;
848
849 matcher.reset();
850 literalReplacement = matcher.quoteReplacement(replaceTest);
851 result = matcher.replaceAll(literalReplacement);
852 if (!result.equals(toSupplementaries("zzz$0zzz")))
853 failCount++;
854
855 matcher.reset();
856 replaceTest = "\\t$\\$";
857 literalReplacement = matcher.quoteReplacement(replaceTest);
858 result = matcher.replaceAll(literalReplacement);
859 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
860 failCount++;
861
sherman5c8f3492012-04-12 15:01:41 -0700862 // IAE should be thrown if backslash or '$' is the last character
863 // in replacement string
864 try {
865 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700866 failCount++;
867 } catch (IllegalArgumentException iie) {
868 } catch (Exception e) {
869 failCount++;
870 }
871 try {
sherman5c8f3492012-04-12 15:01:41 -0700872 "\uac00".replaceAll("\uac00", "\\");
873 failCount++;
874 } catch (IllegalArgumentException iie) {
875 } catch (Exception e) {
876 failCount++;
877 }
sherman0b4d42d2009-02-23 21:06:15 -0800878 report("Literal replacement");
879 }
880
881 // This test is for 4757029
882 private static void regionTest() throws Exception {
883 Pattern pattern = Pattern.compile("abc");
884 Matcher matcher = pattern.matcher("abcdefabc");
885
886 matcher.region(0,9);
887 if (!matcher.find())
888 failCount++;
889 if (!matcher.find())
890 failCount++;
891 matcher.region(0,3);
892 if (!matcher.find())
893 failCount++;
894 matcher.region(3,6);
895 if (matcher.find())
896 failCount++;
897 matcher.region(0,2);
898 if (matcher.find())
899 failCount++;
900
901 expectRegionFail(matcher, 1, -1);
902 expectRegionFail(matcher, -1, -1);
903 expectRegionFail(matcher, -1, 1);
904 expectRegionFail(matcher, 5, 3);
905 expectRegionFail(matcher, 5, 12);
906 expectRegionFail(matcher, 12, 12);
907
908 pattern = Pattern.compile("^abc$");
909 matcher = pattern.matcher("zzzabczzz");
910 matcher.region(0,9);
911 if (matcher.find())
912 failCount++;
913 matcher.region(3,6);
914 if (!matcher.find())
915 failCount++;
916 matcher.region(3,6);
917 matcher.useAnchoringBounds(false);
918 if (matcher.find())
919 failCount++;
920
921 // Supplementary character test
922 pattern = Pattern.compile(toSupplementaries("abc"));
923 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
924 matcher.region(0,9*2);
925 if (!matcher.find())
926 failCount++;
927 if (!matcher.find())
928 failCount++;
929 matcher.region(0,3*2);
930 if (!matcher.find())
931 failCount++;
932 matcher.region(1,3*2);
933 if (matcher.find())
934 failCount++;
935 matcher.region(3*2,6*2);
936 if (matcher.find())
937 failCount++;
938 matcher.region(0,2*2);
939 if (matcher.find())
940 failCount++;
941 matcher.region(0,2*2+1);
942 if (matcher.find())
943 failCount++;
944
945 expectRegionFail(matcher, 1*2, -1);
946 expectRegionFail(matcher, -1, -1);
947 expectRegionFail(matcher, -1, 1*2);
948 expectRegionFail(matcher, 5*2, 3*2);
949 expectRegionFail(matcher, 5*2, 12*2);
950 expectRegionFail(matcher, 12*2, 12*2);
951
952 pattern = Pattern.compile(toSupplementaries("^abc$"));
953 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
954 matcher.region(0,9*2);
955 if (matcher.find())
956 failCount++;
957 matcher.region(3*2,6*2);
958 if (!matcher.find())
959 failCount++;
960 matcher.region(3*2+1,6*2);
961 if (matcher.find())
962 failCount++;
963 matcher.region(3*2,6*2-1);
964 if (matcher.find())
965 failCount++;
966 matcher.region(3*2,6*2);
967 matcher.useAnchoringBounds(false);
968 if (matcher.find())
969 failCount++;
970 report("Regions");
971 }
972
973 private static void expectRegionFail(Matcher matcher, int index1,
974 int index2)
975 {
976 try {
977 matcher.region(index1, index2);
978 failCount++;
979 } catch (IndexOutOfBoundsException ioobe) {
980 // Correct result
981 } catch (IllegalStateException ise) {
982 // Correct result
983 }
984 }
985
986 // This test is for 4803197
987 private static void escapedSegmentTest() throws Exception {
988
989 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
990 check(pattern, "dir1\\dir2", true);
991
992 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
993 check(pattern, "dir1\\dir2\\", true);
994
995 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
996 check(pattern, "dir1\\dir2\\", true);
997
998 // Supplementary character test
999 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1000 check(pattern, toSupplementaries("dir1\\dir2"), true);
1001
1002 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1003 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1004
1005 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1006 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1007
1008 report("Escaped segment");
1009 }
1010
1011 // This test is for 4792284
1012 private static void nonCaptureRepetitionTest() throws Exception {
1013 String input = "abcdefgh;";
1014
1015 String[] patterns = new String[] {
1016 "(?:\\w{4})+;",
1017 "(?:\\w{8})*;",
1018 "(?:\\w{2}){2,4};",
1019 "(?:\\w{4}){2,};", // only matches the
1020 ".*?(?:\\w{5})+;", // specified minimum
1021 ".*?(?:\\w{9})*;", // number of reps - OK
1022 "(?:\\w{4})+?;", // lazy repetition - OK
1023 "(?:\\w{4})++;", // possessive repetition - OK
1024 "(?:\\w{2,}?)+;", // non-deterministic - OK
1025 "(\\w{4})+;", // capturing group - OK
1026 };
1027
1028 for (int i = 0; i < patterns.length; i++) {
1029 // Check find()
1030 check(patterns[i], 0, input, input, true);
1031 // Check matches()
1032 Pattern p = Pattern.compile(patterns[i]);
1033 Matcher m = p.matcher(input);
1034
1035 if (m.matches()) {
1036 if (!m.group(0).equals(input))
1037 failCount++;
1038 } else {
1039 failCount++;
1040 }
1041 }
1042
1043 report("Non capturing repetition");
1044 }
1045
1046 // This test is for 6358731
1047 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1048 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1049 Matcher matcher = pattern.matcher("abcd");
1050 if (!matcher.matches() ||
1051 matcher.group(1) != null ||
1052 !matcher.group(2).equals("abcd")) {
1053 failCount++;
1054 }
1055 report("Not captured GroupCurly");
1056 }
1057
1058 // This test is for 4706545
1059 private static void javaCharClassTest() throws Exception {
1060 for (int i=0; i<1000; i++) {
1061 char c = (char)generator.nextInt();
1062 check("{javaLowerCase}", c, Character.isLowerCase(c));
1063 check("{javaUpperCase}", c, Character.isUpperCase(c));
1064 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1065 check("{javaTitleCase}", c, Character.isTitleCase(c));
1066 check("{javaDigit}", c, Character.isDigit(c));
1067 check("{javaDefined}", c, Character.isDefined(c));
1068 check("{javaLetter}", c, Character.isLetter(c));
1069 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1070 check("{javaJavaIdentifierStart}", c,
1071 Character.isJavaIdentifierStart(c));
1072 check("{javaJavaIdentifierPart}", c,
1073 Character.isJavaIdentifierPart(c));
1074 check("{javaUnicodeIdentifierStart}", c,
1075 Character.isUnicodeIdentifierStart(c));
1076 check("{javaUnicodeIdentifierPart}", c,
1077 Character.isUnicodeIdentifierPart(c));
1078 check("{javaIdentifierIgnorable}", c,
1079 Character.isIdentifierIgnorable(c));
1080 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1081 check("{javaWhitespace}", c, Character.isWhitespace(c));
1082 check("{javaISOControl}", c, Character.isISOControl(c));
1083 check("{javaMirrored}", c, Character.isMirrored(c));
1084
1085 }
1086
1087 // Supplementary character test
1088 for (int i=0; i<1000; i++) {
1089 int c = generator.nextInt(Character.MAX_CODE_POINT
1090 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1091 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1092 check("{javaLowerCase}", c, Character.isLowerCase(c));
1093 check("{javaUpperCase}", c, Character.isUpperCase(c));
1094 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1095 check("{javaTitleCase}", c, Character.isTitleCase(c));
1096 check("{javaDigit}", c, Character.isDigit(c));
1097 check("{javaDefined}", c, Character.isDefined(c));
1098 check("{javaLetter}", c, Character.isLetter(c));
1099 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1100 check("{javaJavaIdentifierStart}", c,
1101 Character.isJavaIdentifierStart(c));
1102 check("{javaJavaIdentifierPart}", c,
1103 Character.isJavaIdentifierPart(c));
1104 check("{javaUnicodeIdentifierStart}", c,
1105 Character.isUnicodeIdentifierStart(c));
1106 check("{javaUnicodeIdentifierPart}", c,
1107 Character.isUnicodeIdentifierPart(c));
1108 check("{javaIdentifierIgnorable}", c,
1109 Character.isIdentifierIgnorable(c));
1110 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1111 check("{javaWhitespace}", c, Character.isWhitespace(c));
1112 check("{javaISOControl}", c, Character.isISOControl(c));
1113 check("{javaMirrored}", c, Character.isMirrored(c));
1114 }
1115
1116 report("Java character classes");
1117 }
1118
1119 // This test is for 4523620
1120 /*
1121 private static void numOccurrencesTest() throws Exception {
1122 Pattern pattern = Pattern.compile("aaa");
1123
1124 if (pattern.numOccurrences("aaaaaa", false) != 2)
1125 failCount++;
1126 if (pattern.numOccurrences("aaaaaa", true) != 4)
1127 failCount++;
1128
1129 pattern = Pattern.compile("^");
1130 if (pattern.numOccurrences("aaaaaa", false) != 1)
1131 failCount++;
1132 if (pattern.numOccurrences("aaaaaa", true) != 1)
1133 failCount++;
1134
1135 report("Number of Occurrences");
1136 }
1137 */
1138
1139 // This test is for 4776374
1140 private static void caretBetweenTerminatorsTest() throws Exception {
1141 int flags1 = Pattern.DOTALL;
1142 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1143 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1144 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1145
1146 check("^....", flags1, "test\ntest", "test", true);
1147 check(".....^", flags1, "test\ntest", "test", false);
1148 check(".....^", flags1, "test\n", "test", false);
1149 check("....^", flags1, "test\r\n", "test", false);
1150
1151 check("^....", flags2, "test\ntest", "test", true);
1152 check("....^", flags2, "test\ntest", "test", false);
1153 check(".....^", flags2, "test\n", "test", false);
1154 check("....^", flags2, "test\r\n", "test", false);
1155
1156 check("^....", flags3, "test\ntest", "test", true);
1157 check(".....^", flags3, "test\ntest", "test\n", true);
1158 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1159 check(".....^", flags3, "test\n", "test", false);
1160 check(".....^", flags3, "test\r\n", "test", false);
1161 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1162
1163 check("^....", flags4, "test\ntest", "test", true);
1164 check(".....^", flags3, "test\ntest", "test\n", true);
1165 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1166 check(".....^", flags4, "test\n", "test\n", false);
1167 check(".....^", flags4, "test\r\n", "test\r", false);
1168
1169 // Supplementary character test
1170 String t = toSupplementaries("test");
1171 check("^....", flags1, t+"\n"+t, t, true);
1172 check(".....^", flags1, t+"\n"+t, t, false);
1173 check(".....^", flags1, t+"\n", t, false);
1174 check("....^", flags1, t+"\r\n", t, false);
1175
1176 check("^....", flags2, t+"\n"+t, t, true);
1177 check("....^", flags2, t+"\n"+t, t, false);
1178 check(".....^", flags2, t+"\n", t, false);
1179 check("....^", flags2, t+"\r\n", t, false);
1180
1181 check("^....", flags3, t+"\n"+t, t, true);
1182 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1183 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1184 check(".....^", flags3, t+"\n", t, false);
1185 check(".....^", flags3, t+"\r\n", t, false);
1186 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1187
1188 check("^....", flags4, t+"\n"+t, t, true);
1189 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1190 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1191 check(".....^", flags4, t+"\n", t+"\n", false);
1192 check(".....^", flags4, t+"\r\n", t+"\r", false);
1193
1194 report("Caret between terminators");
1195 }
1196
1197 // This test is for 4727935
1198 private static void dollarAtEndTest() throws Exception {
1199 int flags1 = Pattern.DOTALL;
1200 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1201 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1202
1203 check("....$", flags1, "test\n", "test", true);
1204 check("....$", flags1, "test\r\n", "test", true);
1205 check(".....$", flags1, "test\n", "test\n", true);
1206 check(".....$", flags1, "test\u0085", "test\u0085", true);
1207 check("....$", flags1, "test\u0085", "test", true);
1208
1209 check("....$", flags2, "test\n", "test", true);
1210 check(".....$", flags2, "test\n", "test\n", true);
1211 check(".....$", flags2, "test\u0085", "test\u0085", true);
1212 check("....$", flags2, "test\u0085", "est\u0085", true);
1213
1214 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1215 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1216 check("....$blah", flags3, "test\nblah", "!!!!", false);
1217 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1218
1219 // Supplementary character test
1220 String t = toSupplementaries("test");
1221 String b = toSupplementaries("blah");
1222 check("....$", flags1, t+"\n", t, true);
1223 check("....$", flags1, t+"\r\n", t, true);
1224 check(".....$", flags1, t+"\n", t+"\n", true);
1225 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1226 check("....$", flags1, t+"\u0085", t, true);
1227
1228 check("....$", flags2, t+"\n", t, true);
1229 check(".....$", flags2, t+"\n", t+"\n", true);
1230 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1231 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1232
1233 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1234 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1235 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1236 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1237
1238 report("Dollar at End");
1239 }
1240
1241 // This test is for 4711773
1242 private static void multilineDollarTest() throws Exception {
1243 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1244 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1245 matcher.find();
1246 if (matcher.start(0) != 9)
1247 failCount++;
1248 matcher.find();
1249 if (matcher.start(0) != 20)
1250 failCount++;
1251
1252 // Supplementary character test
1253 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1254 matcher.find();
1255 if (matcher.start(0) != 9*2)
1256 failCount++;
1257 matcher.find();
1258 if (matcher.start(0) != 20*2)
1259 failCount++;
1260
1261 report("Multiline Dollar");
1262 }
1263
1264 private static void reluctantRepetitionTest() throws Exception {
1265 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1266 check(p, "1 word word word 2", true);
1267 check(p, "1 wor wo w 2", true);
1268 check(p, "1 word word 2", true);
1269 check(p, "1 word 2", true);
1270 check(p, "1 wo w w 2", true);
1271 check(p, "1 wo w 2", true);
1272 check(p, "1 wor w 2", true);
1273
1274 p = Pattern.compile("([a-z])+?c");
1275 Matcher m = p.matcher("ababcdefdec");
1276 check(m, "ababc");
1277
1278 // Supplementary character test
1279 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1280 m = p.matcher(toSupplementaries("ababcdefdec"));
1281 check(m, toSupplementaries("ababc"));
1282
1283 report("Reluctant Repetition");
1284 }
1285
1286 private static void serializeTest() throws Exception {
1287 String patternStr = "(b)";
1288 String matchStr = "b";
1289 Pattern pattern = Pattern.compile(patternStr);
1290 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1291 ObjectOutputStream oos = new ObjectOutputStream(baos);
1292 oos.writeObject(pattern);
1293 oos.close();
1294 ObjectInputStream ois = new ObjectInputStream(
1295 new ByteArrayInputStream(baos.toByteArray()));
1296 Pattern serializedPattern = (Pattern)ois.readObject();
1297 ois.close();
1298 Matcher matcher = serializedPattern.matcher(matchStr);
1299 if (!matcher.matches())
1300 failCount++;
1301 if (matcher.groupCount() != 1)
1302 failCount++;
1303
1304 report("Serialization");
1305 }
1306
1307 private static void gTest() {
1308 Pattern pattern = Pattern.compile("\\G\\w");
1309 Matcher matcher = pattern.matcher("abc#x#x");
1310 matcher.find();
1311 matcher.find();
1312 matcher.find();
1313 if (matcher.find())
1314 failCount++;
1315
1316 pattern = Pattern.compile("\\GA*");
1317 matcher = pattern.matcher("1A2AA3");
1318 matcher.find();
1319 if (matcher.find())
1320 failCount++;
1321
1322 pattern = Pattern.compile("\\GA*");
1323 matcher = pattern.matcher("1A2AA3");
1324 if (!matcher.find(1))
1325 failCount++;
1326 matcher.find();
1327 if (matcher.find())
1328 failCount++;
1329
1330 report("\\G");
1331 }
1332
1333 private static void zTest() {
1334 Pattern pattern = Pattern.compile("foo\\Z");
1335 // Positives
1336 check(pattern, "foo\u0085", true);
1337 check(pattern, "foo\u2028", true);
1338 check(pattern, "foo\u2029", true);
1339 check(pattern, "foo\n", true);
1340 check(pattern, "foo\r", true);
1341 check(pattern, "foo\r\n", true);
1342 // Negatives
1343 check(pattern, "fooo", false);
1344 check(pattern, "foo\n\r", false);
1345
1346 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1347 // Positives
1348 check(pattern, "foo", true);
1349 check(pattern, "foo\n", true);
1350 // Negatives
1351 check(pattern, "foo\r", false);
1352 check(pattern, "foo\u0085", false);
1353 check(pattern, "foo\u2028", false);
1354 check(pattern, "foo\u2029", false);
1355
1356 report("\\Z");
1357 }
1358
1359 private static void replaceFirstTest() {
1360 Pattern pattern = Pattern.compile("(ab)(c*)");
1361 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1362 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1363 failCount++;
1364
1365 matcher.reset("zzzabccczzzabcczzzabccczzz");
1366 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1367 failCount++;
1368
1369 matcher.reset("zzzabccczzzabcczzzabccczzz");
1370 String result = matcher.replaceFirst("$1");
1371 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1372 failCount++;
1373
1374 matcher.reset("zzzabccczzzabcczzzabccczzz");
1375 result = matcher.replaceFirst("$2");
1376 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1377 failCount++;
1378
1379 pattern = Pattern.compile("a*");
1380 matcher = pattern.matcher("aaaaaaaaaa");
1381 if (!matcher.replaceFirst("test").equals("test"))
1382 failCount++;
1383
1384 pattern = Pattern.compile("a+");
1385 matcher = pattern.matcher("zzzaaaaaaaaaa");
1386 if (!matcher.replaceFirst("test").equals("zzztest"))
1387 failCount++;
1388
1389 // Supplementary character test
1390 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1391 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1392 if (!matcher.replaceFirst(toSupplementaries("test"))
1393 .equals(toSupplementaries("testzzzabcczzzabccc")))
1394 failCount++;
1395
1396 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1397 if (!matcher.replaceFirst(toSupplementaries("test")).
1398 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1399 failCount++;
1400
1401 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1402 result = matcher.replaceFirst("$1");
1403 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1404 failCount++;
1405
1406 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1407 result = matcher.replaceFirst("$2");
1408 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1409 failCount++;
1410
1411 pattern = Pattern.compile(toSupplementaries("a*"));
1412 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1413 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1414 failCount++;
1415
1416 pattern = Pattern.compile(toSupplementaries("a+"));
1417 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1418 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1419 failCount++;
1420
1421 report("Replace First");
1422 }
1423
1424 private static void unixLinesTest() {
1425 Pattern pattern = Pattern.compile(".*");
1426 Matcher matcher = pattern.matcher("aa\u2028blah");
1427 matcher.find();
1428 if (!matcher.group(0).equals("aa"))
1429 failCount++;
1430
1431 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1432 matcher = pattern.matcher("aa\u2028blah");
1433 matcher.find();
1434 if (!matcher.group(0).equals("aa\u2028blah"))
1435 failCount++;
1436
1437 pattern = Pattern.compile("[az]$",
1438 Pattern.MULTILINE | Pattern.UNIX_LINES);
1439 matcher = pattern.matcher("aa\u2028zz");
1440 check(matcher, "a\u2028", false);
1441
1442 // Supplementary character test
1443 pattern = Pattern.compile(".*");
1444 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1445 matcher.find();
1446 if (!matcher.group(0).equals(toSupplementaries("aa")))
1447 failCount++;
1448
1449 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1450 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1451 matcher.find();
1452 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1453 failCount++;
1454
1455 pattern = Pattern.compile(toSupplementaries("[az]$"),
1456 Pattern.MULTILINE | Pattern.UNIX_LINES);
1457 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1458 check(matcher, toSupplementaries("a\u2028"), false);
1459
1460 report("Unix Lines");
1461 }
1462
1463 private static void commentsTest() {
1464 int flags = Pattern.COMMENTS;
1465
1466 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1467 Matcher matcher = pattern.matcher("aa#aa");
1468 if (!matcher.matches())
1469 failCount++;
1470
1471 pattern = Pattern.compile("aa # blah", flags);
1472 matcher = pattern.matcher("aa");
1473 if (!matcher.matches())
1474 failCount++;
1475
1476 pattern = Pattern.compile("aa blah", flags);
1477 matcher = pattern.matcher("aablah");
1478 if (!matcher.matches())
1479 failCount++;
1480
1481 pattern = Pattern.compile("aa # blah blech ", flags);
1482 matcher = pattern.matcher("aa");
1483 if (!matcher.matches())
1484 failCount++;
1485
1486 pattern = Pattern.compile("aa # blah\n ", flags);
1487 matcher = pattern.matcher("aa");
1488 if (!matcher.matches())
1489 failCount++;
1490
1491 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1492 matcher = pattern.matcher("aabc");
1493 if (!matcher.matches())
1494 failCount++;
1495
1496 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1497 matcher = pattern.matcher("aabc");
1498 if (!matcher.matches())
1499 failCount++;
1500
1501 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1502 matcher = pattern.matcher("aabc#blech");
1503 if (!matcher.matches())
1504 failCount++;
1505
1506 // Supplementary character test
1507 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1508 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1509 if (!matcher.matches())
1510 failCount++;
1511
1512 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1513 matcher = pattern.matcher(toSupplementaries("aa"));
1514 if (!matcher.matches())
1515 failCount++;
1516
1517 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1518 matcher = pattern.matcher(toSupplementaries("aablah"));
1519 if (!matcher.matches())
1520 failCount++;
1521
1522 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1523 matcher = pattern.matcher(toSupplementaries("aa"));
1524 if (!matcher.matches())
1525 failCount++;
1526
1527 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1528 matcher = pattern.matcher(toSupplementaries("aa"));
1529 if (!matcher.matches())
1530 failCount++;
1531
1532 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1533 matcher = pattern.matcher(toSupplementaries("aabc"));
1534 if (!matcher.matches())
1535 failCount++;
1536
1537 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1538 matcher = pattern.matcher(toSupplementaries("aabc"));
1539 if (!matcher.matches())
1540 failCount++;
1541
1542 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1543 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1544 if (!matcher.matches())
1545 failCount++;
1546
1547 report("Comments");
1548 }
1549
1550 private static void caseFoldingTest() { // bug 4504687
1551 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1552 Pattern pattern = Pattern.compile("aa", flags);
1553 Matcher matcher = pattern.matcher("ab");
1554 if (matcher.matches())
1555 failCount++;
1556
1557 pattern = Pattern.compile("aA", flags);
1558 matcher = pattern.matcher("ab");
1559 if (matcher.matches())
1560 failCount++;
1561
1562 pattern = Pattern.compile("aa", flags);
1563 matcher = pattern.matcher("aB");
1564 if (matcher.matches())
1565 failCount++;
1566 matcher = pattern.matcher("Ab");
1567 if (matcher.matches())
1568 failCount++;
1569
1570 // ASCII "a"
1571 // Latin-1 Supplement "a" + grave
1572 // Cyrillic "a"
1573 String[] patterns = new String[] {
1574 //single
1575 "a", "\u00e0", "\u0430",
1576 //slice
1577 "ab", "\u00e0\u00e1", "\u0430\u0431",
1578 //class single
1579 "[a]", "[\u00e0]", "[\u0430]",
1580 //class range
1581 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1582 //back reference
1583 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1584 };
1585
1586 String[] texts = new String[] {
1587 "A", "\u00c0", "\u0410",
1588 "AB", "\u00c0\u00c1", "\u0410\u0411",
1589 "A", "\u00c0", "\u0410",
1590 "B", "\u00c2", "\u0411",
1591 "aA", "\u00e0\u00c0", "\u0430\u0410"
1592 };
1593
1594 boolean[] expected = new boolean[] {
1595 true, false, false,
1596 true, false, false,
1597 true, false, false,
1598 true, false, false,
1599 true, false, false
1600 };
1601
1602 flags = Pattern.CASE_INSENSITIVE;
1603 for (int i = 0; i < patterns.length; i++) {
1604 pattern = Pattern.compile(patterns[i], flags);
1605 matcher = pattern.matcher(texts[i]);
1606 if (matcher.matches() != expected[i]) {
1607 System.out.println("<1> Failed at " + i);
1608 failCount++;
1609 }
1610 }
1611
1612 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1613 for (int i = 0; i < patterns.length; i++) {
1614 pattern = Pattern.compile(patterns[i], flags);
1615 matcher = pattern.matcher(texts[i]);
1616 if (!matcher.matches()) {
1617 System.out.println("<2> Failed at " + i);
1618 failCount++;
1619 }
1620 }
1621 // flag unicode_case alone should do nothing
1622 flags = Pattern.UNICODE_CASE;
1623 for (int i = 0; i < patterns.length; i++) {
1624 pattern = Pattern.compile(patterns[i], flags);
1625 matcher = pattern.matcher(texts[i]);
1626 if (matcher.matches()) {
1627 System.out.println("<3> Failed at " + i);
1628 failCount++;
1629 }
1630 }
1631
1632 // Special cases: i, I, u+0131 and u+0130
1633 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1634 pattern = Pattern.compile("[h-j]+", flags);
1635 if (!pattern.matcher("\u0131\u0130").matches())
1636 failCount++;
1637 report("Case Folding");
1638 }
1639
1640 private static void appendTest() {
1641 Pattern pattern = Pattern.compile("(ab)(cd)");
1642 Matcher matcher = pattern.matcher("abcd");
1643 String result = matcher.replaceAll("$2$1");
1644 if (!result.equals("cdab"))
1645 failCount++;
1646
1647 String s1 = "Swap all: first = 123, second = 456";
1648 String s2 = "Swap one: first = 123, second = 456";
1649 String r = "$3$2$1";
1650 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1651 matcher = pattern.matcher(s1);
1652
1653 result = matcher.replaceAll(r);
1654 if (!result.equals("Swap all: 123 = first, 456 = second"))
1655 failCount++;
1656
1657 matcher = pattern.matcher(s2);
1658
1659 if (matcher.find()) {
1660 StringBuffer sb = new StringBuffer();
1661 matcher.appendReplacement(sb, r);
1662 matcher.appendTail(sb);
1663 result = sb.toString();
1664 if (!result.equals("Swap one: 123 = first, second = 456"))
1665 failCount++;
1666 }
1667
1668 // Supplementary character test
1669 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1670 matcher = pattern.matcher(toSupplementaries("abcd"));
1671 result = matcher.replaceAll("$2$1");
1672 if (!result.equals(toSupplementaries("cdab")))
1673 failCount++;
1674
1675 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1676 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1677 r = toSupplementaries("$3$2$1");
1678 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1679 matcher = pattern.matcher(s1);
1680
1681 result = matcher.replaceAll(r);
1682 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1683 failCount++;
1684
1685 matcher = pattern.matcher(s2);
1686
1687 if (matcher.find()) {
1688 StringBuffer sb = new StringBuffer();
1689 matcher.appendReplacement(sb, r);
1690 matcher.appendTail(sb);
1691 result = sb.toString();
1692 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1693 failCount++;
1694 }
1695 report("Append");
1696 }
1697
1698 private static void splitTest() {
1699 Pattern pattern = Pattern.compile(":");
1700 String[] result = pattern.split("foo:and:boo", 2);
1701 if (!result[0].equals("foo"))
1702 failCount++;
1703 if (!result[1].equals("and:boo"))
1704 failCount++;
1705 // Supplementary character test
1706 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1707 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1708 if (!result[0].equals(toSupplementaries("foo")))
1709 failCount++;
1710 if (!result[1].equals(toSupplementaries("andXboo")))
1711 failCount++;
1712
1713 CharBuffer cb = CharBuffer.allocate(100);
1714 cb.put("foo:and:boo");
1715 cb.flip();
1716 result = pattern.split(cb);
1717 if (!result[0].equals("foo"))
1718 failCount++;
1719 if (!result[1].equals("and"))
1720 failCount++;
1721 if (!result[2].equals("boo"))
1722 failCount++;
1723
1724 // Supplementary character test
1725 CharBuffer cbs = CharBuffer.allocate(100);
1726 cbs.put(toSupplementaries("fooXandXboo"));
1727 cbs.flip();
1728 result = patternX.split(cbs);
1729 if (!result[0].equals(toSupplementaries("foo")))
1730 failCount++;
1731 if (!result[1].equals(toSupplementaries("and")))
1732 failCount++;
1733 if (!result[2].equals(toSupplementaries("boo")))
1734 failCount++;
1735
1736 String source = "0123456789";
1737 for (int limit=-2; limit<3; limit++) {
1738 for (int x=0; x<10; x++) {
1739 result = source.split(Integer.toString(x), limit);
1740 int expectedLength = limit < 1 ? 2 : limit;
1741
1742 if ((limit == 0) && (x == 9)) {
1743 // expected dropping of ""
1744 if (result.length != 1)
1745 failCount++;
1746 if (!result[0].equals("012345678")) {
1747 failCount++;
1748 }
1749 } else {
1750 if (result.length != expectedLength) {
1751 failCount++;
1752 }
1753 if (!result[0].equals(source.substring(0,x))) {
1754 if (limit != 1) {
1755 failCount++;
1756 } else {
1757 if (!result[0].equals(source.substring(0,10))) {
1758 failCount++;
1759 }
1760 }
1761 }
1762 if (expectedLength > 1) { // Check segment 2
1763 if (!result[1].equals(source.substring(x+1,10)))
1764 failCount++;
1765 }
1766 }
1767 }
1768 }
1769 // Check the case for no match found
1770 for (int limit=-2; limit<3; limit++) {
1771 result = source.split("e", limit);
1772 if (result.length != 1)
1773 failCount++;
1774 if (!result[0].equals(source))
1775 failCount++;
1776 }
1777 // Check the case for limit == 0, source = "";
1778 source = "";
1779 result = source.split("e", 0);
1780 if (result.length != 1)
1781 failCount++;
1782 if (!result[0].equals(source))
1783 failCount++;
1784
1785 report("Split");
1786 }
1787
1788 private static void negationTest() {
1789 Pattern pattern = Pattern.compile("[\\[@^]+");
1790 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1791 if (!matcher.find())
1792 failCount++;
1793 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1794 failCount++;
1795 pattern = Pattern.compile("[@\\[^]+");
1796 matcher = pattern.matcher("@@@@[[[[^^^^");
1797 if (!matcher.find())
1798 failCount++;
1799 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1800 failCount++;
1801 pattern = Pattern.compile("[@\\[^@]+");
1802 matcher = pattern.matcher("@@@@[[[[^^^^");
1803 if (!matcher.find())
1804 failCount++;
1805 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1806 failCount++;
1807
1808 pattern = Pattern.compile("\\)");
1809 matcher = pattern.matcher("xxx)xxx");
1810 if (!matcher.find())
1811 failCount++;
1812
1813 report("Negation");
1814 }
1815
1816 private static void ampersandTest() {
1817 Pattern pattern = Pattern.compile("[&@]+");
1818 check(pattern, "@@@@&&&&", true);
1819
1820 pattern = Pattern.compile("[@&]+");
1821 check(pattern, "@@@@&&&&", true);
1822
1823 pattern = Pattern.compile("[@\\&]+");
1824 check(pattern, "@@@@&&&&", true);
1825
1826 report("Ampersand");
1827 }
1828
1829 private static void octalTest() throws Exception {
1830 Pattern pattern = Pattern.compile("\\u0007");
1831 Matcher matcher = pattern.matcher("\u0007");
1832 if (!matcher.matches())
1833 failCount++;
1834 pattern = Pattern.compile("\\07");
1835 matcher = pattern.matcher("\u0007");
1836 if (!matcher.matches())
1837 failCount++;
1838 pattern = Pattern.compile("\\007");
1839 matcher = pattern.matcher("\u0007");
1840 if (!matcher.matches())
1841 failCount++;
1842 pattern = Pattern.compile("\\0007");
1843 matcher = pattern.matcher("\u0007");
1844 if (!matcher.matches())
1845 failCount++;
1846 pattern = Pattern.compile("\\040");
1847 matcher = pattern.matcher("\u0020");
1848 if (!matcher.matches())
1849 failCount++;
1850 pattern = Pattern.compile("\\0403");
1851 matcher = pattern.matcher("\u00203");
1852 if (!matcher.matches())
1853 failCount++;
1854 pattern = Pattern.compile("\\0103");
1855 matcher = pattern.matcher("\u0043");
1856 if (!matcher.matches())
1857 failCount++;
1858
1859 report("Octal");
1860 }
1861
1862 private static void longPatternTest() throws Exception {
1863 try {
1864 Pattern pattern = Pattern.compile(
1865 "a 32-character-long pattern xxxx");
1866 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1867 pattern = Pattern.compile("a thirty four character long regex");
1868 StringBuffer patternToBe = new StringBuffer(101);
1869 for (int i=0; i<100; i++)
1870 patternToBe.append((char)(97 + i%26));
1871 pattern = Pattern.compile(patternToBe.toString());
1872 } catch (PatternSyntaxException e) {
1873 failCount++;
1874 }
1875
1876 // Supplementary character test
1877 try {
1878 Pattern pattern = Pattern.compile(
1879 toSupplementaries("a 32-character-long pattern xxxx"));
1880 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1881 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1882 StringBuffer patternToBe = new StringBuffer(101*2);
1883 for (int i=0; i<100; i++)
1884 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1885 + 97 + i%26));
1886 pattern = Pattern.compile(patternToBe.toString());
1887 } catch (PatternSyntaxException e) {
1888 failCount++;
1889 }
1890 report("LongPattern");
1891 }
1892
1893 private static void group0Test() throws Exception {
1894 Pattern pattern = Pattern.compile("(tes)ting");
1895 Matcher matcher = pattern.matcher("testing");
1896 check(matcher, "testing");
1897
1898 matcher.reset("testing");
1899 if (matcher.lookingAt()) {
1900 if (!matcher.group(0).equals("testing"))
1901 failCount++;
1902 } else {
1903 failCount++;
1904 }
1905
1906 matcher.reset("testing");
1907 if (matcher.matches()) {
1908 if (!matcher.group(0).equals("testing"))
1909 failCount++;
1910 } else {
1911 failCount++;
1912 }
1913
1914 pattern = Pattern.compile("(tes)ting");
1915 matcher = pattern.matcher("testing");
1916 if (matcher.lookingAt()) {
1917 if (!matcher.group(0).equals("testing"))
1918 failCount++;
1919 } else {
1920 failCount++;
1921 }
1922
1923 pattern = Pattern.compile("^(tes)ting");
1924 matcher = pattern.matcher("testing");
1925 if (matcher.matches()) {
1926 if (!matcher.group(0).equals("testing"))
1927 failCount++;
1928 } else {
1929 failCount++;
1930 }
1931
1932 // Supplementary character test
1933 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1934 matcher = pattern.matcher(toSupplementaries("testing"));
1935 check(matcher, toSupplementaries("testing"));
1936
1937 matcher.reset(toSupplementaries("testing"));
1938 if (matcher.lookingAt()) {
1939 if (!matcher.group(0).equals(toSupplementaries("testing")))
1940 failCount++;
1941 } else {
1942 failCount++;
1943 }
1944
1945 matcher.reset(toSupplementaries("testing"));
1946 if (matcher.matches()) {
1947 if (!matcher.group(0).equals(toSupplementaries("testing")))
1948 failCount++;
1949 } else {
1950 failCount++;
1951 }
1952
1953 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1954 matcher = pattern.matcher(toSupplementaries("testing"));
1955 if (matcher.lookingAt()) {
1956 if (!matcher.group(0).equals(toSupplementaries("testing")))
1957 failCount++;
1958 } else {
1959 failCount++;
1960 }
1961
1962 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1963 matcher = pattern.matcher(toSupplementaries("testing"));
1964 if (matcher.matches()) {
1965 if (!matcher.group(0).equals(toSupplementaries("testing")))
1966 failCount++;
1967 } else {
1968 failCount++;
1969 }
1970
1971 report("Group0");
1972 }
1973
1974 private static void findIntTest() throws Exception {
1975 Pattern p = Pattern.compile("blah");
1976 Matcher m = p.matcher("zzzzblahzzzzzblah");
1977 boolean result = m.find(2);
1978 if (!result)
1979 failCount++;
1980
1981 p = Pattern.compile("$");
1982 m = p.matcher("1234567890");
1983 result = m.find(10);
1984 if (!result)
1985 failCount++;
1986 try {
1987 result = m.find(11);
1988 failCount++;
1989 } catch (IndexOutOfBoundsException e) {
1990 // correct result
1991 }
1992
1993 // Supplementary character test
1994 p = Pattern.compile(toSupplementaries("blah"));
1995 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1996 result = m.find(2);
1997 if (!result)
1998 failCount++;
1999
2000 report("FindInt");
2001 }
2002
2003 private static void emptyPatternTest() throws Exception {
2004 Pattern p = Pattern.compile("");
2005 Matcher m = p.matcher("foo");
2006
2007 // Should find empty pattern at beginning of input
2008 boolean result = m.find();
2009 if (result != true)
2010 failCount++;
2011 if (m.start() != 0)
2012 failCount++;
2013
2014 // Should not match entire input if input is not empty
2015 m.reset();
2016 result = m.matches();
2017 if (result == true)
2018 failCount++;
2019
2020 try {
2021 m.start(0);
2022 failCount++;
2023 } catch (IllegalStateException e) {
2024 // Correct result
2025 }
2026
2027 // Should match entire input if input is empty
2028 m.reset("");
2029 result = m.matches();
2030 if (result != true)
2031 failCount++;
2032
2033 result = Pattern.matches("", "");
2034 if (result != true)
2035 failCount++;
2036
2037 result = Pattern.matches("", "foo");
2038 if (result == true)
2039 failCount++;
2040 report("EmptyPattern");
2041 }
2042
2043 private static void charClassTest() throws Exception {
2044 Pattern pattern = Pattern.compile("blah[ab]]blech");
2045 check(pattern, "blahb]blech", true);
2046
2047 pattern = Pattern.compile("[abc[def]]");
2048 check(pattern, "b", true);
2049
2050 // Supplementary character tests
2051 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2052 check(pattern, toSupplementaries("blahb]blech"), true);
2053
2054 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2055 check(pattern, toSupplementaries("b"), true);
2056
2057 try {
2058 // u00ff when UNICODE_CASE
2059 pattern = Pattern.compile("[ab\u00ffcd]",
2060 Pattern.CASE_INSENSITIVE|
2061 Pattern.UNICODE_CASE);
2062 check(pattern, "ab\u00ffcd", true);
2063 check(pattern, "Ab\u0178Cd", true);
2064
2065 // u00b5 when UNICODE_CASE
2066 pattern = Pattern.compile("[ab\u00b5cd]",
2067 Pattern.CASE_INSENSITIVE|
2068 Pattern.UNICODE_CASE);
2069 check(pattern, "ab\u00b5cd", true);
2070 check(pattern, "Ab\u039cCd", true);
2071 } catch (Exception e) { failCount++; }
2072
2073 /* Special cases
2074 (1)LatinSmallLetterLongS u+017f
2075 (2)LatinSmallLetterDotlessI u+0131
2076 (3)LatineCapitalLetterIWithDotAbove u+0130
2077 (4)KelvinSign u+212a
2078 (5)AngstromSign u+212b
2079 */
2080 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2081 pattern = Pattern.compile("[sik\u00c5]+", flags);
2082 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2083 failCount++;
2084
2085 report("CharClass");
2086 }
2087
2088 private static void caretTest() throws Exception {
2089 Pattern pattern = Pattern.compile("\\w*");
2090 Matcher matcher = pattern.matcher("a#bc#def##g");
2091 check(matcher, "a");
2092 check(matcher, "");
2093 check(matcher, "bc");
2094 check(matcher, "");
2095 check(matcher, "def");
2096 check(matcher, "");
2097 check(matcher, "");
2098 check(matcher, "g");
2099 check(matcher, "");
2100 if (matcher.find())
2101 failCount++;
2102
2103 pattern = Pattern.compile("^\\w*");
2104 matcher = pattern.matcher("a#bc#def##g");
2105 check(matcher, "a");
2106 if (matcher.find())
2107 failCount++;
2108
2109 pattern = Pattern.compile("\\w");
2110 matcher = pattern.matcher("abc##x");
2111 check(matcher, "a");
2112 check(matcher, "b");
2113 check(matcher, "c");
2114 check(matcher, "x");
2115 if (matcher.find())
2116 failCount++;
2117
2118 pattern = Pattern.compile("^\\w");
2119 matcher = pattern.matcher("abc##x");
2120 check(matcher, "a");
2121 if (matcher.find())
2122 failCount++;
2123
2124 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2125 matcher = pattern.matcher("abcdef-ghi\njklmno");
2126 check(matcher, "abc");
2127 if (matcher.find())
2128 failCount++;
2129
2130 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2131 matcher = pattern.matcher("abcdef-ghi\njklmno");
2132 check(matcher, "abc");
2133 check(matcher, "jkl");
2134 if (matcher.find())
2135 failCount++;
2136
2137 pattern = Pattern.compile("^", Pattern.MULTILINE);
2138 matcher = pattern.matcher("this is some text");
2139 String result = matcher.replaceAll("X");
2140 if (!result.equals("Xthis is some text"))
2141 failCount++;
2142
2143 pattern = Pattern.compile("^");
2144 matcher = pattern.matcher("this is some text");
2145 result = matcher.replaceAll("X");
2146 if (!result.equals("Xthis is some text"))
2147 failCount++;
2148
2149 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2150 matcher = pattern.matcher("this is some text\n");
2151 result = matcher.replaceAll("X");
2152 if (!result.equals("Xthis is some text\n"))
2153 failCount++;
2154
2155 report("Caret");
2156 }
2157
2158 private static void groupCaptureTest() throws Exception {
2159 // Independent group
2160 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2161 Matcher matcher = pattern.matcher("xxxyyyzzz");
2162 matcher.find();
2163 try {
2164 String blah = matcher.group(1);
2165 failCount++;
2166 } catch (IndexOutOfBoundsException ioobe) {
2167 // Good result
2168 }
2169 // Pure group
2170 pattern = Pattern.compile("x+(?:y+)z+");
2171 matcher = pattern.matcher("xxxyyyzzz");
2172 matcher.find();
2173 try {
2174 String blah = matcher.group(1);
2175 failCount++;
2176 } catch (IndexOutOfBoundsException ioobe) {
2177 // Good result
2178 }
2179
2180 // Supplementary character tests
2181 // Independent group
2182 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2183 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2184 matcher.find();
2185 try {
2186 String blah = matcher.group(1);
2187 failCount++;
2188 } catch (IndexOutOfBoundsException ioobe) {
2189 // Good result
2190 }
2191 // Pure group
2192 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2193 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2194 matcher.find();
2195 try {
2196 String blah = matcher.group(1);
2197 failCount++;
2198 } catch (IndexOutOfBoundsException ioobe) {
2199 // Good result
2200 }
2201
2202 report("GroupCapture");
2203 }
2204
2205 private static void backRefTest() throws Exception {
2206 Pattern pattern = Pattern.compile("(a*)bc\\1");
2207 check(pattern, "zzzaabcazzz", true);
2208
2209 pattern = Pattern.compile("(a*)bc\\1");
2210 check(pattern, "zzzaabcaazzz", true);
2211
2212 pattern = Pattern.compile("(abc)(def)\\1");
2213 check(pattern, "abcdefabc", true);
2214
2215 pattern = Pattern.compile("(abc)(def)\\3");
2216 check(pattern, "abcdefabc", false);
2217
2218 try {
2219 for (int i = 1; i < 10; i++) {
2220 // Make sure backref 1-9 are always accepted
2221 pattern = Pattern.compile("abcdef\\" + i);
2222 // and fail to match if the target group does not exit
2223 check(pattern, "abcdef", false);
2224 }
2225 } catch(PatternSyntaxException e) {
2226 failCount++;
2227 }
2228
2229 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2230 check(pattern, "abcdefghija", false);
2231 check(pattern, "abcdefghija1", true);
2232
2233 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2234 check(pattern, "abcdefghijkk", true);
2235
2236 pattern = Pattern.compile("(a)bcdefghij\\11");
2237 check(pattern, "abcdefghija1", true);
2238
2239 // Supplementary character tests
2240 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2241 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2242
2243 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2244 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2245
2246 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2247 check(pattern, toSupplementaries("abcdefabc"), true);
2248
2249 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2250 check(pattern, toSupplementaries("abcdefabc"), false);
2251
2252 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2253 check(pattern, toSupplementaries("abcdefghija"), false);
2254 check(pattern, toSupplementaries("abcdefghija1"), true);
2255
2256 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2257 check(pattern, toSupplementaries("abcdefghijkk"), true);
2258
2259 report("BackRef");
2260 }
2261
2262 /**
2263 * Unicode Technical Report #18, section 2.6 End of Line
2264 * There is no empty line to be matched in the sequence \u000D\u000A
2265 * but there is an empty line in the sequence \u000A\u000D.
2266 */
2267 private static void anchorTest() throws Exception {
2268 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2269 Matcher m = p.matcher("blah1\r\nblah2");
2270 m.find();
2271 m.find();
2272 if (!m.group().equals("blah2"))
2273 failCount++;
2274
2275 m.reset("blah1\n\rblah2");
2276 m.find();
2277 m.find();
2278 m.find();
2279 if (!m.group().equals("blah2"))
2280 failCount++;
2281
2282 // Test behavior of $ with \r\n at end of input
2283 p = Pattern.compile(".+$");
2284 m = p.matcher("blah1\r\n");
2285 if (!m.find())
2286 failCount++;
2287 if (!m.group().equals("blah1"))
2288 failCount++;
2289 if (m.find())
2290 failCount++;
2291
2292 // Test behavior of $ with \r\n at end of input in multiline
2293 p = Pattern.compile(".+$", Pattern.MULTILINE);
2294 m = p.matcher("blah1\r\n");
2295 if (!m.find())
2296 failCount++;
2297 if (m.find())
2298 failCount++;
2299
2300 // Test for $ recognition of \u0085 for bug 4527731
2301 p = Pattern.compile(".+$", Pattern.MULTILINE);
2302 m = p.matcher("blah1\u0085");
2303 if (!m.find())
2304 failCount++;
2305
2306 // Supplementary character test
2307 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2308 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2309 m.find();
2310 m.find();
2311 if (!m.group().equals(toSupplementaries("blah2")))
2312 failCount++;
2313
2314 m.reset(toSupplementaries("blah1\n\rblah2"));
2315 m.find();
2316 m.find();
2317 m.find();
2318 if (!m.group().equals(toSupplementaries("blah2")))
2319 failCount++;
2320
2321 // Test behavior of $ with \r\n at end of input
2322 p = Pattern.compile(".+$");
2323 m = p.matcher(toSupplementaries("blah1\r\n"));
2324 if (!m.find())
2325 failCount++;
2326 if (!m.group().equals(toSupplementaries("blah1")))
2327 failCount++;
2328 if (m.find())
2329 failCount++;
2330
2331 // Test behavior of $ with \r\n at end of input in multiline
2332 p = Pattern.compile(".+$", Pattern.MULTILINE);
2333 m = p.matcher(toSupplementaries("blah1\r\n"));
2334 if (!m.find())
2335 failCount++;
2336 if (m.find())
2337 failCount++;
2338
2339 // Test for $ recognition of \u0085 for bug 4527731
2340 p = Pattern.compile(".+$", Pattern.MULTILINE);
2341 m = p.matcher(toSupplementaries("blah1\u0085"));
2342 if (!m.find())
2343 failCount++;
2344
2345 report("Anchors");
2346 }
2347
2348 /**
2349 * A basic sanity test of Matcher.lookingAt().
2350 */
2351 private static void lookingAtTest() throws Exception {
2352 Pattern p = Pattern.compile("(ab)(c*)");
2353 Matcher m = p.matcher("abccczzzabcczzzabccc");
2354
2355 if (!m.lookingAt())
2356 failCount++;
2357
2358 if (!m.group().equals(m.group(0)))
2359 failCount++;
2360
2361 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2362 if (m.lookingAt())
2363 failCount++;
2364
2365 // Supplementary character test
2366 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2367 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2368
2369 if (!m.lookingAt())
2370 failCount++;
2371
2372 if (!m.group().equals(m.group(0)))
2373 failCount++;
2374
2375 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2376 if (m.lookingAt())
2377 failCount++;
2378
2379 report("Looking At");
2380 }
2381
2382 /**
2383 * A basic sanity test of Matcher.matches().
2384 */
2385 private static void matchesTest() throws Exception {
2386 // matches()
2387 Pattern p = Pattern.compile("ulb(c*)");
2388 Matcher m = p.matcher("ulbcccccc");
2389 if (!m.matches())
2390 failCount++;
2391
2392 // find() but not matches()
2393 m.reset("zzzulbcccccc");
2394 if (m.matches())
2395 failCount++;
2396
2397 // lookingAt() but not matches()
2398 m.reset("ulbccccccdef");
2399 if (m.matches())
2400 failCount++;
2401
2402 // matches()
2403 p = Pattern.compile("a|ad");
2404 m = p.matcher("ad");
2405 if (!m.matches())
2406 failCount++;
2407
2408 // Supplementary character test
2409 // matches()
2410 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2411 m = p.matcher(toSupplementaries("ulbcccccc"));
2412 if (!m.matches())
2413 failCount++;
2414
2415 // find() but not matches()
2416 m.reset(toSupplementaries("zzzulbcccccc"));
2417 if (m.matches())
2418 failCount++;
2419
2420 // lookingAt() but not matches()
2421 m.reset(toSupplementaries("ulbccccccdef"));
2422 if (m.matches())
2423 failCount++;
2424
2425 // matches()
2426 p = Pattern.compile(toSupplementaries("a|ad"));
2427 m = p.matcher(toSupplementaries("ad"));
2428 if (!m.matches())
2429 failCount++;
2430
2431 report("Matches");
2432 }
2433
2434 /**
2435 * A basic sanity test of Pattern.matches().
2436 */
2437 private static void patternMatchesTest() throws Exception {
2438 // matches()
2439 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2440 toSupplementaries("ulbcccccc")))
2441 failCount++;
2442
2443 // find() but not matches()
2444 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2445 toSupplementaries("zzzulbcccccc")))
2446 failCount++;
2447
2448 // lookingAt() but not matches()
2449 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2450 toSupplementaries("ulbccccccdef")))
2451 failCount++;
2452
2453 // Supplementary character test
2454 // matches()
2455 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2456 toSupplementaries("ulbcccccc")))
2457 failCount++;
2458
2459 // find() but not matches()
2460 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2461 toSupplementaries("zzzulbcccccc")))
2462 failCount++;
2463
2464 // lookingAt() but not matches()
2465 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2466 toSupplementaries("ulbccccccdef")))
2467 failCount++;
2468
2469 report("Pattern Matches");
2470 }
2471
2472 /**
2473 * Canonical equivalence testing. Tests the ability of the engine
2474 * to match sequences that are not explicitly specified in the
2475 * pattern when they are considered equivalent by the Unicode Standard.
2476 */
2477 private static void ceTest() throws Exception {
2478 // Decomposed char outside char classes
2479 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2480 Matcher m = p.matcher("test\u00e5");
2481 if (!m.matches())
2482 failCount++;
2483
2484 m.reset("testa\u030a");
2485 if (!m.matches())
2486 failCount++;
2487
2488 // Composed char outside char classes
2489 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2490 m = p.matcher("test\u00e5");
2491 if (!m.matches())
2492 failCount++;
2493
2494 m.reset("testa\u030a");
2495 if (!m.find())
2496 failCount++;
2497
2498 // Decomposed char inside a char class
2499 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2500 m = p.matcher("test\u00e5");
2501 if (!m.find())
2502 failCount++;
2503
2504 m.reset("testa\u030a");
2505 if (!m.find())
2506 failCount++;
2507
2508 // Composed char inside a char class
2509 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2510 m = p.matcher("test\u00e5");
2511 if (!m.find())
2512 failCount++;
2513
2514 m.reset("testa\u0300");
2515 if (!m.find())
2516 failCount++;
2517
2518 m.reset("testa\u030a");
2519 if (!m.find())
2520 failCount++;
2521
2522 // Marks that cannot legally change order and be equivalent
2523 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2524 check(p, "testa\u0308\u0300", true);
2525 check(p, "testa\u0300\u0308", false);
2526
2527 // Marks that can legally change order and be equivalent
2528 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2529 check(p, "testa\u0308\u0323", true);
2530 check(p, "testa\u0323\u0308", true);
2531
2532 // Test all equivalences of the sequence a\u0308\u0323\u0300
2533 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2534 check(p, "testa\u0308\u0323\u0300", true);
2535 check(p, "testa\u0323\u0308\u0300", true);
2536 check(p, "testa\u0308\u0300\u0323", true);
2537 check(p, "test\u00e4\u0323\u0300", true);
2538 check(p, "test\u00e4\u0300\u0323", true);
2539
2540 /*
2541 * The following canonical equivalence tests don't work. Bug id: 4916384.
2542 *
2543 // Decomposed hangul (jamos)
2544 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2545 m = p.matcher("\u1100\u1161");
2546 if (!m.matches())
2547 failCount++;
2548
2549 m.reset("\uac00");
2550 if (!m.matches())
2551 failCount++;
2552
2553 // Composed hangul
2554 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2555 m = p.matcher("\u1100\u1161");
2556 if (!m.matches())
2557 failCount++;
2558
2559 m.reset("\uac00");
2560 if (!m.matches())
2561 failCount++;
2562
2563 // Decomposed supplementary outside char classes
2564 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2565 m = p.matcher("test\ud834\uddc0");
2566 if (!m.matches())
2567 failCount++;
2568
2569 m.reset("test\ud834\uddbc\ud834\udd6f");
2570 if (!m.matches())
2571 failCount++;
2572
2573 // Composed supplementary outside char classes
2574 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2575 m.reset("test\ud834\uddbc\ud834\udd6f");
2576 if (!m.matches())
2577 failCount++;
2578
2579 m = p.matcher("test\ud834\uddc0");
2580 if (!m.matches())
2581 failCount++;
2582
2583 */
2584
2585 report("Canonical Equivalence");
2586 }
2587
2588 /**
2589 * A basic sanity test of Matcher.replaceAll().
2590 */
2591 private static void globalSubstitute() throws Exception {
2592 // Global substitution with a literal
2593 Pattern p = Pattern.compile("(ab)(c*)");
2594 Matcher m = p.matcher("abccczzzabcczzzabccc");
2595 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2596 failCount++;
2597
2598 m.reset("zzzabccczzzabcczzzabccczzz");
2599 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2600 failCount++;
2601
2602 // Global substitution with groups
2603 m.reset("zzzabccczzzabcczzzabccczzz");
2604 String result = m.replaceAll("$1");
2605 if (!result.equals("zzzabzzzabzzzabzzz"))
2606 failCount++;
2607
2608 // Supplementary character test
2609 // Global substitution with a literal
2610 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2611 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2612 if (!m.replaceAll(toSupplementaries("test")).
2613 equals(toSupplementaries("testzzztestzzztest")))
2614 failCount++;
2615
2616 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2617 if (!m.replaceAll(toSupplementaries("test")).
2618 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2619 failCount++;
2620
2621 // Global substitution with groups
2622 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2623 result = m.replaceAll("$1");
2624 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2625 failCount++;
2626
2627 report("Global Substitution");
2628 }
2629
2630 /**
2631 * Tests the usage of Matcher.appendReplacement() with literal
2632 * and group substitutions.
2633 */
2634 private static void stringbufferSubstitute() throws Exception {
2635 // SB substitution with literal
2636 String blah = "zzzblahzzz";
2637 Pattern p = Pattern.compile("blah");
2638 Matcher m = p.matcher(blah);
2639 StringBuffer result = new StringBuffer();
2640 try {
2641 m.appendReplacement(result, "blech");
2642 failCount++;
2643 } catch (IllegalStateException e) {
2644 }
2645 m.find();
2646 m.appendReplacement(result, "blech");
2647 if (!result.toString().equals("zzzblech"))
2648 failCount++;
2649
2650 m.appendTail(result);
2651 if (!result.toString().equals("zzzblechzzz"))
2652 failCount++;
2653
2654 // SB substitution with groups
2655 blah = "zzzabcdzzz";
2656 p = Pattern.compile("(ab)(cd)*");
2657 m = p.matcher(blah);
2658 result = new StringBuffer();
2659 try {
2660 m.appendReplacement(result, "$1");
2661 failCount++;
2662 } catch (IllegalStateException e) {
2663 }
2664 m.find();
2665 m.appendReplacement(result, "$1");
2666 if (!result.toString().equals("zzzab"))
2667 failCount++;
2668
2669 m.appendTail(result);
2670 if (!result.toString().equals("zzzabzzz"))
2671 failCount++;
2672
2673 // SB substitution with 3 groups
2674 blah = "zzzabcdcdefzzz";
2675 p = Pattern.compile("(ab)(cd)*(ef)");
2676 m = p.matcher(blah);
2677 result = new StringBuffer();
2678 try {
2679 m.appendReplacement(result, "$1w$2w$3");
2680 failCount++;
2681 } catch (IllegalStateException e) {
2682 }
2683 m.find();
2684 m.appendReplacement(result, "$1w$2w$3");
2685 if (!result.toString().equals("zzzabwcdwef"))
2686 failCount++;
2687
2688 m.appendTail(result);
2689 if (!result.toString().equals("zzzabwcdwefzzz"))
2690 failCount++;
2691
2692 // SB substitution with groups and three matches
2693 // skipping middle match
2694 blah = "zzzabcdzzzabcddzzzabcdzzz";
2695 p = Pattern.compile("(ab)(cd*)");
2696 m = p.matcher(blah);
2697 result = new StringBuffer();
2698 try {
2699 m.appendReplacement(result, "$1");
2700 failCount++;
2701 } catch (IllegalStateException e) {
2702 }
2703 m.find();
2704 m.appendReplacement(result, "$1");
2705 if (!result.toString().equals("zzzab"))
2706 failCount++;
2707
2708 m.find();
2709 m.find();
2710 m.appendReplacement(result, "$2");
2711 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2712 failCount++;
2713
2714 m.appendTail(result);
2715 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2716 failCount++;
2717
2718 // Check to make sure escaped $ is ignored
2719 blah = "zzzabcdcdefzzz";
2720 p = Pattern.compile("(ab)(cd)*(ef)");
2721 m = p.matcher(blah);
2722 result = new StringBuffer();
2723 m.find();
2724 m.appendReplacement(result, "$1w\\$2w$3");
2725 if (!result.toString().equals("zzzabw$2wef"))
2726 failCount++;
2727
2728 m.appendTail(result);
2729 if (!result.toString().equals("zzzabw$2wefzzz"))
2730 failCount++;
2731
2732 // Check to make sure a reference to nonexistent group causes error
2733 blah = "zzzabcdcdefzzz";
2734 p = Pattern.compile("(ab)(cd)*(ef)");
2735 m = p.matcher(blah);
2736 result = new StringBuffer();
2737 m.find();
2738 try {
2739 m.appendReplacement(result, "$1w$5w$3");
2740 failCount++;
2741 } catch (IndexOutOfBoundsException ioobe) {
2742 // Correct result
2743 }
2744
2745 // Check double digit group references
2746 blah = "zzz123456789101112zzz";
2747 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2748 m = p.matcher(blah);
2749 result = new StringBuffer();
2750 m.find();
2751 m.appendReplacement(result, "$1w$11w$3");
2752 if (!result.toString().equals("zzz1w11w3"))
2753 failCount++;
2754
2755 // Check to make sure it backs off $15 to $1 if only three groups
2756 blah = "zzzabcdcdefzzz";
2757 p = Pattern.compile("(ab)(cd)*(ef)");
2758 m = p.matcher(blah);
2759 result = new StringBuffer();
2760 m.find();
2761 m.appendReplacement(result, "$1w$15w$3");
2762 if (!result.toString().equals("zzzabwab5wef"))
2763 failCount++;
2764
2765
2766 // Supplementary character test
2767 // SB substitution with literal
2768 blah = toSupplementaries("zzzblahzzz");
2769 p = Pattern.compile(toSupplementaries("blah"));
2770 m = p.matcher(blah);
2771 result = new StringBuffer();
2772 try {
2773 m.appendReplacement(result, toSupplementaries("blech"));
2774 failCount++;
2775 } catch (IllegalStateException e) {
2776 }
2777 m.find();
2778 m.appendReplacement(result, toSupplementaries("blech"));
2779 if (!result.toString().equals(toSupplementaries("zzzblech")))
2780 failCount++;
2781
2782 m.appendTail(result);
2783 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2784 failCount++;
2785
2786 // SB substitution with groups
2787 blah = toSupplementaries("zzzabcdzzz");
2788 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2789 m = p.matcher(blah);
2790 result = new StringBuffer();
2791 try {
2792 m.appendReplacement(result, "$1");
2793 failCount++;
2794 } catch (IllegalStateException e) {
2795 }
2796 m.find();
2797 m.appendReplacement(result, "$1");
2798 if (!result.toString().equals(toSupplementaries("zzzab")))
2799 failCount++;
2800
2801 m.appendTail(result);
2802 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2803 failCount++;
2804
2805 // SB substitution with 3 groups
2806 blah = toSupplementaries("zzzabcdcdefzzz");
2807 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2808 m = p.matcher(blah);
2809 result = new StringBuffer();
2810 try {
2811 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2812 failCount++;
2813 } catch (IllegalStateException e) {
2814 }
2815 m.find();
2816 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2817 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2818 failCount++;
2819
2820 m.appendTail(result);
2821 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2822 failCount++;
2823
2824 // SB substitution with groups and three matches
2825 // skipping middle match
2826 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2827 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2828 m = p.matcher(blah);
2829 result = new StringBuffer();
2830 try {
2831 m.appendReplacement(result, "$1");
2832 failCount++;
2833 } catch (IllegalStateException e) {
2834 }
2835 m.find();
2836 m.appendReplacement(result, "$1");
2837 if (!result.toString().equals(toSupplementaries("zzzab")))
2838 failCount++;
2839
2840 m.find();
2841 m.find();
2842 m.appendReplacement(result, "$2");
2843 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2844 failCount++;
2845
2846 m.appendTail(result);
2847 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2848 failCount++;
2849
2850 // Check to make sure escaped $ is ignored
2851 blah = toSupplementaries("zzzabcdcdefzzz");
2852 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2853 m = p.matcher(blah);
2854 result = new StringBuffer();
2855 m.find();
2856 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2857 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2858 failCount++;
2859
2860 m.appendTail(result);
2861 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2862 failCount++;
2863
2864 // Check to make sure a reference to nonexistent group causes error
2865 blah = toSupplementaries("zzzabcdcdefzzz");
2866 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2867 m = p.matcher(blah);
2868 result = new StringBuffer();
2869 m.find();
2870 try {
2871 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2872 failCount++;
2873 } catch (IndexOutOfBoundsException ioobe) {
2874 // Correct result
2875 }
2876
2877 // Check double digit group references
2878 blah = toSupplementaries("zzz123456789101112zzz");
2879 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2880 m = p.matcher(blah);
2881 result = new StringBuffer();
2882 m.find();
2883 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2884 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2885 failCount++;
2886
2887 // Check to make sure it backs off $15 to $1 if only three groups
2888 blah = toSupplementaries("zzzabcdcdefzzz");
2889 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2890 m = p.matcher(blah);
2891 result = new StringBuffer();
2892 m.find();
2893 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2894 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2895 failCount++;
2896
2897 // Check nothing has been appended into the output buffer if
2898 // the replacement string triggers IllegalArgumentException.
2899 p = Pattern.compile("(abc)");
2900 m = p.matcher("abcd");
2901 result = new StringBuffer();
2902 m.find();
2903 try {
2904 m.appendReplacement(result, ("xyz$g"));
2905 failCount++;
2906 } catch (IllegalArgumentException iae) {
2907 if (result.length() != 0)
2908 failCount++;
2909 }
2910
2911 report("SB Substitution");
2912 }
2913
2914 /*
2915 * 5 groups of characters are created to make a substitution string.
2916 * A base string will be created including random lead chars, the
2917 * substitution string, and random trailing chars.
2918 * A pattern containing the 5 groups is searched for and replaced with:
2919 * random group + random string + random group.
2920 * The results are checked for correctness.
2921 */
2922 private static void substitutionBasher() {
2923 for (int runs = 0; runs<1000; runs++) {
2924 // Create a base string to work in
2925 int leadingChars = generator.nextInt(10);
2926 StringBuffer baseBuffer = new StringBuffer(100);
2927 String leadingString = getRandomAlphaString(leadingChars);
2928 baseBuffer.append(leadingString);
2929
2930 // Create 5 groups of random number of random chars
2931 // Create the string to substitute
2932 // Create the pattern string to search for
2933 StringBuffer bufferToSub = new StringBuffer(25);
2934 StringBuffer bufferToPat = new StringBuffer(50);
2935 String[] groups = new String[5];
2936 for(int i=0; i<5; i++) {
2937 int aGroupSize = generator.nextInt(5)+1;
2938 groups[i] = getRandomAlphaString(aGroupSize);
2939 bufferToSub.append(groups[i]);
2940 bufferToPat.append('(');
2941 bufferToPat.append(groups[i]);
2942 bufferToPat.append(')');
2943 }
2944 String stringToSub = bufferToSub.toString();
2945 String pattern = bufferToPat.toString();
2946
2947 // Place sub string into working string at random index
2948 baseBuffer.append(stringToSub);
2949
2950 // Append random chars to end
2951 int trailingChars = generator.nextInt(10);
2952 String trailingString = getRandomAlphaString(trailingChars);
2953 baseBuffer.append(trailingString);
2954 String baseString = baseBuffer.toString();
2955
2956 // Create test pattern and matcher
2957 Pattern p = Pattern.compile(pattern);
2958 Matcher m = p.matcher(baseString);
2959
2960 // Reject candidate if pattern happens to start early
2961 m.find();
2962 if (m.start() < leadingChars)
2963 continue;
2964
2965 // Reject candidate if more than one match
2966 if (m.find())
2967 continue;
2968
2969 // Construct a replacement string with :
2970 // random group + random string + random group
2971 StringBuffer bufferToRep = new StringBuffer();
2972 int groupIndex1 = generator.nextInt(5);
2973 bufferToRep.append("$" + (groupIndex1 + 1));
2974 String randomMidString = getRandomAlphaString(5);
2975 bufferToRep.append(randomMidString);
2976 int groupIndex2 = generator.nextInt(5);
2977 bufferToRep.append("$" + (groupIndex2 + 1));
2978 String replacement = bufferToRep.toString();
2979
2980 // Do the replacement
2981 String result = m.replaceAll(replacement);
2982
2983 // Construct expected result
2984 StringBuffer bufferToRes = new StringBuffer();
2985 bufferToRes.append(leadingString);
2986 bufferToRes.append(groups[groupIndex1]);
2987 bufferToRes.append(randomMidString);
2988 bufferToRes.append(groups[groupIndex2]);
2989 bufferToRes.append(trailingString);
2990 String expectedResult = bufferToRes.toString();
2991
2992 // Check results
2993 if (!result.equals(expectedResult))
2994 failCount++;
2995 }
2996
2997 report("Substitution Basher");
2998 }
2999
3000 /**
3001 * Checks the handling of some escape sequences that the Pattern
3002 * class should process instead of the java compiler. These are
3003 * not in the file because the escapes should be processed
3004 * by the Pattern class when the regex is compiled.
3005 */
3006 private static void escapes() throws Exception {
3007 Pattern p = Pattern.compile("\\043");
3008 Matcher m = p.matcher("#");
3009 if (!m.find())
3010 failCount++;
3011
3012 p = Pattern.compile("\\x23");
3013 m = p.matcher("#");
3014 if (!m.find())
3015 failCount++;
3016
3017 p = Pattern.compile("\\u0023");
3018 m = p.matcher("#");
3019 if (!m.find())
3020 failCount++;
3021
3022 report("Escape sequences");
3023 }
3024
3025 /**
3026 * Checks the handling of blank input situations. These
3027 * tests are incompatible with my test file format.
3028 */
3029 private static void blankInput() throws Exception {
3030 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3031 Matcher m = p.matcher("");
3032 if (m.find())
3033 failCount++;
3034
3035 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3036 m = p.matcher("");
3037 if (!m.find())
3038 failCount++;
3039
3040 p = Pattern.compile("abc");
3041 m = p.matcher("");
3042 if (m.find())
3043 failCount++;
3044
3045 p = Pattern.compile("a*");
3046 m = p.matcher("");
3047 if (!m.find())
3048 failCount++;
3049
3050 report("Blank input");
3051 }
3052
3053 /**
3054 * Tests the Boyer-Moore pattern matching of a character sequence
3055 * on randomly generated patterns.
3056 */
3057 private static void bm() throws Exception {
3058 doBnM('a');
3059 report("Boyer Moore (ASCII)");
3060
3061 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3062 report("Boyer Moore (Supplementary)");
3063 }
3064
3065 private static void doBnM(int baseCharacter) throws Exception {
3066 int achar=0;
3067
3068 for (int i=0; i<100; i++) {
3069 // Create a short pattern to search for
3070 int patternLength = generator.nextInt(7) + 4;
3071 StringBuffer patternBuffer = new StringBuffer(patternLength);
3072 for (int x=0; x<patternLength; x++) {
3073 int ch = baseCharacter + generator.nextInt(26);
3074 if (Character.isSupplementaryCodePoint(ch)) {
3075 patternBuffer.append(Character.toChars(ch));
3076 } else {
3077 patternBuffer.append((char)ch);
3078 }
3079 }
3080 String pattern = patternBuffer.toString();
3081 Pattern p = Pattern.compile(pattern);
3082
3083 // Create a buffer with random ASCII chars that does
3084 // not match the sample
3085 String toSearch = null;
3086 StringBuffer s = null;
3087 Matcher m = p.matcher("");
3088 do {
3089 s = new StringBuffer(100);
3090 for (int x=0; x<100; x++) {
3091 int ch = baseCharacter + generator.nextInt(26);
3092 if (Character.isSupplementaryCodePoint(ch)) {
3093 s.append(Character.toChars(ch));
3094 } else {
3095 s.append((char)ch);
3096 }
3097 }
3098 toSearch = s.toString();
3099 m.reset(toSearch);
3100 } while (m.find());
3101
3102 // Insert the pattern at a random spot
3103 int insertIndex = generator.nextInt(99);
3104 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3105 insertIndex++;
3106 s = s.insert(insertIndex, pattern);
3107 toSearch = s.toString();
3108
3109 // Make sure that the pattern is found
3110 m.reset(toSearch);
3111 if (!m.find())
3112 failCount++;
3113
3114 // Make sure that the match text is the pattern
3115 if (!m.group().equals(pattern))
3116 failCount++;
3117
3118 // Make sure match occured at insertion point
3119 if (m.start() != insertIndex)
3120 failCount++;
3121 }
3122 }
3123
3124 /**
3125 * Tests the matching of slices on randomly generated patterns.
3126 * The Boyer-Moore optimization is not done on these patterns
3127 * because it uses unicode case folding.
3128 */
3129 private static void slice() throws Exception {
3130 doSlice(Character.MAX_VALUE);
3131 report("Slice");
3132
3133 doSlice(Character.MAX_CODE_POINT);
3134 report("Slice (Supplementary)");
3135 }
3136
3137 private static void doSlice(int maxCharacter) throws Exception {
3138 Random generator = new Random();
3139 int achar=0;
3140
3141 for (int i=0; i<100; i++) {
3142 // Create a short pattern to search for
3143 int patternLength = generator.nextInt(7) + 4;
3144 StringBuffer patternBuffer = new StringBuffer(patternLength);
3145 for (int x=0; x<patternLength; x++) {
3146 int randomChar = 0;
3147 while (!Character.isLetterOrDigit(randomChar))
3148 randomChar = generator.nextInt(maxCharacter);
3149 if (Character.isSupplementaryCodePoint(randomChar)) {
3150 patternBuffer.append(Character.toChars(randomChar));
3151 } else {
3152 patternBuffer.append((char) randomChar);
3153 }
3154 }
3155 String pattern = patternBuffer.toString();
3156 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3157
3158 // Create a buffer with random chars that does not match the sample
3159 String toSearch = null;
3160 StringBuffer s = null;
3161 Matcher m = p.matcher("");
3162 do {
3163 s = new StringBuffer(100);
3164 for (int x=0; x<100; x++) {
3165 int randomChar = 0;
3166 while (!Character.isLetterOrDigit(randomChar))
3167 randomChar = generator.nextInt(maxCharacter);
3168 if (Character.isSupplementaryCodePoint(randomChar)) {
3169 s.append(Character.toChars(randomChar));
3170 } else {
3171 s.append((char) randomChar);
3172 }
3173 }
3174 toSearch = s.toString();
3175 m.reset(toSearch);
3176 } while (m.find());
3177
3178 // Insert the pattern at a random spot
3179 int insertIndex = generator.nextInt(99);
3180 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3181 insertIndex++;
3182 s = s.insert(insertIndex, pattern);
3183 toSearch = s.toString();
3184
3185 // Make sure that the pattern is found
3186 m.reset(toSearch);
3187 if (!m.find())
3188 failCount++;
3189
3190 // Make sure that the match text is the pattern
3191 if (!m.group().equals(pattern))
3192 failCount++;
3193
3194 // Make sure match occured at insertion point
3195 if (m.start() != insertIndex)
3196 failCount++;
3197 }
3198 }
3199
3200 private static void explainFailure(String pattern, String data,
3201 String expected, String actual) {
3202 System.err.println("----------------------------------------");
3203 System.err.println("Pattern = "+pattern);
3204 System.err.println("Data = "+data);
3205 System.err.println("Expected = " + expected);
3206 System.err.println("Actual = " + actual);
3207 }
3208
3209 private static void explainFailure(String pattern, String data,
3210 Throwable t) {
3211 System.err.println("----------------------------------------");
3212 System.err.println("Pattern = "+pattern);
3213 System.err.println("Data = "+data);
3214 t.printStackTrace(System.err);
3215 }
3216
3217 // Testing examples from a file
3218
3219 /**
3220 * Goes through the file "TestCases.txt" and creates many patterns
3221 * described in the file, matching the patterns against input lines in
3222 * the file, and comparing the results against the correct results
3223 * also found in the file. The file format is described in comments
3224 * at the head of the file.
3225 */
3226 private static void processFile(String fileName) throws Exception {
3227 File testCases = new File(System.getProperty("test.src", "."),
3228 fileName);
3229 FileInputStream in = new FileInputStream(testCases);
3230 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3231
3232 // Process next test case.
3233 String aLine;
3234 while((aLine = r.readLine()) != null) {
3235 // Read a line for pattern
3236 String patternString = grabLine(r);
3237 Pattern p = null;
3238 try {
3239 p = compileTestPattern(patternString);
3240 } catch (PatternSyntaxException e) {
3241 String dataString = grabLine(r);
3242 String expectedResult = grabLine(r);
3243 if (expectedResult.startsWith("error"))
3244 continue;
3245 explainFailure(patternString, dataString, e);
3246 failCount++;
3247 continue;
3248 }
3249
3250 // Read a line for input string
3251 String dataString = grabLine(r);
3252 Matcher m = p.matcher(dataString);
3253 StringBuffer result = new StringBuffer();
3254
3255 // Check for IllegalStateExceptions before a match
3256 failCount += preMatchInvariants(m);
3257
3258 boolean found = m.find();
3259
3260 if (found)
3261 failCount += postTrueMatchInvariants(m);
3262 else
3263 failCount += postFalseMatchInvariants(m);
3264
3265 if (found) {
3266 result.append("true ");
3267 result.append(m.group(0) + " ");
3268 } else {
3269 result.append("false ");
3270 }
3271
3272 result.append(m.groupCount());
3273
3274 if (found) {
3275 for (int i=1; i<m.groupCount()+1; i++)
3276 if (m.group(i) != null)
3277 result.append(" " +m.group(i));
3278 }
3279
3280 // Read a line for the expected result
3281 String expectedResult = grabLine(r);
3282
3283 if (!result.toString().equals(expectedResult)) {
3284 explainFailure(patternString, dataString, expectedResult, result.toString());
3285 failCount++;
3286 }
3287 }
3288
3289 report(fileName);
3290 }
3291
3292 private static int preMatchInvariants(Matcher m) {
3293 int failCount = 0;
3294 try {
3295 m.start();
3296 failCount++;
3297 } catch (IllegalStateException ise) {}
3298 try {
3299 m.end();
3300 failCount++;
3301 } catch (IllegalStateException ise) {}
3302 try {
3303 m.group();
3304 failCount++;
3305 } catch (IllegalStateException ise) {}
3306 return failCount;
3307 }
3308
3309 private static int postFalseMatchInvariants(Matcher m) {
3310 int failCount = 0;
3311 try {
3312 m.group();
3313 failCount++;
3314 } catch (IllegalStateException ise) {}
3315 try {
3316 m.start();
3317 failCount++;
3318 } catch (IllegalStateException ise) {}
3319 try {
3320 m.end();
3321 failCount++;
3322 } catch (IllegalStateException ise) {}
3323 return failCount;
3324 }
3325
3326 private static int postTrueMatchInvariants(Matcher m) {
3327 int failCount = 0;
3328 //assert(m.start() = m.start(0);
3329 if (m.start() != m.start(0))
3330 failCount++;
3331 //assert(m.end() = m.end(0);
3332 if (m.start() != m.start(0))
3333 failCount++;
3334 //assert(m.group() = m.group(0);
3335 if (!m.group().equals(m.group(0)))
3336 failCount++;
3337 try {
3338 m.group(50);
3339 failCount++;
3340 } catch (IndexOutOfBoundsException ise) {}
3341
3342 return failCount;
3343 }
3344
3345 private static Pattern compileTestPattern(String patternString) {
3346 if (!patternString.startsWith("'")) {
3347 return Pattern.compile(patternString);
3348 }
3349
3350 int break1 = patternString.lastIndexOf("'");
3351 String flagString = patternString.substring(
3352 break1+1, patternString.length());
3353 patternString = patternString.substring(1, break1);
3354
3355 if (flagString.equals("i"))
3356 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3357
3358 if (flagString.equals("m"))
3359 return Pattern.compile(patternString, Pattern.MULTILINE);
3360
3361 return Pattern.compile(patternString);
3362 }
3363
3364 /**
3365 * Reads a line from the input file. Keeps reading lines until a non
3366 * empty non comment line is read. If the line contains a \n then
3367 * these two characters are replaced by a newline char. If a \\uxxxx
3368 * sequence is read then the sequence is replaced by the unicode char.
3369 */
3370 private static String grabLine(BufferedReader r) throws Exception {
3371 int index = 0;
3372 String line = r.readLine();
3373 while (line.startsWith("//") || line.length() < 1)
3374 line = r.readLine();
3375 while ((index = line.indexOf("\\n")) != -1) {
3376 StringBuffer temp = new StringBuffer(line);
3377 temp.replace(index, index+2, "\n");
3378 line = temp.toString();
3379 }
3380 while ((index = line.indexOf("\\u")) != -1) {
3381 StringBuffer temp = new StringBuffer(line);
3382 String value = temp.substring(index+2, index+6);
3383 char aChar = (char)Integer.parseInt(value, 16);
3384 String unicodeChar = "" + aChar;
3385 temp.replace(index, index+6, unicodeChar);
3386 line = temp.toString();
3387 }
3388
3389 return line;
3390 }
3391
3392 private static void check(Pattern p, String s, String g, String expected) {
3393 Matcher m = p.matcher(s);
3394 m.find();
shermana244eb52013-05-06 21:24:37 -07003395 if (!m.group(g).equals(expected) ||
3396 s.charAt(m.start(g)) != expected.charAt(0) ||
3397 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
sherman0b4d42d2009-02-23 21:06:15 -08003398 failCount++;
3399 }
3400
3401 private static void checkReplaceFirst(String p, String s, String r, String expected)
3402 {
3403 if (!expected.equals(Pattern.compile(p)
3404 .matcher(s)
3405 .replaceFirst(r)))
3406 failCount++;
3407 }
3408
3409 private static void checkReplaceAll(String p, String s, String r, String expected)
3410 {
3411 if (!expected.equals(Pattern.compile(p)
3412 .matcher(s)
3413 .replaceAll(r)))
3414 failCount++;
3415 }
3416
3417 private static void checkExpectedFail(String p) {
3418 try {
3419 Pattern.compile(p);
3420 } catch (PatternSyntaxException pse) {
3421 //pse.printStackTrace();
3422 return;
3423 }
3424 failCount++;
3425 }
3426
shermana244eb52013-05-06 21:24:37 -07003427 private static void checkExpectedIAE(Matcher m, String g) {
sherman0b4d42d2009-02-23 21:06:15 -08003428 m.find();
3429 try {
3430 m.group(g);
shermana244eb52013-05-06 21:24:37 -07003431 } catch (IllegalArgumentException x) {
sherman0b4d42d2009-02-23 21:06:15 -08003432 //iae.printStackTrace();
shermana244eb52013-05-06 21:24:37 -07003433 try {
3434 m.start(g);
3435 } catch (IllegalArgumentException xx) {
3436 try {
3437 m.start(g);
3438 } catch (IllegalArgumentException xxx) {
3439 return;
3440 }
3441 }
sherman0b4d42d2009-02-23 21:06:15 -08003442 }
3443 failCount++;
3444 }
3445
shermana244eb52013-05-06 21:24:37 -07003446 private static void checkExpectedNPE(Matcher m) {
3447 m.find();
3448 try {
3449 m.group(null);
3450 } catch (NullPointerException x) {
3451 try {
3452 m.start(null);
3453 } catch (NullPointerException xx) {
3454 try {
3455 m.end(null);
3456 } catch (NullPointerException xxx) {
3457 return;
3458 }
3459 }
3460 }
3461 failCount++;
3462 }
sherman0b4d42d2009-02-23 21:06:15 -08003463
3464 private static void namedGroupCaptureTest() throws Exception {
3465 check(Pattern.compile("x+(?<gname>y+)z+"),
3466 "xxxyyyzzz",
3467 "gname",
3468 "yyy");
3469
shermand9337e02009-10-21 11:40:40 -07003470 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003471 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003472 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003473 "yyy");
3474
sherman0b4d42d2009-02-23 21:06:15 -08003475 //backref
3476 Pattern pattern = Pattern.compile("(a*)bc\\1");
3477 check(pattern, "zzzaabcazzz", true); // found "abca"
3478
3479 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3480 "zzzaabcaazzz", true);
3481
3482 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3483 "abcdefabc", true);
3484
3485 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3486 "abcdefghijkk", true);
3487
3488 // Supplementary character tests
3489 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3490 toSupplementaries("zzzaabcazzz"), true);
3491
3492 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3493 toSupplementaries("zzzaabcaazzz"), true);
3494
3495 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3496 toSupplementaries("abcdefabc"), true);
3497
3498 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3499 "(?<gname>" +
3500 toSupplementaries("k)") + "\\k<gname>"),
3501 toSupplementaries("abcdefghijkk"), true);
3502
3503 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3504 "xxxyyyzzzyyy",
3505 "gname",
3506 "yyy");
3507
3508 //replaceFirst/All
3509 checkReplaceFirst("(?<gn>ab)(c*)",
3510 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003511 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003512 "abzzzabcczzzabccc");
3513
3514 checkReplaceAll("(?<gn>ab)(c*)",
3515 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003516 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003517 "abzzzabzzzab");
3518
3519
3520 checkReplaceFirst("(?<gn>ab)(c*)",
3521 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003522 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003523 "zzzabzzzabcczzzabccczzz");
3524
3525 checkReplaceAll("(?<gn>ab)(c*)",
3526 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003527 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003528 "zzzabzzzabzzzabzzz");
3529
3530 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3531 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003532 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003533 "zzzccczzzabcczzzabccczzz");
3534
3535 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3536 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003537 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003538 "zzzccczzzcczzzccczzz");
3539
3540 //toSupplementaries("(ab)(c*)"));
3541 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3542 ")(?<gn2>" + toSupplementaries("c") + "*)",
3543 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003544 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003545 toSupplementaries("abzzzabcczzzabccc"));
3546
3547
3548 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3549 ")(?<gn2>" + toSupplementaries("c") + "*)",
3550 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003551 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003552 toSupplementaries("abzzzabzzzab"));
3553
3554 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3555 ")(?<gn2>" + toSupplementaries("c") + "*)",
3556 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003557 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003558 toSupplementaries("ccczzzabcczzzabccc"));
3559
3560
3561 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3562 ")(?<gn2>" + toSupplementaries("c") + "*)",
3563 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003564 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003565 toSupplementaries("ccczzzcczzzccc"));
3566
3567 checkReplaceFirst("(?<dog>Dog)AndCat",
3568 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003569 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003570 "zzzDogzzzDogAndCatzzz");
3571
3572
3573 checkReplaceAll("(?<dog>Dog)AndCat",
3574 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003575 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003576 "zzzDogzzzDogzzz");
3577
3578 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003579 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3580 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003581 failCount++;
3582
3583 // negative
3584 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3585 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003586 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003587 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3588 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
shermana244eb52013-05-06 21:24:37 -07003589 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3590 "gnameX");
3591 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
sherman0b4d42d2009-02-23 21:06:15 -08003592 report("NamedGroupCapture");
3593 }
sherman6782c962010-02-05 00:10:42 -08003594
shermancc01ef52010-05-18 15:36:47 -07003595 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003596 private static void nonBmpClassComplementTest() throws Exception {
3597 Pattern p = Pattern.compile("\\P{Lu}");
3598 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3599 if (m.find() && m.start() == 1)
3600 failCount++;
3601
3602 // from a unicode category
3603 p = Pattern.compile("\\P{Lu}");
3604 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3605 if (m.find())
3606 failCount++;
3607 if (!m.hitEnd())
3608 failCount++;
3609
3610 // block
3611 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3612 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3613 if (m.find() && m.start() == 1)
3614 failCount++;
3615
3616 report("NonBmpClassComplement");
3617 }
3618
shermancc01ef52010-05-18 15:36:47 -07003619 private static void unicodePropertiesTest() throws Exception {
3620 // different forms
3621 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3622 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3623 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3624 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3625 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3626 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3627 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3628 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3629 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3630 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3631 failCount++;
3632
3633 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3634 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3635 Matcher lastSM = common;
3636 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3637
3638 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3639 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3640 Matcher lastBM = latin;
3641 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3642
3643 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3644 if (cp >= 0x30000 && (cp & 0x70) == 0){
3645 continue; // only pick couple code points, they are the same
3646 }
3647
3648 // Unicode Script
3649 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3650 Matcher m;
3651 String str = new String(Character.toChars(cp));
3652 if (script == lastScript) {
3653 m = lastSM;
3654 m.reset(str);
3655 } else {
3656 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3657 }
3658 if (!m.matches()) {
3659 failCount++;
3660 }
3661 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3662 other.reset(str);
3663 if (other.matches()) {
3664 failCount++;
3665 }
3666 lastSM = m;
3667 lastScript = script;
3668
3669 // Unicode Block
3670 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3671 if (block == null) {
3672 //System.out.printf("Not a Block: cp=%x%n", cp);
3673 continue;
3674 }
3675 if (block == lastBlock) {
3676 m = lastBM;
3677 m.reset(str);
3678 } else {
3679 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3680 }
3681 if (!m.matches()) {
3682 failCount++;
3683 }
3684 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3685 other.reset(str);
3686 if (other.matches()) {
3687 failCount++;
3688 }
3689 lastBM = m;
3690 lastBlock = block;
3691 }
3692 report("unicodeProperties");
3693 }
shermanf03c78b2011-02-03 13:49:25 -08003694
3695 private static void unicodeHexNotationTest() throws Exception {
3696
3697 // negative
3698 checkExpectedFail("\\x{-23}");
3699 checkExpectedFail("\\x{110000}");
3700 checkExpectedFail("\\x{}");
3701 checkExpectedFail("\\x{AB[ef]");
3702
3703 // codepoint
3704 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3705 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3706 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3707 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3708
3709 // in class
3710 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3711 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3712 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3713 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3714 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3715 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3716
3717 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3718 String s = "A" + new String(Character.toChars(cp)) + "B";
3719 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3720 : String.format("\\u%04x\\u%04x",
3721 (int) Character.toChars(cp)[0],
3722 (int) Character.toChars(cp)[1]);
3723 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3724 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3725 failCount++;
3726 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3727 failCount++;
3728 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3729 failCount++;
3730 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3731 failCount++;
3732 }
3733 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003734 }
3735
3736 private static void unicodeClassesTest() throws Exception {
3737
3738 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3739 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3740 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3741 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3742 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3743 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3744 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3745 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3746 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3747 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3748 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3749 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3750 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3751 Matcher bound = Pattern.compile("\\b").matcher("");
3752 Matcher word = Pattern.compile("\\w++").matcher("");
3753 // UNICODE_CHARACTER_CLASS
3754 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3755 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3756 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3757 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3758 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3759 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3760 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3761 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3762 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3763 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3764 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3765 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3766 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3767 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3768 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3769 // embedded flag (?U)
3770 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3771 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3772 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3773
3774 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3775 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3776 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3777 // properties
3778 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3779 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3780 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3781 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3782 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3783 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3784 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3785 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3786 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3787 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
shermana244eb52013-05-06 21:24:37 -07003788 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
sherman85bbd8b2011-04-28 20:48:36 -07003789
3790 // javaMethod
3791 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3792 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3793 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3794 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3795
3796 for (int cp = 1; cp < 0x30000; cp++) {
3797 String str = new String(Character.toChars(cp));
3798 int type = Character.getType(cp);
3799 if (// lower
3800 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3801 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3802 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3803 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3804 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3805 // upper
3806 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3807 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3808 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3809 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3810 // alpha
3811 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3812 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3813 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3814 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3815 // digit
3816 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3817 Character.isDigit(cp) != digitU.reset(str).matches() ||
3818 // alnum
3819 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3820 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3821 // punct
3822 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3823 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3824 // graph
3825 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3826 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3827 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3828 // blank
3829 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3830 != blank.reset(str).matches() ||
3831 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3832 // print
3833 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3834 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3835 // cntrl
3836 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3837 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3838 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3839 // hexdigit
3840 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3841 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3842 // space
3843 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3844 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3845 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3846 // word
3847 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3848 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3849 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3850 // bwordb
3851 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3852 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3853 // properties
3854 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3855 Character.isLetter(cp) != letterP.reset(str).matches()||
3856 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3857 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3858 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
shermana244eb52013-05-06 21:24:37 -07003859 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3860 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
sherman85bbd8b2011-04-28 20:48:36 -07003861 failCount++;
3862 }
3863
3864 // bounds/word align
3865 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3866 if (!bwbU.reset("\u0180sherman\u0400").matches())
3867 failCount++;
3868 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3869 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3870 failCount++;
3871 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3872 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3873 failCount++;
3874 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3875 failCount++;
3876 report("unicodePredefinedClasses");
3877 }
shermanecb65472012-05-08 10:57:13 -07003878
3879 private static void horizontalAndVerticalWSTest() throws Exception {
3880 String hws = new String (new char[] {
3881 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3882 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3883 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3884 0x202f, 0x205f, 0x3000 });
3885 String vws = new String (new char[] {
3886 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3887 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3888 !Pattern.compile("[\\h]+").matcher(hws).matches())
3889 failCount++;
3890 if (Pattern.compile("\\H").matcher(hws).find() ||
3891 Pattern.compile("[\\H]").matcher(hws).find())
3892 failCount++;
3893 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3894 !Pattern.compile("[\\v]+").matcher(vws).matches())
3895 failCount++;
3896 if (Pattern.compile("\\V").matcher(vws).find() ||
3897 Pattern.compile("[\\V]").matcher(vws).find())
3898 failCount++;
3899 String prefix = "abcd";
3900 String suffix = "efgh";
3901 String ng = "A";
3902 for (int i = 0; i < hws.length(); i++) {
3903 String c = String.valueOf(hws.charAt(i));
3904 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3905 if (!m.find() || !c.equals(m.group()))
3906 failCount++;
3907 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3908 if (!m.find() || !c.equals(m.group()))
3909 failCount++;
3910
3911 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3912 if (!m.find() || !ng.equals(m.group()))
3913 failCount++;
3914 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3915 if (!m.find() || !ng.equals(m.group()))
3916 failCount++;
3917 }
3918 for (int i = 0; i < vws.length(); i++) {
3919 String c = String.valueOf(vws.charAt(i));
3920 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3921 if (!m.find() || !c.equals(m.group()))
3922 failCount++;
3923 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3924 if (!m.find() || !c.equals(m.group()))
3925 failCount++;
3926
3927 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3928 if (!m.find() || !ng.equals(m.group()))
3929 failCount++;
3930 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3931 if (!m.find() || !ng.equals(m.group()))
3932 failCount++;
3933 }
3934 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3935 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3936 failCount++;
3937 report("horizontalAndVerticalWSTest");
3938 }
3939
3940 private static void linebreakTest() throws Exception {
3941 String linebreaks = new String (new char[] {
3942 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3943 String crnl = "\r\n";
3944 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3945 !Pattern.compile("\\R").matcher(crnl).matches() ||
3946 Pattern.compile("\\R\\R").matcher(crnl).matches())
3947 failCount++;
3948 report("linebreakTest");
3949 }
3950
// This test is for 7189363
3952 private static void branchTest() throws Exception {
3953 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
3954 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
3955 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
3956 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
3957 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
3958 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
3959 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
3960 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
3961 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
3962 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
3963 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
3964 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
3965 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
3966 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
3967 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
3968 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
3969 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
3970 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
3971 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
3972 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
3973 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
3974 !Pattern.compile("(a)??bc|de").matcher("de").matches())
3975 failCount++;
3976 report("branchTest");
3977 }
3978
// This test is for 8007395
3980 private static void groupCurlyNotFoundSuppTest() throws Exception {
3981 String input = "test this as \ud83d\ude0d";
3982 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
3983 "test(.)*(@[a-zA-Z.]+)",
3984 "test([^B])+(@[a-zA-Z.]+)",
3985 "test([^B])*(@[a-zA-Z.]+)",
3986 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
3987 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
3988 }) {
3989 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
3990 .matcher(input);
3991 try {
3992 if (m.find()) {
3993 failCount++;
3994 }
3995 } catch (Exception x) {
3996 failCount++;
3997 }
3998 }
3999 report("GroupCurly NotFoundSupp");
4000 }
4001
// This test is for 8012646
4003 private static void patternAsPredicate() throws Exception {
4004 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4005
4006 if (p.test("")) {
4007 failCount++;
4008 }
4009 if (!p.test("word")) {
4010 failCount++;
4011 }
4012 if (p.test("1234")) {
4013 failCount++;
4014 }
4015 report("Pattern.asPredicate");
4016 }
sherman0b4d42d2009-02-23 21:06:15 -08004017}