blob: da20461a30f35b0c6f3e0c5ada5af963bd1ab623 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
psandoze9d4ac92013-05-01 18:40:31 +02002 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
serb9adafbe2013-11-12 20:24:25 +04007 * published by the Free Software Foundation.
sherman0b4d42d2009-02-23 21:06:15 -08008 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
ohair2283b9d2010-05-25 15:58:33 -070019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080022 */
23
24/**
25 * @test
26 * @summary tests RegExp framework
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080033 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman1242a6d2013-11-13 11:26:01 -080034 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
psandoze9d4ac92013-05-01 18:40:31 +020043import java.util.function.Predicate;
sherman0b4d42d2009-02-23 21:06:15 -080044
45/**
46 * This is a test class created to check the operation of
47 * the Pattern and Matcher classes.
48 */
49public class RegExTest {
50
51 private static Random generator = new Random();
52 private static boolean failure = false;
53 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080054 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080055
56 /**
57 * Main to interpret arguments and run several tests.
58 *
59 */
60 public static void main(String[] args) throws Exception {
61 // Most of the tests are in a file
62 processFile("TestCases.txt");
63 //processFile("PerlCases.txt");
64 processFile("BMPTestCases.txt");
65 processFile("SupplementaryTestCases.txt");
66
67 // These test many randomly generated char patterns
68 bm();
69 slice();
70
71 // These are hard to put into the file
72 escapes();
73 blankInput();
74
75 // Substitition tests on randomly generated sequences
76 globalSubstitute();
77 stringbufferSubstitute();
78 substitutionBasher();
79
80 // Canonical Equivalence
81 ceTest();
82
83 // Anchors
84 anchorTest();
85
86 // boolean match calls
87 matchesTest();
88 lookingAtTest();
89
90 // Pattern API
91 patternMatchesTest();
92
93 // Misc
94 lookbehindTest();
95 nullArgumentTest();
96 backRefTest();
97 groupCaptureTest();
98 caretTest();
99 charClassTest();
100 emptyPatternTest();
101 findIntTest();
102 group0Test();
103 longPatternTest();
104 octalTest();
105 ampersandTest();
106 negationTest();
107 splitTest();
108 appendTest();
109 caseFoldingTest();
110 commentsTest();
111 unixLinesTest();
112 replaceFirstTest();
113 gTest();
114 zTest();
115 serializeTest();
116 reluctantRepetitionTest();
117 multilineDollarTest();
118 dollarAtEndTest();
119 caretBetweenTerminatorsTest();
120 // This RFE rejected in Tiger numOccurrencesTest();
121 javaCharClassTest();
122 nonCaptureRepetitionTest();
123 notCapturedGroupCurlyMatchTest();
124 escapedSegmentTest();
125 literalPatternTest();
126 literalReplacementTest();
127 regionTest();
128 toStringTest();
129 negatedCharClassTest();
130 findFromTest();
131 boundsTest();
132 unicodeWordBoundsTest();
133 caretAtEndTest();
134 wordSearchTest();
135 hitEndTest();
136 toMatchResultTest();
137 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800138 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800139 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800140 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700141 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800142 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700143 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700144 horizontalAndVerticalWSTest();
145 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700146 branchTest();
shermanf6f35a12013-04-26 13:59:10 -0700147 groupCurlyNotFoundSuppTest();
sherman95a939c2013-08-27 12:54:44 -0700148 groupCurlyBackoffTest();
psandoze9d4ac92013-05-01 18:40:31 +0200149 patternAsPredicate();
sherman1242a6d2013-11-13 11:26:01 -0800150
shermanb16229d2011-12-19 14:14:14 -0800151 if (failure) {
152 throw new
153 RuntimeException("RegExTest failed, 1st failure: " +
154 firstFailure);
155 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800156 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800157 }
sherman0b4d42d2009-02-23 21:06:15 -0800158 }
159
160 // Utility functions
161
162 private static String getRandomAlphaString(int length) {
163 StringBuffer buf = new StringBuffer(length);
164 for (int i=0; i<length; i++) {
165 char randChar = (char)(97 + generator.nextInt(26));
166 buf.append(randChar);
167 }
168 return buf.toString();
169 }
170
171 private static void check(Matcher m, String expected) {
172 m.find();
173 if (!m.group().equals(expected))
174 failCount++;
175 }
176
177 private static void check(Matcher m, String result, boolean expected) {
178 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800179 if (m.group().equals(result) != expected)
180 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800181 }
182
183 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800184 if (p.matcher(s).find() != expected)
185 failCount++;
186 }
187
188 private static void check(String p, String s, boolean expected) {
189 Matcher matcher = Pattern.compile(p).matcher(s);
190 if (matcher.find() != expected)
191 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800192 }
193
194 private static void check(String p, char c, boolean expected) {
195 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
196 Pattern pattern = Pattern.compile(propertyPattern);
197 char[] ca = new char[1]; ca[0] = c;
198 Matcher matcher = pattern.matcher(new String(ca));
199 if (!matcher.find())
200 failCount++;
201 }
202
203 private static void check(String p, int codePoint, boolean expected) {
204 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
205 Pattern pattern = Pattern.compile(propertyPattern);
206 char[] ca = Character.toChars(codePoint);
207 Matcher matcher = pattern.matcher(new String(ca));
208 if (!matcher.find())
209 failCount++;
210 }
211
212 private static void check(String p, int flag, String input, String s,
213 boolean expected)
214 {
215 Pattern pattern = Pattern.compile(p, flag);
216 Matcher matcher = pattern.matcher(input);
217 if (expected)
218 check(matcher, s, expected);
219 else
220 check(pattern, input, false);
221 }
222
223 private static void report(String testName) {
224 int spacesToAdd = 30 - testName.length();
225 StringBuffer paddedNameBuffer = new StringBuffer(testName);
226 for (int i=0; i<spacesToAdd; i++)
227 paddedNameBuffer.append(" ");
228 String paddedName = paddedNameBuffer.toString();
229 System.err.println(paddedName + ": " +
230 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800231 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800232 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800233
234 if (firstFailure == null) {
235 firstFailure = testName;
236 }
237 }
238
sherman0b4d42d2009-02-23 21:06:15 -0800239 failCount = 0;
240 }
241
242 /**
243 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
244 * supplementary characters. This method does NOT fully take care
245 * of the regex syntax.
246 */
247 private static String toSupplementaries(String s) {
248 int length = s.length();
249 StringBuffer sb = new StringBuffer(length * 2);
250
251 for (int i = 0; i < length; ) {
252 char c = s.charAt(i++);
253 if (c == '\\') {
254 sb.append(c);
255 if (i < length) {
256 c = s.charAt(i++);
257 sb.append(c);
258 if (c == 'u') {
259 // assume no syntax error
260 sb.append(s.charAt(i++));
261 sb.append(s.charAt(i++));
262 sb.append(s.charAt(i++));
263 sb.append(s.charAt(i++));
264 }
265 }
266 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
267 sb.append('\ud800').append((char)('\udc00'+c));
268 } else {
269 sb.append(c);
270 }
271 }
272 return sb.toString();
273 }
274
275 // Regular expression tests
276
277 // This is for bug 6178785
278 // Test if an expected NPE gets thrown when passing in a null argument
279 private static boolean check(Runnable test) {
280 try {
281 test.run();
282 failCount++;
283 return false;
284 } catch (NullPointerException npe) {
285 return true;
286 }
287 }
288
289 private static void nullArgumentTest() {
290 check(new Runnable() { public void run() { Pattern.compile(null); }});
291 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
292 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
293 check(new Runnable() { public void run() { Pattern.quote(null);}});
294 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
295 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
296
297 final Matcher m = Pattern.compile("xyz").matcher("xyz");
298 m.matches();
299 check(new Runnable() { public void run() { m.appendTail(null);}});
300 check(new Runnable() { public void run() { m.replaceAll(null);}});
301 check(new Runnable() { public void run() { m.replaceFirst(null);}});
302 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
303 check(new Runnable() { public void run() { m.reset(null);}});
304 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
305 //check(new Runnable() { public void run() { m.usePattern(null);}});
306
307 report("Null Argument");
308 }
309
310 // This is for bug6635133
311 // Test if surrogate pair in Unicode escapes can be handled correctly.
312 private static void surrogatesInClassTest() throws Exception {
313 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
314 Matcher matcher = pattern.matcher("\ud834\udd22");
315 if (!matcher.find())
316 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800317
318 report("Surrogate pair in Unicode escape");
319 }
320
321 // This is for bug6990617
322 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
323 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
324 // char is an octal digit.
325 private static void removeQEQuotingTest() throws Exception {
326 Pattern pattern =
327 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
328 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
329 if (!matcher.find())
330 failCount++;
331
332 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800333 }
334
335 // This is for bug 4988891
336 // Test toMatchResult to see that it is a copy of the Matcher
337 // that is not affected by subsequent operations on the original
338 private static void toMatchResultTest() throws Exception {
339 Pattern pattern = Pattern.compile("squid");
340 Matcher matcher = pattern.matcher(
341 "agiantsquidofdestinyasmallsquidoffate");
342 matcher.find();
343 int matcherStart1 = matcher.start();
344 MatchResult mr = matcher.toMatchResult();
345 if (mr == matcher)
346 failCount++;
347 int resultStart1 = mr.start();
348 if (matcherStart1 != resultStart1)
349 failCount++;
350 matcher.find();
351 int matcherStart2 = matcher.start();
352 int resultStart2 = mr.start();
353 if (matcherStart2 == resultStart2)
354 failCount++;
355 if (resultStart1 != resultStart2)
356 failCount++;
357 MatchResult mr2 = matcher.toMatchResult();
358 if (mr == mr2)
359 failCount++;
360 if (mr2.start() != matcherStart2)
361 failCount++;
362 report("toMatchResult is a copy");
363 }
364
365 // This is for bug 5013885
366 // Must test a slice to see if it reports hitEnd correctly
367 private static void hitEndTest() throws Exception {
368 // Basic test of Slice node
369 Pattern p = Pattern.compile("^squidattack");
370 Matcher m = p.matcher("squack");
371 m.find();
372 if (m.hitEnd())
373 failCount++;
374 m.reset("squid");
375 m.find();
376 if (!m.hitEnd())
377 failCount++;
378
379 // Test Slice, SliceA and SliceU nodes
380 for (int i=0; i<3; i++) {
381 int flags = 0;
382 if (i==1) flags = Pattern.CASE_INSENSITIVE;
383 if (i==2) flags = Pattern.UNICODE_CASE;
384 p = Pattern.compile("^abc", flags);
385 m = p.matcher("ad");
386 m.find();
387 if (m.hitEnd())
388 failCount++;
389 m.reset("ab");
390 m.find();
391 if (!m.hitEnd())
392 failCount++;
393 }
394
395 // Test Boyer-Moore node
396 p = Pattern.compile("catattack");
397 m = p.matcher("attack");
398 m.find();
399 if (!m.hitEnd())
400 failCount++;
401
402 p = Pattern.compile("catattack");
403 m = p.matcher("attackattackattackcatatta");
404 m.find();
405 if (!m.hitEnd())
406 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800407 report("hitEnd from a Slice");
408 }
409
410 // This is for bug 4997476
411 // It is weird code submitted by customer demonstrating a regression
412 private static void wordSearchTest() throws Exception {
413 String testString = new String("word1 word2 word3");
414 Pattern p = Pattern.compile("\\b");
415 Matcher m = p.matcher(testString);
416 int position = 0;
417 int start = 0;
418 while (m.find(position)) {
419 start = m.start();
420 if (start == testString.length())
421 break;
422 if (m.find(start+1)) {
423 position = m.start();
424 } else {
425 position = testString.length();
426 }
427 if (testString.substring(start, position).equals(" "))
428 continue;
429 if (!testString.substring(start, position-1).startsWith("word"))
430 failCount++;
431 }
432 report("Customer word search");
433 }
434
435 // This is for bug 4994840
436 private static void caretAtEndTest() throws Exception {
437 // Problem only occurs with multiline patterns
438 // containing a beginning-of-line caret "^" followed
439 // by an expression that also matches the empty string.
440 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
441 Matcher matcher = pattern.matcher("\r");
442 matcher.find();
443 matcher.find();
444 report("Caret at end");
445 }
446
447 // This test is for 4979006
448 // Check to see if word boundary construct properly handles unicode
449 // non spacing marks
450 private static void unicodeWordBoundsTest() throws Exception {
451 String spaces = " ";
452 String wordChar = "a";
453 String nsm = "\u030a";
454
455 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
456
457 Pattern pattern = Pattern.compile("\\b");
458 Matcher matcher = pattern.matcher("");
459 // S=other B=word character N=non spacing mark .=word boundary
460 // SS.BB.SS
461 String input = spaces + wordChar + wordChar + spaces;
462 twoFindIndexes(input, matcher, 2, 4);
463 // SS.BBN.SS
464 input = spaces + wordChar +wordChar + nsm + spaces;
465 twoFindIndexes(input, matcher, 2, 5);
466 // SS.BN.SS
467 input = spaces + wordChar + nsm + spaces;
468 twoFindIndexes(input, matcher, 2, 4);
469 // SS.BNN.SS
470 input = spaces + wordChar + nsm + nsm + spaces;
471 twoFindIndexes(input, matcher, 2, 5);
472 // SSN.BB.SS
473 input = spaces + nsm + wordChar + wordChar + spaces;
474 twoFindIndexes(input, matcher, 3, 5);
475 // SS.BNB.SS
476 input = spaces + wordChar + nsm + wordChar + spaces;
477 twoFindIndexes(input, matcher, 2, 5);
478 // SSNNSS
479 input = spaces + nsm + nsm + spaces;
480 matcher.reset(input);
481 if (matcher.find())
482 failCount++;
483 // SSN.BBN.SS
484 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
485 twoFindIndexes(input, matcher, 3, 6);
486
487 report("Unicode word boundary");
488 }
489
490 private static void twoFindIndexes(String input, Matcher matcher, int a,
491 int b) throws Exception
492 {
493 matcher.reset(input);
494 matcher.find();
495 if (matcher.start() != a)
496 failCount++;
497 matcher.find();
498 if (matcher.start() != b)
499 failCount++;
500 }
501
502 // This test is for 6284152
503 static void check(String regex, String input, String[] expected) {
504 List<String> result = new ArrayList<String>();
505 Pattern p = Pattern.compile(regex);
506 Matcher m = p.matcher(input);
507 while (m.find()) {
508 result.add(m.group());
509 }
510 if (!Arrays.asList(expected).equals(result))
511 failCount++;
512 }
513
514 private static void lookbehindTest() throws Exception {
515 //Positive
516 check("(?<=%.{0,5})foo\\d",
517 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
518 new String[]{"foo1", "foo2", "foo3"});
519
520 //boundary at end of the lookbehind sub-regex should work consistently
521 //with the boundary just after the lookbehind sub-regex
522 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
523 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
524 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
525 check("(?<!abc \\b)foo", "abc foo", new String[0]);
526
527 //Negative
528 check("(?<!%.{0,5})foo\\d",
529 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
530 new String[] {"foo4", "foo5"});
531
532 //Positive greedy
533 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
534
535 //Positive reluctant
536 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
537
538 //supplementary
539 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
540 new String[] {"fo\ud800\udc00o"});
541 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
542 new String[] {"fo\ud800\udc00o"});
543 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
544 new String[] {"fo\ud800\udc00o"});
545 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
546 new String[] {"fo\ud800\udc00o"});
547 report("Lookbehind");
548 }
549
550 // This test is for 4938995
551 // Check to see if weak region boundaries are transparent to
552 // lookahead and lookbehind constructs
553 private static void boundsTest() throws Exception {
554 String fullMessage = "catdogcat";
555 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
556 Matcher matcher = pattern.matcher("catdogca");
557 matcher.useTransparentBounds(true);
558 if (matcher.find())
559 failCount++;
560 matcher.reset("atdogcat");
561 if (matcher.find())
562 failCount++;
563 matcher.reset(fullMessage);
564 if (!matcher.find())
565 failCount++;
566 matcher.reset(fullMessage);
567 matcher.region(0,9);
568 if (!matcher.find())
569 failCount++;
570 matcher.reset(fullMessage);
571 matcher.region(0,6);
572 if (!matcher.find())
573 failCount++;
574 matcher.reset(fullMessage);
575 matcher.region(3,6);
576 if (!matcher.find())
577 failCount++;
578 matcher.useTransparentBounds(false);
579 if (matcher.find())
580 failCount++;
581
582 // Negative lookahead/lookbehind
583 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
584 matcher = pattern.matcher("dogcat");
585 matcher.useTransparentBounds(true);
586 matcher.region(0,3);
587 if (matcher.find())
588 failCount++;
589 matcher.reset("catdog");
590 matcher.region(3,6);
591 if (matcher.find())
592 failCount++;
593 matcher.useTransparentBounds(false);
594 matcher.reset("dogcat");
595 matcher.region(0,3);
596 if (!matcher.find())
597 failCount++;
598 matcher.reset("catdog");
599 matcher.region(3,6);
600 if (!matcher.find())
601 failCount++;
602
603 report("Region bounds transparency");
604 }
605
606 // This test is for 4945394
607 private static void findFromTest() throws Exception {
608 String message = "This is 40 $0 message.";
609 Pattern pat = Pattern.compile("\\$0");
610 Matcher match = pat.matcher(message);
611 if (!match.find())
612 failCount++;
613 if (match.find())
614 failCount++;
615 if (match.find())
616 failCount++;
617 report("Check for alternating find");
618 }
619
620 // This test is for 4872664 and 4892980
621 private static void negatedCharClassTest() throws Exception {
622 Pattern pattern = Pattern.compile("[^>]");
623 Matcher matcher = pattern.matcher("\u203A");
624 if (!matcher.matches())
625 failCount++;
626 pattern = Pattern.compile("[^fr]");
627 matcher = pattern.matcher("a");
628 if (!matcher.find())
629 failCount++;
630 matcher.reset("\u203A");
631 if (!matcher.find())
632 failCount++;
633 String s = "for";
634 String result[] = s.split("[^fr]");
635 if (!result[0].equals("f"))
636 failCount++;
637 if (!result[1].equals("r"))
638 failCount++;
639 s = "f\u203Ar";
640 result = s.split("[^fr]");
641 if (!result[0].equals("f"))
642 failCount++;
643 if (!result[1].equals("r"))
644 failCount++;
645
646 // Test adding to bits, subtracting a node, then adding to bits again
647 pattern = Pattern.compile("[^f\u203Ar]");
648 matcher = pattern.matcher("a");
649 if (!matcher.find())
650 failCount++;
651 matcher.reset("f");
652 if (matcher.find())
653 failCount++;
654 matcher.reset("\u203A");
655 if (matcher.find())
656 failCount++;
657 matcher.reset("r");
658 if (matcher.find())
659 failCount++;
660 matcher.reset("\u203B");
661 if (!matcher.find())
662 failCount++;
663
664 // Test subtracting a node, adding to bits, subtracting again
665 pattern = Pattern.compile("[^\u203Ar\u203B]");
666 matcher = pattern.matcher("a");
667 if (!matcher.find())
668 failCount++;
669 matcher.reset("\u203A");
670 if (matcher.find())
671 failCount++;
672 matcher.reset("r");
673 if (matcher.find())
674 failCount++;
675 matcher.reset("\u203B");
676 if (matcher.find())
677 failCount++;
678 matcher.reset("\u203C");
679 if (!matcher.find())
680 failCount++;
681
682 report("Negated Character Class");
683 }
684
685 // This test is for 4628291
686 private static void toStringTest() throws Exception {
687 Pattern pattern = Pattern.compile("b+");
688 if (pattern.toString() != "b+")
689 failCount++;
690 Matcher matcher = pattern.matcher("aaabbbccc");
691 String matcherString = matcher.toString(); // unspecified
692 matcher.find();
693 matcherString = matcher.toString(); // unspecified
694 matcher.region(0,3);
695 matcherString = matcher.toString(); // unspecified
696 matcher.reset();
697 matcherString = matcher.toString(); // unspecified
698 report("toString");
699 }
700
701 // This test is for 4808962
702 private static void literalPatternTest() throws Exception {
703 int flags = Pattern.LITERAL;
704
705 Pattern pattern = Pattern.compile("abc\\t$^", flags);
706 check(pattern, "abc\\t$^", true);
707
708 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
709 check(pattern, "abc\\t$^", true);
710
711 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
712 check(pattern, "\\Qa^$bcabc\\E", true);
713 check(pattern, "a^$bcabc", false);
714
715 pattern = Pattern.compile("\\\\Q\\\\E");
716 check(pattern, "\\Q\\E", true);
717
718 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
719 check(pattern, "abcefg\\Q\\Ehij", true);
720
721 pattern = Pattern.compile("\\\\\\Q\\\\E");
722 check(pattern, "\\\\\\\\", true);
723
724 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
725 check(pattern, "\\Qa^$bcabc\\E", true);
726 check(pattern, "a^$bcabc", false);
727
728 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
729 check(pattern, "\\Qabc\\Edef", true);
730 check(pattern, "abcdef", false);
731
732 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
733 check(pattern, "abc\\Edef", true);
734 check(pattern, "abcdef", false);
735
736 pattern = Pattern.compile(Pattern.quote("\\E"));
737 check(pattern, "\\E", true);
738
739 pattern = Pattern.compile("((((abc.+?:)", flags);
740 check(pattern, "((((abc.+?:)", true);
741
742 flags |= Pattern.MULTILINE;
743
744 pattern = Pattern.compile("^cat$", flags);
745 check(pattern, "abc^cat$def", true);
746 check(pattern, "cat", false);
747
748 flags |= Pattern.CASE_INSENSITIVE;
749
750 pattern = Pattern.compile("abcdef", flags);
751 check(pattern, "ABCDEF", true);
752 check(pattern, "AbCdEf", true);
753
754 flags |= Pattern.DOTALL;
755
756 pattern = Pattern.compile("a...b", flags);
757 check(pattern, "A...b", true);
758 check(pattern, "Axxxb", false);
759
760 flags |= Pattern.CANON_EQ;
761
762 Pattern p = Pattern.compile("testa\u030a", flags);
763 check(pattern, "testa\u030a", false);
764 check(pattern, "test\u00e5", false);
765
766 // Supplementary character test
767 flags = Pattern.LITERAL;
768
769 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
770 check(pattern, toSupplementaries("abc\\t$^"), true);
771
772 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
773 check(pattern, toSupplementaries("abc\\t$^"), true);
774
775 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
776 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
777 check(pattern, toSupplementaries("a^$bcabc"), false);
778
779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
780 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
781 check(pattern, toSupplementaries("a^$bcabc"), false);
782
783 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
784 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
785 check(pattern, toSupplementaries("abcdef"), false);
786
787 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
788 check(pattern, toSupplementaries("abc\\Edef"), true);
789 check(pattern, toSupplementaries("abcdef"), false);
790
791 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
792 check(pattern, toSupplementaries("((((abc.+?:)"), true);
793
794 flags |= Pattern.MULTILINE;
795
796 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
797 check(pattern, toSupplementaries("abc^cat$def"), true);
798 check(pattern, toSupplementaries("cat"), false);
799
800 flags |= Pattern.DOTALL;
801
802 // note: this is case-sensitive.
803 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
804 check(pattern, toSupplementaries("a...b"), true);
805 check(pattern, toSupplementaries("axxxb"), false);
806
807 flags |= Pattern.CANON_EQ;
808
809 String t = toSupplementaries("test");
810 p = Pattern.compile(t + "a\u030a", flags);
811 check(pattern, t + "a\u030a", false);
812 check(pattern, t + "\u00e5", false);
813
814 report("Literal pattern");
815 }
816
817 // This test is for 4803179
818 // This test is also for 4808962, replacement parts
819 private static void literalReplacementTest() throws Exception {
820 int flags = Pattern.LITERAL;
821
822 Pattern pattern = Pattern.compile("abc", flags);
823 Matcher matcher = pattern.matcher("zzzabczzz");
824 String replaceTest = "$0";
825 String result = matcher.replaceAll(replaceTest);
826 if (!result.equals("zzzabczzz"))
827 failCount++;
828
829 matcher.reset();
830 String literalReplacement = matcher.quoteReplacement(replaceTest);
831 result = matcher.replaceAll(literalReplacement);
832 if (!result.equals("zzz$0zzz"))
833 failCount++;
834
835 matcher.reset();
836 replaceTest = "\\t$\\$";
837 literalReplacement = matcher.quoteReplacement(replaceTest);
838 result = matcher.replaceAll(literalReplacement);
839 if (!result.equals("zzz\\t$\\$zzz"))
840 failCount++;
841
842 // Supplementary character test
843 pattern = Pattern.compile(toSupplementaries("abc"), flags);
844 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
845 replaceTest = "$0";
846 result = matcher.replaceAll(replaceTest);
847 if (!result.equals(toSupplementaries("zzzabczzz")))
848 failCount++;
849
850 matcher.reset();
851 literalReplacement = matcher.quoteReplacement(replaceTest);
852 result = matcher.replaceAll(literalReplacement);
853 if (!result.equals(toSupplementaries("zzz$0zzz")))
854 failCount++;
855
856 matcher.reset();
857 replaceTest = "\\t$\\$";
858 literalReplacement = matcher.quoteReplacement(replaceTest);
859 result = matcher.replaceAll(literalReplacement);
860 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
861 failCount++;
862
sherman5c8f3492012-04-12 15:01:41 -0700863 // IAE should be thrown if backslash or '$' is the last character
864 // in replacement string
865 try {
866 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700867 failCount++;
868 } catch (IllegalArgumentException iie) {
869 } catch (Exception e) {
870 failCount++;
871 }
872 try {
sherman5c8f3492012-04-12 15:01:41 -0700873 "\uac00".replaceAll("\uac00", "\\");
874 failCount++;
875 } catch (IllegalArgumentException iie) {
876 } catch (Exception e) {
877 failCount++;
878 }
sherman0b4d42d2009-02-23 21:06:15 -0800879 report("Literal replacement");
880 }
881
882 // This test is for 4757029
883 private static void regionTest() throws Exception {
884 Pattern pattern = Pattern.compile("abc");
885 Matcher matcher = pattern.matcher("abcdefabc");
886
887 matcher.region(0,9);
888 if (!matcher.find())
889 failCount++;
890 if (!matcher.find())
891 failCount++;
892 matcher.region(0,3);
893 if (!matcher.find())
894 failCount++;
895 matcher.region(3,6);
896 if (matcher.find())
897 failCount++;
898 matcher.region(0,2);
899 if (matcher.find())
900 failCount++;
901
902 expectRegionFail(matcher, 1, -1);
903 expectRegionFail(matcher, -1, -1);
904 expectRegionFail(matcher, -1, 1);
905 expectRegionFail(matcher, 5, 3);
906 expectRegionFail(matcher, 5, 12);
907 expectRegionFail(matcher, 12, 12);
908
909 pattern = Pattern.compile("^abc$");
910 matcher = pattern.matcher("zzzabczzz");
911 matcher.region(0,9);
912 if (matcher.find())
913 failCount++;
914 matcher.region(3,6);
915 if (!matcher.find())
916 failCount++;
917 matcher.region(3,6);
918 matcher.useAnchoringBounds(false);
919 if (matcher.find())
920 failCount++;
921
922 // Supplementary character test
923 pattern = Pattern.compile(toSupplementaries("abc"));
924 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
925 matcher.region(0,9*2);
926 if (!matcher.find())
927 failCount++;
928 if (!matcher.find())
929 failCount++;
930 matcher.region(0,3*2);
931 if (!matcher.find())
932 failCount++;
933 matcher.region(1,3*2);
934 if (matcher.find())
935 failCount++;
936 matcher.region(3*2,6*2);
937 if (matcher.find())
938 failCount++;
939 matcher.region(0,2*2);
940 if (matcher.find())
941 failCount++;
942 matcher.region(0,2*2+1);
943 if (matcher.find())
944 failCount++;
945
946 expectRegionFail(matcher, 1*2, -1);
947 expectRegionFail(matcher, -1, -1);
948 expectRegionFail(matcher, -1, 1*2);
949 expectRegionFail(matcher, 5*2, 3*2);
950 expectRegionFail(matcher, 5*2, 12*2);
951 expectRegionFail(matcher, 12*2, 12*2);
952
953 pattern = Pattern.compile(toSupplementaries("^abc$"));
954 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
955 matcher.region(0,9*2);
956 if (matcher.find())
957 failCount++;
958 matcher.region(3*2,6*2);
959 if (!matcher.find())
960 failCount++;
961 matcher.region(3*2+1,6*2);
962 if (matcher.find())
963 failCount++;
964 matcher.region(3*2,6*2-1);
965 if (matcher.find())
966 failCount++;
967 matcher.region(3*2,6*2);
968 matcher.useAnchoringBounds(false);
969 if (matcher.find())
970 failCount++;
971 report("Regions");
972 }
973
974 private static void expectRegionFail(Matcher matcher, int index1,
975 int index2)
976 {
977 try {
978 matcher.region(index1, index2);
979 failCount++;
980 } catch (IndexOutOfBoundsException ioobe) {
981 // Correct result
982 } catch (IllegalStateException ise) {
983 // Correct result
984 }
985 }
986
987 // This test is for 4803197
988 private static void escapedSegmentTest() throws Exception {
989
990 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
991 check(pattern, "dir1\\dir2", true);
992
993 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
994 check(pattern, "dir1\\dir2\\", true);
995
996 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
997 check(pattern, "dir1\\dir2\\", true);
998
999 // Supplementary character test
1000 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1001 check(pattern, toSupplementaries("dir1\\dir2"), true);
1002
1003 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1004 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1005
1006 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1007 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1008
1009 report("Escaped segment");
1010 }
1011
1012 // This test is for 4792284
1013 private static void nonCaptureRepetitionTest() throws Exception {
1014 String input = "abcdefgh;";
1015
1016 String[] patterns = new String[] {
1017 "(?:\\w{4})+;",
1018 "(?:\\w{8})*;",
1019 "(?:\\w{2}){2,4};",
1020 "(?:\\w{4}){2,};", // only matches the
1021 ".*?(?:\\w{5})+;", // specified minimum
1022 ".*?(?:\\w{9})*;", // number of reps - OK
1023 "(?:\\w{4})+?;", // lazy repetition - OK
1024 "(?:\\w{4})++;", // possessive repetition - OK
1025 "(?:\\w{2,}?)+;", // non-deterministic - OK
1026 "(\\w{4})+;", // capturing group - OK
1027 };
1028
1029 for (int i = 0; i < patterns.length; i++) {
1030 // Check find()
1031 check(patterns[i], 0, input, input, true);
1032 // Check matches()
1033 Pattern p = Pattern.compile(patterns[i]);
1034 Matcher m = p.matcher(input);
1035
1036 if (m.matches()) {
1037 if (!m.group(0).equals(input))
1038 failCount++;
1039 } else {
1040 failCount++;
1041 }
1042 }
1043
1044 report("Non capturing repetition");
1045 }
1046
1047 // This test is for 6358731
1048 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1049 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1050 Matcher matcher = pattern.matcher("abcd");
1051 if (!matcher.matches() ||
1052 matcher.group(1) != null ||
1053 !matcher.group(2).equals("abcd")) {
1054 failCount++;
1055 }
1056 report("Not captured GroupCurly");
1057 }
1058
1059 // This test is for 4706545
1060 private static void javaCharClassTest() throws Exception {
1061 for (int i=0; i<1000; i++) {
1062 char c = (char)generator.nextInt();
1063 check("{javaLowerCase}", c, Character.isLowerCase(c));
1064 check("{javaUpperCase}", c, Character.isUpperCase(c));
1065 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1066 check("{javaTitleCase}", c, Character.isTitleCase(c));
1067 check("{javaDigit}", c, Character.isDigit(c));
1068 check("{javaDefined}", c, Character.isDefined(c));
1069 check("{javaLetter}", c, Character.isLetter(c));
1070 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1071 check("{javaJavaIdentifierStart}", c,
1072 Character.isJavaIdentifierStart(c));
1073 check("{javaJavaIdentifierPart}", c,
1074 Character.isJavaIdentifierPart(c));
1075 check("{javaUnicodeIdentifierStart}", c,
1076 Character.isUnicodeIdentifierStart(c));
1077 check("{javaUnicodeIdentifierPart}", c,
1078 Character.isUnicodeIdentifierPart(c));
1079 check("{javaIdentifierIgnorable}", c,
1080 Character.isIdentifierIgnorable(c));
1081 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1082 check("{javaWhitespace}", c, Character.isWhitespace(c));
1083 check("{javaISOControl}", c, Character.isISOControl(c));
1084 check("{javaMirrored}", c, Character.isMirrored(c));
1085
1086 }
1087
1088 // Supplementary character test
1089 for (int i=0; i<1000; i++) {
1090 int c = generator.nextInt(Character.MAX_CODE_POINT
1091 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1092 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1093 check("{javaLowerCase}", c, Character.isLowerCase(c));
1094 check("{javaUpperCase}", c, Character.isUpperCase(c));
1095 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1096 check("{javaTitleCase}", c, Character.isTitleCase(c));
1097 check("{javaDigit}", c, Character.isDigit(c));
1098 check("{javaDefined}", c, Character.isDefined(c));
1099 check("{javaLetter}", c, Character.isLetter(c));
1100 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1101 check("{javaJavaIdentifierStart}", c,
1102 Character.isJavaIdentifierStart(c));
1103 check("{javaJavaIdentifierPart}", c,
1104 Character.isJavaIdentifierPart(c));
1105 check("{javaUnicodeIdentifierStart}", c,
1106 Character.isUnicodeIdentifierStart(c));
1107 check("{javaUnicodeIdentifierPart}", c,
1108 Character.isUnicodeIdentifierPart(c));
1109 check("{javaIdentifierIgnorable}", c,
1110 Character.isIdentifierIgnorable(c));
1111 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1112 check("{javaWhitespace}", c, Character.isWhitespace(c));
1113 check("{javaISOControl}", c, Character.isISOControl(c));
1114 check("{javaMirrored}", c, Character.isMirrored(c));
1115 }
1116
1117 report("Java character classes");
1118 }
1119
1120 // This test is for 4523620
1121 /*
1122 private static void numOccurrencesTest() throws Exception {
1123 Pattern pattern = Pattern.compile("aaa");
1124
1125 if (pattern.numOccurrences("aaaaaa", false) != 2)
1126 failCount++;
1127 if (pattern.numOccurrences("aaaaaa", true) != 4)
1128 failCount++;
1129
1130 pattern = Pattern.compile("^");
1131 if (pattern.numOccurrences("aaaaaa", false) != 1)
1132 failCount++;
1133 if (pattern.numOccurrences("aaaaaa", true) != 1)
1134 failCount++;
1135
1136 report("Number of Occurrences");
1137 }
1138 */
1139
1140 // This test is for 4776374
1141 private static void caretBetweenTerminatorsTest() throws Exception {
1142 int flags1 = Pattern.DOTALL;
1143 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1144 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1145 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1146
1147 check("^....", flags1, "test\ntest", "test", true);
1148 check(".....^", flags1, "test\ntest", "test", false);
1149 check(".....^", flags1, "test\n", "test", false);
1150 check("....^", flags1, "test\r\n", "test", false);
1151
1152 check("^....", flags2, "test\ntest", "test", true);
1153 check("....^", flags2, "test\ntest", "test", false);
1154 check(".....^", flags2, "test\n", "test", false);
1155 check("....^", flags2, "test\r\n", "test", false);
1156
1157 check("^....", flags3, "test\ntest", "test", true);
1158 check(".....^", flags3, "test\ntest", "test\n", true);
1159 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1160 check(".....^", flags3, "test\n", "test", false);
1161 check(".....^", flags3, "test\r\n", "test", false);
1162 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1163
1164 check("^....", flags4, "test\ntest", "test", true);
1165 check(".....^", flags3, "test\ntest", "test\n", true);
1166 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1167 check(".....^", flags4, "test\n", "test\n", false);
1168 check(".....^", flags4, "test\r\n", "test\r", false);
1169
1170 // Supplementary character test
1171 String t = toSupplementaries("test");
1172 check("^....", flags1, t+"\n"+t, t, true);
1173 check(".....^", flags1, t+"\n"+t, t, false);
1174 check(".....^", flags1, t+"\n", t, false);
1175 check("....^", flags1, t+"\r\n", t, false);
1176
1177 check("^....", flags2, t+"\n"+t, t, true);
1178 check("....^", flags2, t+"\n"+t, t, false);
1179 check(".....^", flags2, t+"\n", t, false);
1180 check("....^", flags2, t+"\r\n", t, false);
1181
1182 check("^....", flags3, t+"\n"+t, t, true);
1183 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1184 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1185 check(".....^", flags3, t+"\n", t, false);
1186 check(".....^", flags3, t+"\r\n", t, false);
1187 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1188
1189 check("^....", flags4, t+"\n"+t, t, true);
1190 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1191 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1192 check(".....^", flags4, t+"\n", t+"\n", false);
1193 check(".....^", flags4, t+"\r\n", t+"\r", false);
1194
1195 report("Caret between terminators");
1196 }
1197
1198 // This test is for 4727935
1199 private static void dollarAtEndTest() throws Exception {
1200 int flags1 = Pattern.DOTALL;
1201 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1202 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1203
1204 check("....$", flags1, "test\n", "test", true);
1205 check("....$", flags1, "test\r\n", "test", true);
1206 check(".....$", flags1, "test\n", "test\n", true);
1207 check(".....$", flags1, "test\u0085", "test\u0085", true);
1208 check("....$", flags1, "test\u0085", "test", true);
1209
1210 check("....$", flags2, "test\n", "test", true);
1211 check(".....$", flags2, "test\n", "test\n", true);
1212 check(".....$", flags2, "test\u0085", "test\u0085", true);
1213 check("....$", flags2, "test\u0085", "est\u0085", true);
1214
1215 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1216 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1217 check("....$blah", flags3, "test\nblah", "!!!!", false);
1218 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1219
1220 // Supplementary character test
1221 String t = toSupplementaries("test");
1222 String b = toSupplementaries("blah");
1223 check("....$", flags1, t+"\n", t, true);
1224 check("....$", flags1, t+"\r\n", t, true);
1225 check(".....$", flags1, t+"\n", t+"\n", true);
1226 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1227 check("....$", flags1, t+"\u0085", t, true);
1228
1229 check("....$", flags2, t+"\n", t, true);
1230 check(".....$", flags2, t+"\n", t+"\n", true);
1231 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1232 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1233
1234 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1235 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1236 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1237 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1238
1239 report("Dollar at End");
1240 }
1241
1242 // This test is for 4711773
1243 private static void multilineDollarTest() throws Exception {
1244 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1245 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1246 matcher.find();
1247 if (matcher.start(0) != 9)
1248 failCount++;
1249 matcher.find();
1250 if (matcher.start(0) != 20)
1251 failCount++;
1252
1253 // Supplementary character test
1254 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1255 matcher.find();
1256 if (matcher.start(0) != 9*2)
1257 failCount++;
1258 matcher.find();
1259 if (matcher.start(0) != 20*2)
1260 failCount++;
1261
1262 report("Multiline Dollar");
1263 }
1264
1265 private static void reluctantRepetitionTest() throws Exception {
1266 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1267 check(p, "1 word word word 2", true);
1268 check(p, "1 wor wo w 2", true);
1269 check(p, "1 word word 2", true);
1270 check(p, "1 word 2", true);
1271 check(p, "1 wo w w 2", true);
1272 check(p, "1 wo w 2", true);
1273 check(p, "1 wor w 2", true);
1274
1275 p = Pattern.compile("([a-z])+?c");
1276 Matcher m = p.matcher("ababcdefdec");
1277 check(m, "ababc");
1278
1279 // Supplementary character test
1280 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1281 m = p.matcher(toSupplementaries("ababcdefdec"));
1282 check(m, toSupplementaries("ababc"));
1283
1284 report("Reluctant Repetition");
1285 }
1286
1287 private static void serializeTest() throws Exception {
1288 String patternStr = "(b)";
1289 String matchStr = "b";
1290 Pattern pattern = Pattern.compile(patternStr);
1291 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1292 ObjectOutputStream oos = new ObjectOutputStream(baos);
1293 oos.writeObject(pattern);
1294 oos.close();
1295 ObjectInputStream ois = new ObjectInputStream(
1296 new ByteArrayInputStream(baos.toByteArray()));
1297 Pattern serializedPattern = (Pattern)ois.readObject();
1298 ois.close();
1299 Matcher matcher = serializedPattern.matcher(matchStr);
1300 if (!matcher.matches())
1301 failCount++;
1302 if (matcher.groupCount() != 1)
1303 failCount++;
1304
1305 report("Serialization");
1306 }
1307
1308 private static void gTest() {
1309 Pattern pattern = Pattern.compile("\\G\\w");
1310 Matcher matcher = pattern.matcher("abc#x#x");
1311 matcher.find();
1312 matcher.find();
1313 matcher.find();
1314 if (matcher.find())
1315 failCount++;
1316
1317 pattern = Pattern.compile("\\GA*");
1318 matcher = pattern.matcher("1A2AA3");
1319 matcher.find();
1320 if (matcher.find())
1321 failCount++;
1322
1323 pattern = Pattern.compile("\\GA*");
1324 matcher = pattern.matcher("1A2AA3");
1325 if (!matcher.find(1))
1326 failCount++;
1327 matcher.find();
1328 if (matcher.find())
1329 failCount++;
1330
1331 report("\\G");
1332 }
1333
1334 private static void zTest() {
1335 Pattern pattern = Pattern.compile("foo\\Z");
1336 // Positives
1337 check(pattern, "foo\u0085", true);
1338 check(pattern, "foo\u2028", true);
1339 check(pattern, "foo\u2029", true);
1340 check(pattern, "foo\n", true);
1341 check(pattern, "foo\r", true);
1342 check(pattern, "foo\r\n", true);
1343 // Negatives
1344 check(pattern, "fooo", false);
1345 check(pattern, "foo\n\r", false);
1346
1347 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1348 // Positives
1349 check(pattern, "foo", true);
1350 check(pattern, "foo\n", true);
1351 // Negatives
1352 check(pattern, "foo\r", false);
1353 check(pattern, "foo\u0085", false);
1354 check(pattern, "foo\u2028", false);
1355 check(pattern, "foo\u2029", false);
1356
1357 report("\\Z");
1358 }
1359
1360 private static void replaceFirstTest() {
1361 Pattern pattern = Pattern.compile("(ab)(c*)");
1362 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1363 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1364 failCount++;
1365
1366 matcher.reset("zzzabccczzzabcczzzabccczzz");
1367 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1368 failCount++;
1369
1370 matcher.reset("zzzabccczzzabcczzzabccczzz");
1371 String result = matcher.replaceFirst("$1");
1372 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1373 failCount++;
1374
1375 matcher.reset("zzzabccczzzabcczzzabccczzz");
1376 result = matcher.replaceFirst("$2");
1377 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1378 failCount++;
1379
1380 pattern = Pattern.compile("a*");
1381 matcher = pattern.matcher("aaaaaaaaaa");
1382 if (!matcher.replaceFirst("test").equals("test"))
1383 failCount++;
1384
1385 pattern = Pattern.compile("a+");
1386 matcher = pattern.matcher("zzzaaaaaaaaaa");
1387 if (!matcher.replaceFirst("test").equals("zzztest"))
1388 failCount++;
1389
1390 // Supplementary character test
1391 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1392 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1393 if (!matcher.replaceFirst(toSupplementaries("test"))
1394 .equals(toSupplementaries("testzzzabcczzzabccc")))
1395 failCount++;
1396
1397 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1398 if (!matcher.replaceFirst(toSupplementaries("test")).
1399 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1400 failCount++;
1401
1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1403 result = matcher.replaceFirst("$1");
1404 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1405 failCount++;
1406
1407 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1408 result = matcher.replaceFirst("$2");
1409 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1410 failCount++;
1411
1412 pattern = Pattern.compile(toSupplementaries("a*"));
1413 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1414 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1415 failCount++;
1416
1417 pattern = Pattern.compile(toSupplementaries("a+"));
1418 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1419 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1420 failCount++;
1421
1422 report("Replace First");
1423 }
1424
1425 private static void unixLinesTest() {
1426 Pattern pattern = Pattern.compile(".*");
1427 Matcher matcher = pattern.matcher("aa\u2028blah");
1428 matcher.find();
1429 if (!matcher.group(0).equals("aa"))
1430 failCount++;
1431
1432 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1433 matcher = pattern.matcher("aa\u2028blah");
1434 matcher.find();
1435 if (!matcher.group(0).equals("aa\u2028blah"))
1436 failCount++;
1437
1438 pattern = Pattern.compile("[az]$",
1439 Pattern.MULTILINE | Pattern.UNIX_LINES);
1440 matcher = pattern.matcher("aa\u2028zz");
1441 check(matcher, "a\u2028", false);
1442
1443 // Supplementary character test
1444 pattern = Pattern.compile(".*");
1445 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1446 matcher.find();
1447 if (!matcher.group(0).equals(toSupplementaries("aa")))
1448 failCount++;
1449
1450 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1451 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1452 matcher.find();
1453 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1454 failCount++;
1455
1456 pattern = Pattern.compile(toSupplementaries("[az]$"),
1457 Pattern.MULTILINE | Pattern.UNIX_LINES);
1458 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1459 check(matcher, toSupplementaries("a\u2028"), false);
1460
1461 report("Unix Lines");
1462 }
1463
1464 private static void commentsTest() {
1465 int flags = Pattern.COMMENTS;
1466
1467 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1468 Matcher matcher = pattern.matcher("aa#aa");
1469 if (!matcher.matches())
1470 failCount++;
1471
1472 pattern = Pattern.compile("aa # blah", flags);
1473 matcher = pattern.matcher("aa");
1474 if (!matcher.matches())
1475 failCount++;
1476
1477 pattern = Pattern.compile("aa blah", flags);
1478 matcher = pattern.matcher("aablah");
1479 if (!matcher.matches())
1480 failCount++;
1481
1482 pattern = Pattern.compile("aa # blah blech ", flags);
1483 matcher = pattern.matcher("aa");
1484 if (!matcher.matches())
1485 failCount++;
1486
1487 pattern = Pattern.compile("aa # blah\n ", flags);
1488 matcher = pattern.matcher("aa");
1489 if (!matcher.matches())
1490 failCount++;
1491
1492 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1493 matcher = pattern.matcher("aabc");
1494 if (!matcher.matches())
1495 failCount++;
1496
1497 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1498 matcher = pattern.matcher("aabc");
1499 if (!matcher.matches())
1500 failCount++;
1501
1502 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1503 matcher = pattern.matcher("aabc#blech");
1504 if (!matcher.matches())
1505 failCount++;
1506
1507 // Supplementary character test
1508 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1509 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1510 if (!matcher.matches())
1511 failCount++;
1512
1513 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1514 matcher = pattern.matcher(toSupplementaries("aa"));
1515 if (!matcher.matches())
1516 failCount++;
1517
1518 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1519 matcher = pattern.matcher(toSupplementaries("aablah"));
1520 if (!matcher.matches())
1521 failCount++;
1522
1523 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1524 matcher = pattern.matcher(toSupplementaries("aa"));
1525 if (!matcher.matches())
1526 failCount++;
1527
1528 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1529 matcher = pattern.matcher(toSupplementaries("aa"));
1530 if (!matcher.matches())
1531 failCount++;
1532
1533 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1534 matcher = pattern.matcher(toSupplementaries("aabc"));
1535 if (!matcher.matches())
1536 failCount++;
1537
1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1539 matcher = pattern.matcher(toSupplementaries("aabc"));
1540 if (!matcher.matches())
1541 failCount++;
1542
1543 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1544 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1545 if (!matcher.matches())
1546 failCount++;
1547
1548 report("Comments");
1549 }
1550
1551 private static void caseFoldingTest() { // bug 4504687
1552 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1553 Pattern pattern = Pattern.compile("aa", flags);
1554 Matcher matcher = pattern.matcher("ab");
1555 if (matcher.matches())
1556 failCount++;
1557
1558 pattern = Pattern.compile("aA", flags);
1559 matcher = pattern.matcher("ab");
1560 if (matcher.matches())
1561 failCount++;
1562
1563 pattern = Pattern.compile("aa", flags);
1564 matcher = pattern.matcher("aB");
1565 if (matcher.matches())
1566 failCount++;
1567 matcher = pattern.matcher("Ab");
1568 if (matcher.matches())
1569 failCount++;
1570
1571 // ASCII "a"
1572 // Latin-1 Supplement "a" + grave
1573 // Cyrillic "a"
1574 String[] patterns = new String[] {
1575 //single
1576 "a", "\u00e0", "\u0430",
1577 //slice
1578 "ab", "\u00e0\u00e1", "\u0430\u0431",
1579 //class single
1580 "[a]", "[\u00e0]", "[\u0430]",
1581 //class range
1582 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1583 //back reference
1584 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1585 };
1586
1587 String[] texts = new String[] {
1588 "A", "\u00c0", "\u0410",
1589 "AB", "\u00c0\u00c1", "\u0410\u0411",
1590 "A", "\u00c0", "\u0410",
1591 "B", "\u00c2", "\u0411",
1592 "aA", "\u00e0\u00c0", "\u0430\u0410"
1593 };
1594
1595 boolean[] expected = new boolean[] {
1596 true, false, false,
1597 true, false, false,
1598 true, false, false,
1599 true, false, false,
1600 true, false, false
1601 };
1602
1603 flags = Pattern.CASE_INSENSITIVE;
1604 for (int i = 0; i < patterns.length; i++) {
1605 pattern = Pattern.compile(patterns[i], flags);
1606 matcher = pattern.matcher(texts[i]);
1607 if (matcher.matches() != expected[i]) {
1608 System.out.println("<1> Failed at " + i);
1609 failCount++;
1610 }
1611 }
1612
1613 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1614 for (int i = 0; i < patterns.length; i++) {
1615 pattern = Pattern.compile(patterns[i], flags);
1616 matcher = pattern.matcher(texts[i]);
1617 if (!matcher.matches()) {
1618 System.out.println("<2> Failed at " + i);
1619 failCount++;
1620 }
1621 }
1622 // flag unicode_case alone should do nothing
1623 flags = Pattern.UNICODE_CASE;
1624 for (int i = 0; i < patterns.length; i++) {
1625 pattern = Pattern.compile(patterns[i], flags);
1626 matcher = pattern.matcher(texts[i]);
1627 if (matcher.matches()) {
1628 System.out.println("<3> Failed at " + i);
1629 failCount++;
1630 }
1631 }
1632
1633 // Special cases: i, I, u+0131 and u+0130
1634 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1635 pattern = Pattern.compile("[h-j]+", flags);
1636 if (!pattern.matcher("\u0131\u0130").matches())
1637 failCount++;
1638 report("Case Folding");
1639 }
1640
1641 private static void appendTest() {
1642 Pattern pattern = Pattern.compile("(ab)(cd)");
1643 Matcher matcher = pattern.matcher("abcd");
1644 String result = matcher.replaceAll("$2$1");
1645 if (!result.equals("cdab"))
1646 failCount++;
1647
1648 String s1 = "Swap all: first = 123, second = 456";
1649 String s2 = "Swap one: first = 123, second = 456";
1650 String r = "$3$2$1";
1651 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1652 matcher = pattern.matcher(s1);
1653
1654 result = matcher.replaceAll(r);
1655 if (!result.equals("Swap all: 123 = first, 456 = second"))
1656 failCount++;
1657
1658 matcher = pattern.matcher(s2);
1659
1660 if (matcher.find()) {
1661 StringBuffer sb = new StringBuffer();
1662 matcher.appendReplacement(sb, r);
1663 matcher.appendTail(sb);
1664 result = sb.toString();
1665 if (!result.equals("Swap one: 123 = first, second = 456"))
1666 failCount++;
1667 }
1668
1669 // Supplementary character test
1670 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1671 matcher = pattern.matcher(toSupplementaries("abcd"));
1672 result = matcher.replaceAll("$2$1");
1673 if (!result.equals(toSupplementaries("cdab")))
1674 failCount++;
1675
1676 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1677 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1678 r = toSupplementaries("$3$2$1");
1679 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1680 matcher = pattern.matcher(s1);
1681
1682 result = matcher.replaceAll(r);
1683 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1684 failCount++;
1685
1686 matcher = pattern.matcher(s2);
1687
1688 if (matcher.find()) {
1689 StringBuffer sb = new StringBuffer();
1690 matcher.appendReplacement(sb, r);
1691 matcher.appendTail(sb);
1692 result = sb.toString();
1693 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1694 failCount++;
1695 }
1696 report("Append");
1697 }
1698
1699 private static void splitTest() {
1700 Pattern pattern = Pattern.compile(":");
1701 String[] result = pattern.split("foo:and:boo", 2);
1702 if (!result[0].equals("foo"))
1703 failCount++;
1704 if (!result[1].equals("and:boo"))
1705 failCount++;
1706 // Supplementary character test
1707 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1708 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1709 if (!result[0].equals(toSupplementaries("foo")))
1710 failCount++;
1711 if (!result[1].equals(toSupplementaries("andXboo")))
1712 failCount++;
1713
1714 CharBuffer cb = CharBuffer.allocate(100);
1715 cb.put("foo:and:boo");
1716 cb.flip();
1717 result = pattern.split(cb);
1718 if (!result[0].equals("foo"))
1719 failCount++;
1720 if (!result[1].equals("and"))
1721 failCount++;
1722 if (!result[2].equals("boo"))
1723 failCount++;
1724
1725 // Supplementary character test
1726 CharBuffer cbs = CharBuffer.allocate(100);
1727 cbs.put(toSupplementaries("fooXandXboo"));
1728 cbs.flip();
1729 result = patternX.split(cbs);
1730 if (!result[0].equals(toSupplementaries("foo")))
1731 failCount++;
1732 if (!result[1].equals(toSupplementaries("and")))
1733 failCount++;
1734 if (!result[2].equals(toSupplementaries("boo")))
1735 failCount++;
1736
1737 String source = "0123456789";
1738 for (int limit=-2; limit<3; limit++) {
1739 for (int x=0; x<10; x++) {
1740 result = source.split(Integer.toString(x), limit);
1741 int expectedLength = limit < 1 ? 2 : limit;
1742
1743 if ((limit == 0) && (x == 9)) {
1744 // expected dropping of ""
1745 if (result.length != 1)
1746 failCount++;
1747 if (!result[0].equals("012345678")) {
1748 failCount++;
1749 }
1750 } else {
1751 if (result.length != expectedLength) {
1752 failCount++;
1753 }
1754 if (!result[0].equals(source.substring(0,x))) {
1755 if (limit != 1) {
1756 failCount++;
1757 } else {
1758 if (!result[0].equals(source.substring(0,10))) {
1759 failCount++;
1760 }
1761 }
1762 }
1763 if (expectedLength > 1) { // Check segment 2
1764 if (!result[1].equals(source.substring(x+1,10)))
1765 failCount++;
1766 }
1767 }
1768 }
1769 }
1770 // Check the case for no match found
1771 for (int limit=-2; limit<3; limit++) {
1772 result = source.split("e", limit);
1773 if (result.length != 1)
1774 failCount++;
1775 if (!result[0].equals(source))
1776 failCount++;
1777 }
1778 // Check the case for limit == 0, source = "";
sherman1242a6d2013-11-13 11:26:01 -08001779 // split() now returns 0-length for empty source "" see #6559590
sherman0b4d42d2009-02-23 21:06:15 -08001780 source = "";
1781 result = source.split("e", 0);
1782 if (result.length != 1)
1783 failCount++;
1784 if (!result[0].equals(source))
1785 failCount++;
1786
sherman1242a6d2013-11-13 11:26:01 -08001787 // Check both split() and splitAsStraem(), especially for zero-lenth
1788 // input and zero-lenth match cases
1789 String[][] input = new String[][] {
1790 { " ", "Abc Efg Hij" }, // normal non-zero-match
1791 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1792 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1793 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1794 { "(?=\\p{Lu})", "AbcEfg" },
1795 { "(?=\\p{Lu})", "Abc" },
1796 { " ", "" }, // zero-length input
1797 { ".*", "" },
1798
1799 // some tests from PatternStreamTest.java
1800 { "4", "awgqwefg1fefw4vssv1vvv1" },
1801 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1802 { "1", "awgqwefg1fefw4vssv1vvv1" },
1803 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1804 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1805 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1806 { "\u56da", "" },
1807 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1808 { "o", "boo:and:foo" },
1809 { "o", "booooo:and:fooooo" },
1810 { "o", "fooooo:" },
1811 };
1812
1813 String[][] expected = new String[][] {
1814 { "Abc", "Efg", "Hij" },
1815 { "", "Abc", "Efg", "Hij" },
1816 { "Abc", "", "Efg", "Hij" },
1817 { "Abc", "Efg", "Hij" },
1818 { "Abc", "Efg" },
1819 { "Abc" },
sherman12888112013-11-13 22:22:28 -08001820 { "" },
1821 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001822
1823 { "awgqwefg1fefw", "vssv1vvv1" },
1824 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1825 { "awgqwefg", "fefw4vssv", "vvv" },
1826 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1827 { "1", "23", "456", "7890" },
1828 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
sherman12888112013-11-13 22:22:28 -08001829 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001830 { "This", "is", "testing", "", "with", "different", "separators" },
1831 { "b", "", ":and:f" },
1832 { "b", "", "", "", "", ":and:f" },
1833 { "f", "", "", "", "", ":" },
1834 };
1835 for (int i = 0; i < input.length; i++) {
1836 pattern = Pattern.compile(input[i][0]);
sherman12888112013-11-13 22:22:28 -08001837 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001838 failCount++;
sherman12888112013-11-13 22:22:28 -08001839 }
1840 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting
1841 // array for zero-length input for now
1842 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1843 expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001844 failCount++;
sherman12888112013-11-13 22:22:28 -08001845 }
sherman1242a6d2013-11-13 11:26:01 -08001846 }
sherman0b4d42d2009-02-23 21:06:15 -08001847 report("Split");
1848 }
1849
1850 private static void negationTest() {
1851 Pattern pattern = Pattern.compile("[\\[@^]+");
1852 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1853 if (!matcher.find())
1854 failCount++;
1855 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1856 failCount++;
1857 pattern = Pattern.compile("[@\\[^]+");
1858 matcher = pattern.matcher("@@@@[[[[^^^^");
1859 if (!matcher.find())
1860 failCount++;
1861 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1862 failCount++;
1863 pattern = Pattern.compile("[@\\[^@]+");
1864 matcher = pattern.matcher("@@@@[[[[^^^^");
1865 if (!matcher.find())
1866 failCount++;
1867 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1868 failCount++;
1869
1870 pattern = Pattern.compile("\\)");
1871 matcher = pattern.matcher("xxx)xxx");
1872 if (!matcher.find())
1873 failCount++;
1874
1875 report("Negation");
1876 }
1877
1878 private static void ampersandTest() {
1879 Pattern pattern = Pattern.compile("[&@]+");
1880 check(pattern, "@@@@&&&&", true);
1881
1882 pattern = Pattern.compile("[@&]+");
1883 check(pattern, "@@@@&&&&", true);
1884
1885 pattern = Pattern.compile("[@\\&]+");
1886 check(pattern, "@@@@&&&&", true);
1887
1888 report("Ampersand");
1889 }
1890
1891 private static void octalTest() throws Exception {
1892 Pattern pattern = Pattern.compile("\\u0007");
1893 Matcher matcher = pattern.matcher("\u0007");
1894 if (!matcher.matches())
1895 failCount++;
1896 pattern = Pattern.compile("\\07");
1897 matcher = pattern.matcher("\u0007");
1898 if (!matcher.matches())
1899 failCount++;
1900 pattern = Pattern.compile("\\007");
1901 matcher = pattern.matcher("\u0007");
1902 if (!matcher.matches())
1903 failCount++;
1904 pattern = Pattern.compile("\\0007");
1905 matcher = pattern.matcher("\u0007");
1906 if (!matcher.matches())
1907 failCount++;
1908 pattern = Pattern.compile("\\040");
1909 matcher = pattern.matcher("\u0020");
1910 if (!matcher.matches())
1911 failCount++;
1912 pattern = Pattern.compile("\\0403");
1913 matcher = pattern.matcher("\u00203");
1914 if (!matcher.matches())
1915 failCount++;
1916 pattern = Pattern.compile("\\0103");
1917 matcher = pattern.matcher("\u0043");
1918 if (!matcher.matches())
1919 failCount++;
1920
1921 report("Octal");
1922 }
1923
1924 private static void longPatternTest() throws Exception {
1925 try {
1926 Pattern pattern = Pattern.compile(
1927 "a 32-character-long pattern xxxx");
1928 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1929 pattern = Pattern.compile("a thirty four character long regex");
1930 StringBuffer patternToBe = new StringBuffer(101);
1931 for (int i=0; i<100; i++)
1932 patternToBe.append((char)(97 + i%26));
1933 pattern = Pattern.compile(patternToBe.toString());
1934 } catch (PatternSyntaxException e) {
1935 failCount++;
1936 }
1937
1938 // Supplementary character test
1939 try {
1940 Pattern pattern = Pattern.compile(
1941 toSupplementaries("a 32-character-long pattern xxxx"));
1942 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1943 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1944 StringBuffer patternToBe = new StringBuffer(101*2);
1945 for (int i=0; i<100; i++)
1946 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1947 + 97 + i%26));
1948 pattern = Pattern.compile(patternToBe.toString());
1949 } catch (PatternSyntaxException e) {
1950 failCount++;
1951 }
1952 report("LongPattern");
1953 }
1954
1955 private static void group0Test() throws Exception {
1956 Pattern pattern = Pattern.compile("(tes)ting");
1957 Matcher matcher = pattern.matcher("testing");
1958 check(matcher, "testing");
1959
1960 matcher.reset("testing");
1961 if (matcher.lookingAt()) {
1962 if (!matcher.group(0).equals("testing"))
1963 failCount++;
1964 } else {
1965 failCount++;
1966 }
1967
1968 matcher.reset("testing");
1969 if (matcher.matches()) {
1970 if (!matcher.group(0).equals("testing"))
1971 failCount++;
1972 } else {
1973 failCount++;
1974 }
1975
1976 pattern = Pattern.compile("(tes)ting");
1977 matcher = pattern.matcher("testing");
1978 if (matcher.lookingAt()) {
1979 if (!matcher.group(0).equals("testing"))
1980 failCount++;
1981 } else {
1982 failCount++;
1983 }
1984
1985 pattern = Pattern.compile("^(tes)ting");
1986 matcher = pattern.matcher("testing");
1987 if (matcher.matches()) {
1988 if (!matcher.group(0).equals("testing"))
1989 failCount++;
1990 } else {
1991 failCount++;
1992 }
1993
1994 // Supplementary character test
1995 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1996 matcher = pattern.matcher(toSupplementaries("testing"));
1997 check(matcher, toSupplementaries("testing"));
1998
1999 matcher.reset(toSupplementaries("testing"));
2000 if (matcher.lookingAt()) {
2001 if (!matcher.group(0).equals(toSupplementaries("testing")))
2002 failCount++;
2003 } else {
2004 failCount++;
2005 }
2006
2007 matcher.reset(toSupplementaries("testing"));
2008 if (matcher.matches()) {
2009 if (!matcher.group(0).equals(toSupplementaries("testing")))
2010 failCount++;
2011 } else {
2012 failCount++;
2013 }
2014
2015 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2016 matcher = pattern.matcher(toSupplementaries("testing"));
2017 if (matcher.lookingAt()) {
2018 if (!matcher.group(0).equals(toSupplementaries("testing")))
2019 failCount++;
2020 } else {
2021 failCount++;
2022 }
2023
2024 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2025 matcher = pattern.matcher(toSupplementaries("testing"));
2026 if (matcher.matches()) {
2027 if (!matcher.group(0).equals(toSupplementaries("testing")))
2028 failCount++;
2029 } else {
2030 failCount++;
2031 }
2032
2033 report("Group0");
2034 }
2035
2036 private static void findIntTest() throws Exception {
2037 Pattern p = Pattern.compile("blah");
2038 Matcher m = p.matcher("zzzzblahzzzzzblah");
2039 boolean result = m.find(2);
2040 if (!result)
2041 failCount++;
2042
2043 p = Pattern.compile("$");
2044 m = p.matcher("1234567890");
2045 result = m.find(10);
2046 if (!result)
2047 failCount++;
2048 try {
2049 result = m.find(11);
2050 failCount++;
2051 } catch (IndexOutOfBoundsException e) {
2052 // correct result
2053 }
2054
2055 // Supplementary character test
2056 p = Pattern.compile(toSupplementaries("blah"));
2057 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2058 result = m.find(2);
2059 if (!result)
2060 failCount++;
2061
2062 report("FindInt");
2063 }
2064
2065 private static void emptyPatternTest() throws Exception {
2066 Pattern p = Pattern.compile("");
2067 Matcher m = p.matcher("foo");
2068
2069 // Should find empty pattern at beginning of input
2070 boolean result = m.find();
2071 if (result != true)
2072 failCount++;
2073 if (m.start() != 0)
2074 failCount++;
2075
2076 // Should not match entire input if input is not empty
2077 m.reset();
2078 result = m.matches();
2079 if (result == true)
2080 failCount++;
2081
2082 try {
2083 m.start(0);
2084 failCount++;
2085 } catch (IllegalStateException e) {
2086 // Correct result
2087 }
2088
2089 // Should match entire input if input is empty
2090 m.reset("");
2091 result = m.matches();
2092 if (result != true)
2093 failCount++;
2094
2095 result = Pattern.matches("", "");
2096 if (result != true)
2097 failCount++;
2098
2099 result = Pattern.matches("", "foo");
2100 if (result == true)
2101 failCount++;
2102 report("EmptyPattern");
2103 }
2104
2105 private static void charClassTest() throws Exception {
2106 Pattern pattern = Pattern.compile("blah[ab]]blech");
2107 check(pattern, "blahb]blech", true);
2108
2109 pattern = Pattern.compile("[abc[def]]");
2110 check(pattern, "b", true);
2111
2112 // Supplementary character tests
2113 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2114 check(pattern, toSupplementaries("blahb]blech"), true);
2115
2116 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2117 check(pattern, toSupplementaries("b"), true);
2118
2119 try {
2120 // u00ff when UNICODE_CASE
2121 pattern = Pattern.compile("[ab\u00ffcd]",
2122 Pattern.CASE_INSENSITIVE|
2123 Pattern.UNICODE_CASE);
2124 check(pattern, "ab\u00ffcd", true);
2125 check(pattern, "Ab\u0178Cd", true);
2126
2127 // u00b5 when UNICODE_CASE
2128 pattern = Pattern.compile("[ab\u00b5cd]",
2129 Pattern.CASE_INSENSITIVE|
2130 Pattern.UNICODE_CASE);
2131 check(pattern, "ab\u00b5cd", true);
2132 check(pattern, "Ab\u039cCd", true);
2133 } catch (Exception e) { failCount++; }
2134
2135 /* Special cases
2136 (1)LatinSmallLetterLongS u+017f
2137 (2)LatinSmallLetterDotlessI u+0131
2138 (3)LatineCapitalLetterIWithDotAbove u+0130
2139 (4)KelvinSign u+212a
2140 (5)AngstromSign u+212b
2141 */
2142 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2143 pattern = Pattern.compile("[sik\u00c5]+", flags);
2144 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2145 failCount++;
2146
2147 report("CharClass");
2148 }
2149
2150 private static void caretTest() throws Exception {
2151 Pattern pattern = Pattern.compile("\\w*");
2152 Matcher matcher = pattern.matcher("a#bc#def##g");
2153 check(matcher, "a");
2154 check(matcher, "");
2155 check(matcher, "bc");
2156 check(matcher, "");
2157 check(matcher, "def");
2158 check(matcher, "");
2159 check(matcher, "");
2160 check(matcher, "g");
2161 check(matcher, "");
2162 if (matcher.find())
2163 failCount++;
2164
2165 pattern = Pattern.compile("^\\w*");
2166 matcher = pattern.matcher("a#bc#def##g");
2167 check(matcher, "a");
2168 if (matcher.find())
2169 failCount++;
2170
2171 pattern = Pattern.compile("\\w");
2172 matcher = pattern.matcher("abc##x");
2173 check(matcher, "a");
2174 check(matcher, "b");
2175 check(matcher, "c");
2176 check(matcher, "x");
2177 if (matcher.find())
2178 failCount++;
2179
2180 pattern = Pattern.compile("^\\w");
2181 matcher = pattern.matcher("abc##x");
2182 check(matcher, "a");
2183 if (matcher.find())
2184 failCount++;
2185
2186 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2187 matcher = pattern.matcher("abcdef-ghi\njklmno");
2188 check(matcher, "abc");
2189 if (matcher.find())
2190 failCount++;
2191
2192 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2193 matcher = pattern.matcher("abcdef-ghi\njklmno");
2194 check(matcher, "abc");
2195 check(matcher, "jkl");
2196 if (matcher.find())
2197 failCount++;
2198
2199 pattern = Pattern.compile("^", Pattern.MULTILINE);
2200 matcher = pattern.matcher("this is some text");
2201 String result = matcher.replaceAll("X");
2202 if (!result.equals("Xthis is some text"))
2203 failCount++;
2204
2205 pattern = Pattern.compile("^");
2206 matcher = pattern.matcher("this is some text");
2207 result = matcher.replaceAll("X");
2208 if (!result.equals("Xthis is some text"))
2209 failCount++;
2210
2211 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2212 matcher = pattern.matcher("this is some text\n");
2213 result = matcher.replaceAll("X");
2214 if (!result.equals("Xthis is some text\n"))
2215 failCount++;
2216
2217 report("Caret");
2218 }
2219
2220 private static void groupCaptureTest() throws Exception {
2221 // Independent group
2222 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2223 Matcher matcher = pattern.matcher("xxxyyyzzz");
2224 matcher.find();
2225 try {
2226 String blah = matcher.group(1);
2227 failCount++;
2228 } catch (IndexOutOfBoundsException ioobe) {
2229 // Good result
2230 }
2231 // Pure group
2232 pattern = Pattern.compile("x+(?:y+)z+");
2233 matcher = pattern.matcher("xxxyyyzzz");
2234 matcher.find();
2235 try {
2236 String blah = matcher.group(1);
2237 failCount++;
2238 } catch (IndexOutOfBoundsException ioobe) {
2239 // Good result
2240 }
2241
2242 // Supplementary character tests
2243 // Independent group
2244 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2245 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2246 matcher.find();
2247 try {
2248 String blah = matcher.group(1);
2249 failCount++;
2250 } catch (IndexOutOfBoundsException ioobe) {
2251 // Good result
2252 }
2253 // Pure group
2254 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2255 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2256 matcher.find();
2257 try {
2258 String blah = matcher.group(1);
2259 failCount++;
2260 } catch (IndexOutOfBoundsException ioobe) {
2261 // Good result
2262 }
2263
2264 report("GroupCapture");
2265 }
2266
2267 private static void backRefTest() throws Exception {
2268 Pattern pattern = Pattern.compile("(a*)bc\\1");
2269 check(pattern, "zzzaabcazzz", true);
2270
2271 pattern = Pattern.compile("(a*)bc\\1");
2272 check(pattern, "zzzaabcaazzz", true);
2273
2274 pattern = Pattern.compile("(abc)(def)\\1");
2275 check(pattern, "abcdefabc", true);
2276
2277 pattern = Pattern.compile("(abc)(def)\\3");
2278 check(pattern, "abcdefabc", false);
2279
2280 try {
2281 for (int i = 1; i < 10; i++) {
2282 // Make sure backref 1-9 are always accepted
2283 pattern = Pattern.compile("abcdef\\" + i);
2284 // and fail to match if the target group does not exit
2285 check(pattern, "abcdef", false);
2286 }
2287 } catch(PatternSyntaxException e) {
2288 failCount++;
2289 }
2290
2291 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2292 check(pattern, "abcdefghija", false);
2293 check(pattern, "abcdefghija1", true);
2294
2295 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2296 check(pattern, "abcdefghijkk", true);
2297
2298 pattern = Pattern.compile("(a)bcdefghij\\11");
2299 check(pattern, "abcdefghija1", true);
2300
2301 // Supplementary character tests
2302 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2303 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2304
2305 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2306 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2307
2308 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2309 check(pattern, toSupplementaries("abcdefabc"), true);
2310
2311 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2312 check(pattern, toSupplementaries("abcdefabc"), false);
2313
2314 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2315 check(pattern, toSupplementaries("abcdefghija"), false);
2316 check(pattern, toSupplementaries("abcdefghija1"), true);
2317
2318 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2319 check(pattern, toSupplementaries("abcdefghijkk"), true);
2320
2321 report("BackRef");
2322 }
2323
2324 /**
2325 * Unicode Technical Report #18, section 2.6 End of Line
2326 * There is no empty line to be matched in the sequence \u000D\u000A
2327 * but there is an empty line in the sequence \u000A\u000D.
2328 */
2329 private static void anchorTest() throws Exception {
2330 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2331 Matcher m = p.matcher("blah1\r\nblah2");
2332 m.find();
2333 m.find();
2334 if (!m.group().equals("blah2"))
2335 failCount++;
2336
2337 m.reset("blah1\n\rblah2");
2338 m.find();
2339 m.find();
2340 m.find();
2341 if (!m.group().equals("blah2"))
2342 failCount++;
2343
2344 // Test behavior of $ with \r\n at end of input
2345 p = Pattern.compile(".+$");
2346 m = p.matcher("blah1\r\n");
2347 if (!m.find())
2348 failCount++;
2349 if (!m.group().equals("blah1"))
2350 failCount++;
2351 if (m.find())
2352 failCount++;
2353
2354 // Test behavior of $ with \r\n at end of input in multiline
2355 p = Pattern.compile(".+$", Pattern.MULTILINE);
2356 m = p.matcher("blah1\r\n");
2357 if (!m.find())
2358 failCount++;
2359 if (m.find())
2360 failCount++;
2361
2362 // Test for $ recognition of \u0085 for bug 4527731
2363 p = Pattern.compile(".+$", Pattern.MULTILINE);
2364 m = p.matcher("blah1\u0085");
2365 if (!m.find())
2366 failCount++;
2367
2368 // Supplementary character test
2369 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2370 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2371 m.find();
2372 m.find();
2373 if (!m.group().equals(toSupplementaries("blah2")))
2374 failCount++;
2375
2376 m.reset(toSupplementaries("blah1\n\rblah2"));
2377 m.find();
2378 m.find();
2379 m.find();
2380 if (!m.group().equals(toSupplementaries("blah2")))
2381 failCount++;
2382
2383 // Test behavior of $ with \r\n at end of input
2384 p = Pattern.compile(".+$");
2385 m = p.matcher(toSupplementaries("blah1\r\n"));
2386 if (!m.find())
2387 failCount++;
2388 if (!m.group().equals(toSupplementaries("blah1")))
2389 failCount++;
2390 if (m.find())
2391 failCount++;
2392
2393 // Test behavior of $ with \r\n at end of input in multiline
2394 p = Pattern.compile(".+$", Pattern.MULTILINE);
2395 m = p.matcher(toSupplementaries("blah1\r\n"));
2396 if (!m.find())
2397 failCount++;
2398 if (m.find())
2399 failCount++;
2400
2401 // Test for $ recognition of \u0085 for bug 4527731
2402 p = Pattern.compile(".+$", Pattern.MULTILINE);
2403 m = p.matcher(toSupplementaries("blah1\u0085"));
2404 if (!m.find())
2405 failCount++;
2406
2407 report("Anchors");
2408 }
2409
2410 /**
2411 * A basic sanity test of Matcher.lookingAt().
2412 */
2413 private static void lookingAtTest() throws Exception {
2414 Pattern p = Pattern.compile("(ab)(c*)");
2415 Matcher m = p.matcher("abccczzzabcczzzabccc");
2416
2417 if (!m.lookingAt())
2418 failCount++;
2419
2420 if (!m.group().equals(m.group(0)))
2421 failCount++;
2422
2423 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2424 if (m.lookingAt())
2425 failCount++;
2426
2427 // Supplementary character test
2428 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2429 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2430
2431 if (!m.lookingAt())
2432 failCount++;
2433
2434 if (!m.group().equals(m.group(0)))
2435 failCount++;
2436
2437 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2438 if (m.lookingAt())
2439 failCount++;
2440
2441 report("Looking At");
2442 }
2443
2444 /**
2445 * A basic sanity test of Matcher.matches().
2446 */
2447 private static void matchesTest() throws Exception {
2448 // matches()
2449 Pattern p = Pattern.compile("ulb(c*)");
2450 Matcher m = p.matcher("ulbcccccc");
2451 if (!m.matches())
2452 failCount++;
2453
2454 // find() but not matches()
2455 m.reset("zzzulbcccccc");
2456 if (m.matches())
2457 failCount++;
2458
2459 // lookingAt() but not matches()
2460 m.reset("ulbccccccdef");
2461 if (m.matches())
2462 failCount++;
2463
2464 // matches()
2465 p = Pattern.compile("a|ad");
2466 m = p.matcher("ad");
2467 if (!m.matches())
2468 failCount++;
2469
2470 // Supplementary character test
2471 // matches()
2472 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2473 m = p.matcher(toSupplementaries("ulbcccccc"));
2474 if (!m.matches())
2475 failCount++;
2476
2477 // find() but not matches()
2478 m.reset(toSupplementaries("zzzulbcccccc"));
2479 if (m.matches())
2480 failCount++;
2481
2482 // lookingAt() but not matches()
2483 m.reset(toSupplementaries("ulbccccccdef"));
2484 if (m.matches())
2485 failCount++;
2486
2487 // matches()
2488 p = Pattern.compile(toSupplementaries("a|ad"));
2489 m = p.matcher(toSupplementaries("ad"));
2490 if (!m.matches())
2491 failCount++;
2492
2493 report("Matches");
2494 }
2495
2496 /**
2497 * A basic sanity test of Pattern.matches().
2498 */
2499 private static void patternMatchesTest() throws Exception {
2500 // matches()
2501 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2502 toSupplementaries("ulbcccccc")))
2503 failCount++;
2504
2505 // find() but not matches()
2506 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2507 toSupplementaries("zzzulbcccccc")))
2508 failCount++;
2509
2510 // lookingAt() but not matches()
2511 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2512 toSupplementaries("ulbccccccdef")))
2513 failCount++;
2514
2515 // Supplementary character test
2516 // matches()
2517 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2518 toSupplementaries("ulbcccccc")))
2519 failCount++;
2520
2521 // find() but not matches()
2522 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2523 toSupplementaries("zzzulbcccccc")))
2524 failCount++;
2525
2526 // lookingAt() but not matches()
2527 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2528 toSupplementaries("ulbccccccdef")))
2529 failCount++;
2530
2531 report("Pattern Matches");
2532 }
2533
2534 /**
2535 * Canonical equivalence testing. Tests the ability of the engine
2536 * to match sequences that are not explicitly specified in the
2537 * pattern when they are considered equivalent by the Unicode Standard.
2538 */
2539 private static void ceTest() throws Exception {
2540 // Decomposed char outside char classes
2541 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2542 Matcher m = p.matcher("test\u00e5");
2543 if (!m.matches())
2544 failCount++;
2545
2546 m.reset("testa\u030a");
2547 if (!m.matches())
2548 failCount++;
2549
2550 // Composed char outside char classes
2551 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2552 m = p.matcher("test\u00e5");
2553 if (!m.matches())
2554 failCount++;
2555
2556 m.reset("testa\u030a");
2557 if (!m.find())
2558 failCount++;
2559
2560 // Decomposed char inside a char class
2561 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2562 m = p.matcher("test\u00e5");
2563 if (!m.find())
2564 failCount++;
2565
2566 m.reset("testa\u030a");
2567 if (!m.find())
2568 failCount++;
2569
2570 // Composed char inside a char class
2571 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2572 m = p.matcher("test\u00e5");
2573 if (!m.find())
2574 failCount++;
2575
2576 m.reset("testa\u0300");
2577 if (!m.find())
2578 failCount++;
2579
2580 m.reset("testa\u030a");
2581 if (!m.find())
2582 failCount++;
2583
2584 // Marks that cannot legally change order and be equivalent
2585 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2586 check(p, "testa\u0308\u0300", true);
2587 check(p, "testa\u0300\u0308", false);
2588
2589 // Marks that can legally change order and be equivalent
2590 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2591 check(p, "testa\u0308\u0323", true);
2592 check(p, "testa\u0323\u0308", true);
2593
2594 // Test all equivalences of the sequence a\u0308\u0323\u0300
2595 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2596 check(p, "testa\u0308\u0323\u0300", true);
2597 check(p, "testa\u0323\u0308\u0300", true);
2598 check(p, "testa\u0308\u0300\u0323", true);
2599 check(p, "test\u00e4\u0323\u0300", true);
2600 check(p, "test\u00e4\u0300\u0323", true);
2601
2602 /*
2603 * The following canonical equivalence tests don't work. Bug id: 4916384.
2604 *
2605 // Decomposed hangul (jamos)
2606 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2607 m = p.matcher("\u1100\u1161");
2608 if (!m.matches())
2609 failCount++;
2610
2611 m.reset("\uac00");
2612 if (!m.matches())
2613 failCount++;
2614
2615 // Composed hangul
2616 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2617 m = p.matcher("\u1100\u1161");
2618 if (!m.matches())
2619 failCount++;
2620
2621 m.reset("\uac00");
2622 if (!m.matches())
2623 failCount++;
2624
2625 // Decomposed supplementary outside char classes
2626 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2627 m = p.matcher("test\ud834\uddc0");
2628 if (!m.matches())
2629 failCount++;
2630
2631 m.reset("test\ud834\uddbc\ud834\udd6f");
2632 if (!m.matches())
2633 failCount++;
2634
2635 // Composed supplementary outside char classes
2636 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2637 m.reset("test\ud834\uddbc\ud834\udd6f");
2638 if (!m.matches())
2639 failCount++;
2640
2641 m = p.matcher("test\ud834\uddc0");
2642 if (!m.matches())
2643 failCount++;
2644
2645 */
2646
2647 report("Canonical Equivalence");
2648 }
2649
2650 /**
2651 * A basic sanity test of Matcher.replaceAll().
2652 */
2653 private static void globalSubstitute() throws Exception {
2654 // Global substitution with a literal
2655 Pattern p = Pattern.compile("(ab)(c*)");
2656 Matcher m = p.matcher("abccczzzabcczzzabccc");
2657 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2658 failCount++;
2659
2660 m.reset("zzzabccczzzabcczzzabccczzz");
2661 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2662 failCount++;
2663
2664 // Global substitution with groups
2665 m.reset("zzzabccczzzabcczzzabccczzz");
2666 String result = m.replaceAll("$1");
2667 if (!result.equals("zzzabzzzabzzzabzzz"))
2668 failCount++;
2669
2670 // Supplementary character test
2671 // Global substitution with a literal
2672 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2673 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2674 if (!m.replaceAll(toSupplementaries("test")).
2675 equals(toSupplementaries("testzzztestzzztest")))
2676 failCount++;
2677
2678 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2679 if (!m.replaceAll(toSupplementaries("test")).
2680 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2681 failCount++;
2682
2683 // Global substitution with groups
2684 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2685 result = m.replaceAll("$1");
2686 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2687 failCount++;
2688
2689 report("Global Substitution");
2690 }
2691
2692 /**
2693 * Tests the usage of Matcher.appendReplacement() with literal
2694 * and group substitutions.
2695 */
2696 private static void stringbufferSubstitute() throws Exception {
2697 // SB substitution with literal
2698 String blah = "zzzblahzzz";
2699 Pattern p = Pattern.compile("blah");
2700 Matcher m = p.matcher(blah);
2701 StringBuffer result = new StringBuffer();
2702 try {
2703 m.appendReplacement(result, "blech");
2704 failCount++;
2705 } catch (IllegalStateException e) {
2706 }
2707 m.find();
2708 m.appendReplacement(result, "blech");
2709 if (!result.toString().equals("zzzblech"))
2710 failCount++;
2711
2712 m.appendTail(result);
2713 if (!result.toString().equals("zzzblechzzz"))
2714 failCount++;
2715
2716 // SB substitution with groups
2717 blah = "zzzabcdzzz";
2718 p = Pattern.compile("(ab)(cd)*");
2719 m = p.matcher(blah);
2720 result = new StringBuffer();
2721 try {
2722 m.appendReplacement(result, "$1");
2723 failCount++;
2724 } catch (IllegalStateException e) {
2725 }
2726 m.find();
2727 m.appendReplacement(result, "$1");
2728 if (!result.toString().equals("zzzab"))
2729 failCount++;
2730
2731 m.appendTail(result);
2732 if (!result.toString().equals("zzzabzzz"))
2733 failCount++;
2734
2735 // SB substitution with 3 groups
2736 blah = "zzzabcdcdefzzz";
2737 p = Pattern.compile("(ab)(cd)*(ef)");
2738 m = p.matcher(blah);
2739 result = new StringBuffer();
2740 try {
2741 m.appendReplacement(result, "$1w$2w$3");
2742 failCount++;
2743 } catch (IllegalStateException e) {
2744 }
2745 m.find();
2746 m.appendReplacement(result, "$1w$2w$3");
2747 if (!result.toString().equals("zzzabwcdwef"))
2748 failCount++;
2749
2750 m.appendTail(result);
2751 if (!result.toString().equals("zzzabwcdwefzzz"))
2752 failCount++;
2753
2754 // SB substitution with groups and three matches
2755 // skipping middle match
2756 blah = "zzzabcdzzzabcddzzzabcdzzz";
2757 p = Pattern.compile("(ab)(cd*)");
2758 m = p.matcher(blah);
2759 result = new StringBuffer();
2760 try {
2761 m.appendReplacement(result, "$1");
2762 failCount++;
2763 } catch (IllegalStateException e) {
2764 }
2765 m.find();
2766 m.appendReplacement(result, "$1");
2767 if (!result.toString().equals("zzzab"))
2768 failCount++;
2769
2770 m.find();
2771 m.find();
2772 m.appendReplacement(result, "$2");
2773 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2774 failCount++;
2775
2776 m.appendTail(result);
2777 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2778 failCount++;
2779
2780 // Check to make sure escaped $ is ignored
2781 blah = "zzzabcdcdefzzz";
2782 p = Pattern.compile("(ab)(cd)*(ef)");
2783 m = p.matcher(blah);
2784 result = new StringBuffer();
2785 m.find();
2786 m.appendReplacement(result, "$1w\\$2w$3");
2787 if (!result.toString().equals("zzzabw$2wef"))
2788 failCount++;
2789
2790 m.appendTail(result);
2791 if (!result.toString().equals("zzzabw$2wefzzz"))
2792 failCount++;
2793
2794 // Check to make sure a reference to nonexistent group causes error
2795 blah = "zzzabcdcdefzzz";
2796 p = Pattern.compile("(ab)(cd)*(ef)");
2797 m = p.matcher(blah);
2798 result = new StringBuffer();
2799 m.find();
2800 try {
2801 m.appendReplacement(result, "$1w$5w$3");
2802 failCount++;
2803 } catch (IndexOutOfBoundsException ioobe) {
2804 // Correct result
2805 }
2806
2807 // Check double digit group references
2808 blah = "zzz123456789101112zzz";
2809 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2810 m = p.matcher(blah);
2811 result = new StringBuffer();
2812 m.find();
2813 m.appendReplacement(result, "$1w$11w$3");
2814 if (!result.toString().equals("zzz1w11w3"))
2815 failCount++;
2816
2817 // Check to make sure it backs off $15 to $1 if only three groups
2818 blah = "zzzabcdcdefzzz";
2819 p = Pattern.compile("(ab)(cd)*(ef)");
2820 m = p.matcher(blah);
2821 result = new StringBuffer();
2822 m.find();
2823 m.appendReplacement(result, "$1w$15w$3");
2824 if (!result.toString().equals("zzzabwab5wef"))
2825 failCount++;
2826
2827
2828 // Supplementary character test
2829 // SB substitution with literal
2830 blah = toSupplementaries("zzzblahzzz");
2831 p = Pattern.compile(toSupplementaries("blah"));
2832 m = p.matcher(blah);
2833 result = new StringBuffer();
2834 try {
2835 m.appendReplacement(result, toSupplementaries("blech"));
2836 failCount++;
2837 } catch (IllegalStateException e) {
2838 }
2839 m.find();
2840 m.appendReplacement(result, toSupplementaries("blech"));
2841 if (!result.toString().equals(toSupplementaries("zzzblech")))
2842 failCount++;
2843
2844 m.appendTail(result);
2845 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2846 failCount++;
2847
2848 // SB substitution with groups
2849 blah = toSupplementaries("zzzabcdzzz");
2850 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2851 m = p.matcher(blah);
2852 result = new StringBuffer();
2853 try {
2854 m.appendReplacement(result, "$1");
2855 failCount++;
2856 } catch (IllegalStateException e) {
2857 }
2858 m.find();
2859 m.appendReplacement(result, "$1");
2860 if (!result.toString().equals(toSupplementaries("zzzab")))
2861 failCount++;
2862
2863 m.appendTail(result);
2864 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2865 failCount++;
2866
2867 // SB substitution with 3 groups
2868 blah = toSupplementaries("zzzabcdcdefzzz");
2869 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2870 m = p.matcher(blah);
2871 result = new StringBuffer();
2872 try {
2873 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2874 failCount++;
2875 } catch (IllegalStateException e) {
2876 }
2877 m.find();
2878 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2879 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2880 failCount++;
2881
2882 m.appendTail(result);
2883 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2884 failCount++;
2885
2886 // SB substitution with groups and three matches
2887 // skipping middle match
2888 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2889 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2890 m = p.matcher(blah);
2891 result = new StringBuffer();
2892 try {
2893 m.appendReplacement(result, "$1");
2894 failCount++;
2895 } catch (IllegalStateException e) {
2896 }
2897 m.find();
2898 m.appendReplacement(result, "$1");
2899 if (!result.toString().equals(toSupplementaries("zzzab")))
2900 failCount++;
2901
2902 m.find();
2903 m.find();
2904 m.appendReplacement(result, "$2");
2905 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2906 failCount++;
2907
2908 m.appendTail(result);
2909 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2910 failCount++;
2911
2912 // Check to make sure escaped $ is ignored
2913 blah = toSupplementaries("zzzabcdcdefzzz");
2914 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2915 m = p.matcher(blah);
2916 result = new StringBuffer();
2917 m.find();
2918 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2919 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2920 failCount++;
2921
2922 m.appendTail(result);
2923 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2924 failCount++;
2925
2926 // Check to make sure a reference to nonexistent group causes error
2927 blah = toSupplementaries("zzzabcdcdefzzz");
2928 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2929 m = p.matcher(blah);
2930 result = new StringBuffer();
2931 m.find();
2932 try {
2933 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2934 failCount++;
2935 } catch (IndexOutOfBoundsException ioobe) {
2936 // Correct result
2937 }
2938
2939 // Check double digit group references
2940 blah = toSupplementaries("zzz123456789101112zzz");
2941 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2942 m = p.matcher(blah);
2943 result = new StringBuffer();
2944 m.find();
2945 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2946 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2947 failCount++;
2948
2949 // Check to make sure it backs off $15 to $1 if only three groups
2950 blah = toSupplementaries("zzzabcdcdefzzz");
2951 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2952 m = p.matcher(blah);
2953 result = new StringBuffer();
2954 m.find();
2955 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2956 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2957 failCount++;
2958
2959 // Check nothing has been appended into the output buffer if
2960 // the replacement string triggers IllegalArgumentException.
2961 p = Pattern.compile("(abc)");
2962 m = p.matcher("abcd");
2963 result = new StringBuffer();
2964 m.find();
2965 try {
2966 m.appendReplacement(result, ("xyz$g"));
2967 failCount++;
2968 } catch (IllegalArgumentException iae) {
2969 if (result.length() != 0)
2970 failCount++;
2971 }
2972
2973 report("SB Substitution");
2974 }
2975
2976 /*
2977 * 5 groups of characters are created to make a substitution string.
2978 * A base string will be created including random lead chars, the
2979 * substitution string, and random trailing chars.
2980 * A pattern containing the 5 groups is searched for and replaced with:
2981 * random group + random string + random group.
2982 * The results are checked for correctness.
2983 */
2984 private static void substitutionBasher() {
2985 for (int runs = 0; runs<1000; runs++) {
2986 // Create a base string to work in
2987 int leadingChars = generator.nextInt(10);
2988 StringBuffer baseBuffer = new StringBuffer(100);
2989 String leadingString = getRandomAlphaString(leadingChars);
2990 baseBuffer.append(leadingString);
2991
2992 // Create 5 groups of random number of random chars
2993 // Create the string to substitute
2994 // Create the pattern string to search for
2995 StringBuffer bufferToSub = new StringBuffer(25);
2996 StringBuffer bufferToPat = new StringBuffer(50);
2997 String[] groups = new String[5];
2998 for(int i=0; i<5; i++) {
2999 int aGroupSize = generator.nextInt(5)+1;
3000 groups[i] = getRandomAlphaString(aGroupSize);
3001 bufferToSub.append(groups[i]);
3002 bufferToPat.append('(');
3003 bufferToPat.append(groups[i]);
3004 bufferToPat.append(')');
3005 }
3006 String stringToSub = bufferToSub.toString();
3007 String pattern = bufferToPat.toString();
3008
3009 // Place sub string into working string at random index
3010 baseBuffer.append(stringToSub);
3011
3012 // Append random chars to end
3013 int trailingChars = generator.nextInt(10);
3014 String trailingString = getRandomAlphaString(trailingChars);
3015 baseBuffer.append(trailingString);
3016 String baseString = baseBuffer.toString();
3017
3018 // Create test pattern and matcher
3019 Pattern p = Pattern.compile(pattern);
3020 Matcher m = p.matcher(baseString);
3021
3022 // Reject candidate if pattern happens to start early
3023 m.find();
3024 if (m.start() < leadingChars)
3025 continue;
3026
3027 // Reject candidate if more than one match
3028 if (m.find())
3029 continue;
3030
3031 // Construct a replacement string with :
3032 // random group + random string + random group
3033 StringBuffer bufferToRep = new StringBuffer();
3034 int groupIndex1 = generator.nextInt(5);
3035 bufferToRep.append("$" + (groupIndex1 + 1));
3036 String randomMidString = getRandomAlphaString(5);
3037 bufferToRep.append(randomMidString);
3038 int groupIndex2 = generator.nextInt(5);
3039 bufferToRep.append("$" + (groupIndex2 + 1));
3040 String replacement = bufferToRep.toString();
3041
3042 // Do the replacement
3043 String result = m.replaceAll(replacement);
3044
3045 // Construct expected result
3046 StringBuffer bufferToRes = new StringBuffer();
3047 bufferToRes.append(leadingString);
3048 bufferToRes.append(groups[groupIndex1]);
3049 bufferToRes.append(randomMidString);
3050 bufferToRes.append(groups[groupIndex2]);
3051 bufferToRes.append(trailingString);
3052 String expectedResult = bufferToRes.toString();
3053
3054 // Check results
3055 if (!result.equals(expectedResult))
3056 failCount++;
3057 }
3058
3059 report("Substitution Basher");
3060 }
3061
3062 /**
3063 * Checks the handling of some escape sequences that the Pattern
3064 * class should process instead of the java compiler. These are
3065 * not in the file because the escapes should be be processed
3066 * by the Pattern class when the regex is compiled.
3067 */
3068 private static void escapes() throws Exception {
3069 Pattern p = Pattern.compile("\\043");
3070 Matcher m = p.matcher("#");
3071 if (!m.find())
3072 failCount++;
3073
3074 p = Pattern.compile("\\x23");
3075 m = p.matcher("#");
3076 if (!m.find())
3077 failCount++;
3078
3079 p = Pattern.compile("\\u0023");
3080 m = p.matcher("#");
3081 if (!m.find())
3082 failCount++;
3083
3084 report("Escape sequences");
3085 }
3086
3087 /**
3088 * Checks the handling of blank input situations. These
3089 * tests are incompatible with my test file format.
3090 */
3091 private static void blankInput() throws Exception {
3092 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3093 Matcher m = p.matcher("");
3094 if (m.find())
3095 failCount++;
3096
3097 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3098 m = p.matcher("");
3099 if (!m.find())
3100 failCount++;
3101
3102 p = Pattern.compile("abc");
3103 m = p.matcher("");
3104 if (m.find())
3105 failCount++;
3106
3107 p = Pattern.compile("a*");
3108 m = p.matcher("");
3109 if (!m.find())
3110 failCount++;
3111
3112 report("Blank input");
3113 }
3114
3115 /**
3116 * Tests the Boyer-Moore pattern matching of a character sequence
3117 * on randomly generated patterns.
3118 */
3119 private static void bm() throws Exception {
3120 doBnM('a');
3121 report("Boyer Moore (ASCII)");
3122
3123 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3124 report("Boyer Moore (Supplementary)");
3125 }
3126
3127 private static void doBnM(int baseCharacter) throws Exception {
3128 int achar=0;
3129
3130 for (int i=0; i<100; i++) {
3131 // Create a short pattern to search for
3132 int patternLength = generator.nextInt(7) + 4;
3133 StringBuffer patternBuffer = new StringBuffer(patternLength);
3134 for (int x=0; x<patternLength; x++) {
3135 int ch = baseCharacter + generator.nextInt(26);
3136 if (Character.isSupplementaryCodePoint(ch)) {
3137 patternBuffer.append(Character.toChars(ch));
3138 } else {
3139 patternBuffer.append((char)ch);
3140 }
3141 }
3142 String pattern = patternBuffer.toString();
3143 Pattern p = Pattern.compile(pattern);
3144
3145 // Create a buffer with random ASCII chars that does
3146 // not match the sample
3147 String toSearch = null;
3148 StringBuffer s = null;
3149 Matcher m = p.matcher("");
3150 do {
3151 s = new StringBuffer(100);
3152 for (int x=0; x<100; x++) {
3153 int ch = baseCharacter + generator.nextInt(26);
3154 if (Character.isSupplementaryCodePoint(ch)) {
3155 s.append(Character.toChars(ch));
3156 } else {
3157 s.append((char)ch);
3158 }
3159 }
3160 toSearch = s.toString();
3161 m.reset(toSearch);
3162 } while (m.find());
3163
3164 // Insert the pattern at a random spot
3165 int insertIndex = generator.nextInt(99);
3166 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3167 insertIndex++;
3168 s = s.insert(insertIndex, pattern);
3169 toSearch = s.toString();
3170
3171 // Make sure that the pattern is found
3172 m.reset(toSearch);
3173 if (!m.find())
3174 failCount++;
3175
3176 // Make sure that the match text is the pattern
3177 if (!m.group().equals(pattern))
3178 failCount++;
3179
3180 // Make sure match occured at insertion point
3181 if (m.start() != insertIndex)
3182 failCount++;
3183 }
3184 }
3185
3186 /**
3187 * Tests the matching of slices on randomly generated patterns.
3188 * The Boyer-Moore optimization is not done on these patterns
3189 * because it uses unicode case folding.
3190 */
3191 private static void slice() throws Exception {
3192 doSlice(Character.MAX_VALUE);
3193 report("Slice");
3194
3195 doSlice(Character.MAX_CODE_POINT);
3196 report("Slice (Supplementary)");
3197 }
3198
3199 private static void doSlice(int maxCharacter) throws Exception {
3200 Random generator = new Random();
3201 int achar=0;
3202
3203 for (int i=0; i<100; i++) {
3204 // Create a short pattern to search for
3205 int patternLength = generator.nextInt(7) + 4;
3206 StringBuffer patternBuffer = new StringBuffer(patternLength);
3207 for (int x=0; x<patternLength; x++) {
3208 int randomChar = 0;
3209 while (!Character.isLetterOrDigit(randomChar))
3210 randomChar = generator.nextInt(maxCharacter);
3211 if (Character.isSupplementaryCodePoint(randomChar)) {
3212 patternBuffer.append(Character.toChars(randomChar));
3213 } else {
3214 patternBuffer.append((char) randomChar);
3215 }
3216 }
3217 String pattern = patternBuffer.toString();
3218 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3219
3220 // Create a buffer with random chars that does not match the sample
3221 String toSearch = null;
3222 StringBuffer s = null;
3223 Matcher m = p.matcher("");
3224 do {
3225 s = new StringBuffer(100);
3226 for (int x=0; x<100; x++) {
3227 int randomChar = 0;
3228 while (!Character.isLetterOrDigit(randomChar))
3229 randomChar = generator.nextInt(maxCharacter);
3230 if (Character.isSupplementaryCodePoint(randomChar)) {
3231 s.append(Character.toChars(randomChar));
3232 } else {
3233 s.append((char) randomChar);
3234 }
3235 }
3236 toSearch = s.toString();
3237 m.reset(toSearch);
3238 } while (m.find());
3239
3240 // Insert the pattern at a random spot
3241 int insertIndex = generator.nextInt(99);
3242 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3243 insertIndex++;
3244 s = s.insert(insertIndex, pattern);
3245 toSearch = s.toString();
3246
3247 // Make sure that the pattern is found
3248 m.reset(toSearch);
3249 if (!m.find())
3250 failCount++;
3251
3252 // Make sure that the match text is the pattern
3253 if (!m.group().equals(pattern))
3254 failCount++;
3255
3256 // Make sure match occured at insertion point
3257 if (m.start() != insertIndex)
3258 failCount++;
3259 }
3260 }
3261
3262 private static void explainFailure(String pattern, String data,
3263 String expected, String actual) {
3264 System.err.println("----------------------------------------");
3265 System.err.println("Pattern = "+pattern);
3266 System.err.println("Data = "+data);
3267 System.err.println("Expected = " + expected);
3268 System.err.println("Actual = " + actual);
3269 }
3270
3271 private static void explainFailure(String pattern, String data,
3272 Throwable t) {
3273 System.err.println("----------------------------------------");
3274 System.err.println("Pattern = "+pattern);
3275 System.err.println("Data = "+data);
3276 t.printStackTrace(System.err);
3277 }
3278
3279 // Testing examples from a file
3280
3281 /**
3282 * Goes through the file "TestCases.txt" and creates many patterns
3283 * described in the file, matching the patterns against input lines in
3284 * the file, and comparing the results against the correct results
3285 * also found in the file. The file format is described in comments
3286 * at the head of the file.
3287 */
3288 private static void processFile(String fileName) throws Exception {
3289 File testCases = new File(System.getProperty("test.src", "."),
3290 fileName);
3291 FileInputStream in = new FileInputStream(testCases);
3292 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3293
3294 // Process next test case.
3295 String aLine;
3296 while((aLine = r.readLine()) != null) {
3297 // Read a line for pattern
3298 String patternString = grabLine(r);
3299 Pattern p = null;
3300 try {
3301 p = compileTestPattern(patternString);
3302 } catch (PatternSyntaxException e) {
3303 String dataString = grabLine(r);
3304 String expectedResult = grabLine(r);
3305 if (expectedResult.startsWith("error"))
3306 continue;
3307 explainFailure(patternString, dataString, e);
3308 failCount++;
3309 continue;
3310 }
3311
3312 // Read a line for input string
3313 String dataString = grabLine(r);
3314 Matcher m = p.matcher(dataString);
3315 StringBuffer result = new StringBuffer();
3316
3317 // Check for IllegalStateExceptions before a match
3318 failCount += preMatchInvariants(m);
3319
3320 boolean found = m.find();
3321
3322 if (found)
3323 failCount += postTrueMatchInvariants(m);
3324 else
3325 failCount += postFalseMatchInvariants(m);
3326
3327 if (found) {
3328 result.append("true ");
3329 result.append(m.group(0) + " ");
3330 } else {
3331 result.append("false ");
3332 }
3333
3334 result.append(m.groupCount());
3335
3336 if (found) {
3337 for (int i=1; i<m.groupCount()+1; i++)
3338 if (m.group(i) != null)
3339 result.append(" " +m.group(i));
3340 }
3341
3342 // Read a line for the expected result
3343 String expectedResult = grabLine(r);
3344
3345 if (!result.toString().equals(expectedResult)) {
3346 explainFailure(patternString, dataString, expectedResult, result.toString());
3347 failCount++;
3348 }
3349 }
3350
3351 report(fileName);
3352 }
3353
3354 private static int preMatchInvariants(Matcher m) {
3355 int failCount = 0;
3356 try {
3357 m.start();
3358 failCount++;
3359 } catch (IllegalStateException ise) {}
3360 try {
3361 m.end();
3362 failCount++;
3363 } catch (IllegalStateException ise) {}
3364 try {
3365 m.group();
3366 failCount++;
3367 } catch (IllegalStateException ise) {}
3368 return failCount;
3369 }
3370
3371 private static int postFalseMatchInvariants(Matcher m) {
3372 int failCount = 0;
3373 try {
3374 m.group();
3375 failCount++;
3376 } catch (IllegalStateException ise) {}
3377 try {
3378 m.start();
3379 failCount++;
3380 } catch (IllegalStateException ise) {}
3381 try {
3382 m.end();
3383 failCount++;
3384 } catch (IllegalStateException ise) {}
3385 return failCount;
3386 }
3387
3388 private static int postTrueMatchInvariants(Matcher m) {
3389 int failCount = 0;
3390 //assert(m.start() = m.start(0);
3391 if (m.start() != m.start(0))
3392 failCount++;
3393 //assert(m.end() = m.end(0);
3394 if (m.start() != m.start(0))
3395 failCount++;
3396 //assert(m.group() = m.group(0);
3397 if (!m.group().equals(m.group(0)))
3398 failCount++;
3399 try {
3400 m.group(50);
3401 failCount++;
3402 } catch (IndexOutOfBoundsException ise) {}
3403
3404 return failCount;
3405 }
3406
3407 private static Pattern compileTestPattern(String patternString) {
3408 if (!patternString.startsWith("'")) {
3409 return Pattern.compile(patternString);
3410 }
3411
3412 int break1 = patternString.lastIndexOf("'");
3413 String flagString = patternString.substring(
3414 break1+1, patternString.length());
3415 patternString = patternString.substring(1, break1);
3416
3417 if (flagString.equals("i"))
3418 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3419
3420 if (flagString.equals("m"))
3421 return Pattern.compile(patternString, Pattern.MULTILINE);
3422
3423 return Pattern.compile(patternString);
3424 }
3425
3426 /**
3427 * Reads a line from the input file. Keeps reading lines until a non
3428 * empty non comment line is read. If the line contains a \n then
3429 * these two characters are replaced by a newline char. If a \\uxxxx
3430 * sequence is read then the sequence is replaced by the unicode char.
3431 */
3432 private static String grabLine(BufferedReader r) throws Exception {
3433 int index = 0;
3434 String line = r.readLine();
3435 while (line.startsWith("//") || line.length() < 1)
3436 line = r.readLine();
3437 while ((index = line.indexOf("\\n")) != -1) {
3438 StringBuffer temp = new StringBuffer(line);
3439 temp.replace(index, index+2, "\n");
3440 line = temp.toString();
3441 }
3442 while ((index = line.indexOf("\\u")) != -1) {
3443 StringBuffer temp = new StringBuffer(line);
3444 String value = temp.substring(index+2, index+6);
3445 char aChar = (char)Integer.parseInt(value, 16);
3446 String unicodeChar = "" + aChar;
3447 temp.replace(index, index+6, unicodeChar);
3448 line = temp.toString();
3449 }
3450
3451 return line;
3452 }
3453
3454 private static void check(Pattern p, String s, String g, String expected) {
3455 Matcher m = p.matcher(s);
3456 m.find();
shermana244eb52013-05-06 21:24:37 -07003457 if (!m.group(g).equals(expected) ||
3458 s.charAt(m.start(g)) != expected.charAt(0) ||
3459 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
sherman0b4d42d2009-02-23 21:06:15 -08003460 failCount++;
3461 }
3462
3463 private static void checkReplaceFirst(String p, String s, String r, String expected)
3464 {
3465 if (!expected.equals(Pattern.compile(p)
3466 .matcher(s)
3467 .replaceFirst(r)))
3468 failCount++;
3469 }
3470
3471 private static void checkReplaceAll(String p, String s, String r, String expected)
3472 {
3473 if (!expected.equals(Pattern.compile(p)
3474 .matcher(s)
3475 .replaceAll(r)))
3476 failCount++;
3477 }
3478
3479 private static void checkExpectedFail(String p) {
3480 try {
3481 Pattern.compile(p);
3482 } catch (PatternSyntaxException pse) {
3483 //pse.printStackTrace();
3484 return;
3485 }
3486 failCount++;
3487 }
3488
shermana244eb52013-05-06 21:24:37 -07003489 private static void checkExpectedIAE(Matcher m, String g) {
sherman0b4d42d2009-02-23 21:06:15 -08003490 m.find();
3491 try {
3492 m.group(g);
shermana244eb52013-05-06 21:24:37 -07003493 } catch (IllegalArgumentException x) {
sherman0b4d42d2009-02-23 21:06:15 -08003494 //iae.printStackTrace();
shermana244eb52013-05-06 21:24:37 -07003495 try {
3496 m.start(g);
3497 } catch (IllegalArgumentException xx) {
3498 try {
3499 m.start(g);
3500 } catch (IllegalArgumentException xxx) {
3501 return;
3502 }
3503 }
sherman0b4d42d2009-02-23 21:06:15 -08003504 }
3505 failCount++;
3506 }
3507
shermana244eb52013-05-06 21:24:37 -07003508 private static void checkExpectedNPE(Matcher m) {
3509 m.find();
3510 try {
3511 m.group(null);
3512 } catch (NullPointerException x) {
3513 try {
3514 m.start(null);
3515 } catch (NullPointerException xx) {
3516 try {
3517 m.end(null);
3518 } catch (NullPointerException xxx) {
3519 return;
3520 }
3521 }
3522 }
3523 failCount++;
3524 }
sherman0b4d42d2009-02-23 21:06:15 -08003525
3526 private static void namedGroupCaptureTest() throws Exception {
3527 check(Pattern.compile("x+(?<gname>y+)z+"),
3528 "xxxyyyzzz",
3529 "gname",
3530 "yyy");
3531
shermand9337e02009-10-21 11:40:40 -07003532 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003533 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003534 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003535 "yyy");
3536
sherman0b4d42d2009-02-23 21:06:15 -08003537 //backref
3538 Pattern pattern = Pattern.compile("(a*)bc\\1");
3539 check(pattern, "zzzaabcazzz", true); // found "abca"
3540
3541 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3542 "zzzaabcaazzz", true);
3543
3544 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3545 "abcdefabc", true);
3546
3547 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3548 "abcdefghijkk", true);
3549
3550 // Supplementary character tests
3551 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3552 toSupplementaries("zzzaabcazzz"), true);
3553
3554 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3555 toSupplementaries("zzzaabcaazzz"), true);
3556
3557 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3558 toSupplementaries("abcdefabc"), true);
3559
3560 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3561 "(?<gname>" +
3562 toSupplementaries("k)") + "\\k<gname>"),
3563 toSupplementaries("abcdefghijkk"), true);
3564
3565 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3566 "xxxyyyzzzyyy",
3567 "gname",
3568 "yyy");
3569
3570 //replaceFirst/All
3571 checkReplaceFirst("(?<gn>ab)(c*)",
3572 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003573 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003574 "abzzzabcczzzabccc");
3575
3576 checkReplaceAll("(?<gn>ab)(c*)",
3577 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003578 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003579 "abzzzabzzzab");
3580
3581
3582 checkReplaceFirst("(?<gn>ab)(c*)",
3583 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003584 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003585 "zzzabzzzabcczzzabccczzz");
3586
3587 checkReplaceAll("(?<gn>ab)(c*)",
3588 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003589 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003590 "zzzabzzzabzzzabzzz");
3591
3592 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3593 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003594 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003595 "zzzccczzzabcczzzabccczzz");
3596
3597 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3598 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003599 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003600 "zzzccczzzcczzzccczzz");
3601
3602 //toSupplementaries("(ab)(c*)"));
3603 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3604 ")(?<gn2>" + toSupplementaries("c") + "*)",
3605 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003606 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003607 toSupplementaries("abzzzabcczzzabccc"));
3608
3609
3610 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3611 ")(?<gn2>" + toSupplementaries("c") + "*)",
3612 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003613 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003614 toSupplementaries("abzzzabzzzab"));
3615
3616 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3617 ")(?<gn2>" + toSupplementaries("c") + "*)",
3618 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003619 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003620 toSupplementaries("ccczzzabcczzzabccc"));
3621
3622
3623 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3624 ")(?<gn2>" + toSupplementaries("c") + "*)",
3625 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003626 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003627 toSupplementaries("ccczzzcczzzccc"));
3628
3629 checkReplaceFirst("(?<dog>Dog)AndCat",
3630 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003631 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003632 "zzzDogzzzDogAndCatzzz");
3633
3634
3635 checkReplaceAll("(?<dog>Dog)AndCat",
3636 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003637 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003638 "zzzDogzzzDogzzz");
3639
3640 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003641 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3642 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003643 failCount++;
3644
3645 // negative
3646 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3647 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003648 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003649 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3650 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
shermana244eb52013-05-06 21:24:37 -07003651 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3652 "gnameX");
3653 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
sherman0b4d42d2009-02-23 21:06:15 -08003654 report("NamedGroupCapture");
3655 }
sherman6782c962010-02-05 00:10:42 -08003656
shermancc01ef52010-05-18 15:36:47 -07003657 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003658 private static void nonBmpClassComplementTest() throws Exception {
3659 Pattern p = Pattern.compile("\\P{Lu}");
3660 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3661 if (m.find() && m.start() == 1)
3662 failCount++;
3663
3664 // from a unicode category
3665 p = Pattern.compile("\\P{Lu}");
3666 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3667 if (m.find())
3668 failCount++;
3669 if (!m.hitEnd())
3670 failCount++;
3671
3672 // block
3673 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3674 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3675 if (m.find() && m.start() == 1)
3676 failCount++;
3677
3678 report("NonBmpClassComplement");
3679 }
3680
shermancc01ef52010-05-18 15:36:47 -07003681 private static void unicodePropertiesTest() throws Exception {
3682 // different forms
3683 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3684 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3685 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3686 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3687 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3688 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3689 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3690 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3691 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3692 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3693 failCount++;
3694
3695 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3696 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3697 Matcher lastSM = common;
3698 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3699
3700 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3701 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3702 Matcher lastBM = latin;
3703 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3704
3705 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3706 if (cp >= 0x30000 && (cp & 0x70) == 0){
3707 continue; // only pick couple code points, they are the same
3708 }
3709
3710 // Unicode Script
3711 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3712 Matcher m;
3713 String str = new String(Character.toChars(cp));
3714 if (script == lastScript) {
3715 m = lastSM;
3716 m.reset(str);
3717 } else {
3718 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3719 }
3720 if (!m.matches()) {
3721 failCount++;
3722 }
3723 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3724 other.reset(str);
3725 if (other.matches()) {
3726 failCount++;
3727 }
3728 lastSM = m;
3729 lastScript = script;
3730
3731 // Unicode Block
3732 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3733 if (block == null) {
3734 //System.out.printf("Not a Block: cp=%x%n", cp);
3735 continue;
3736 }
3737 if (block == lastBlock) {
3738 m = lastBM;
3739 m.reset(str);
3740 } else {
3741 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3742 }
3743 if (!m.matches()) {
3744 failCount++;
3745 }
3746 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3747 other.reset(str);
3748 if (other.matches()) {
3749 failCount++;
3750 }
3751 lastBM = m;
3752 lastBlock = block;
3753 }
3754 report("unicodeProperties");
3755 }
shermanf03c78b2011-02-03 13:49:25 -08003756
3757 private static void unicodeHexNotationTest() throws Exception {
3758
3759 // negative
3760 checkExpectedFail("\\x{-23}");
3761 checkExpectedFail("\\x{110000}");
3762 checkExpectedFail("\\x{}");
3763 checkExpectedFail("\\x{AB[ef]");
3764
3765 // codepoint
3766 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3767 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3768 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3769 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3770
3771 // in class
3772 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3773 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3774 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3775 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3776 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3777 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3778
3779 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3780 String s = "A" + new String(Character.toChars(cp)) + "B";
3781 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3782 : String.format("\\u%04x\\u%04x",
3783 (int) Character.toChars(cp)[0],
3784 (int) Character.toChars(cp)[1]);
3785 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3786 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3787 failCount++;
3788 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3789 failCount++;
3790 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3791 failCount++;
3792 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3793 failCount++;
3794 }
3795 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003796 }
3797
3798 private static void unicodeClassesTest() throws Exception {
3799
3800 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3801 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3802 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3803 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3804 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3805 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3806 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3807 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3808 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3809 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3810 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3811 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3812 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3813 Matcher bound = Pattern.compile("\\b").matcher("");
3814 Matcher word = Pattern.compile("\\w++").matcher("");
3815 // UNICODE_CHARACTER_CLASS
3816 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3817 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3818 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3819 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3820 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3821 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3822 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3823 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3824 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3825 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3826 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3827 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3828 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3829 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3830 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3831 // embedded flag (?U)
3832 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3833 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3834 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3835
3836 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3837 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3838 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3839 // properties
3840 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3841 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3842 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3843 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3844 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3845 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3846 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3847 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3848 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3849 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
shermana244eb52013-05-06 21:24:37 -07003850 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
sherman85bbd8b2011-04-28 20:48:36 -07003851
3852 // javaMethod
3853 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3854 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3855 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3856 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3857
3858 for (int cp = 1; cp < 0x30000; cp++) {
3859 String str = new String(Character.toChars(cp));
3860 int type = Character.getType(cp);
3861 if (// lower
3862 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3863 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3864 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3865 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3866 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3867 // upper
3868 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3869 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3870 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3871 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3872 // alpha
3873 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3874 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3875 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3876 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3877 // digit
3878 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3879 Character.isDigit(cp) != digitU.reset(str).matches() ||
3880 // alnum
3881 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3882 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3883 // punct
3884 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3885 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3886 // graph
3887 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3888 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3889 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3890 // blank
3891 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3892 != blank.reset(str).matches() ||
3893 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3894 // print
3895 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3896 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3897 // cntrl
3898 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3899 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3900 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3901 // hexdigit
3902 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3903 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3904 // space
3905 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3906 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3907 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3908 // word
3909 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3910 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3911 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3912 // bwordb
3913 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3914 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3915 // properties
3916 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3917 Character.isLetter(cp) != letterP.reset(str).matches()||
3918 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3919 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3920 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
shermana244eb52013-05-06 21:24:37 -07003921 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3922 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
sherman85bbd8b2011-04-28 20:48:36 -07003923 failCount++;
3924 }
3925
3926 // bounds/word align
3927 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3928 if (!bwbU.reset("\u0180sherman\u0400").matches())
3929 failCount++;
3930 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3931 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3932 failCount++;
3933 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3934 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3935 failCount++;
3936 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3937 failCount++;
3938 report("unicodePredefinedClasses");
3939 }
shermanecb65472012-05-08 10:57:13 -07003940
3941 private static void horizontalAndVerticalWSTest() throws Exception {
3942 String hws = new String (new char[] {
3943 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3944 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3945 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3946 0x202f, 0x205f, 0x3000 });
3947 String vws = new String (new char[] {
3948 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3949 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3950 !Pattern.compile("[\\h]+").matcher(hws).matches())
3951 failCount++;
3952 if (Pattern.compile("\\H").matcher(hws).find() ||
3953 Pattern.compile("[\\H]").matcher(hws).find())
3954 failCount++;
3955 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3956 !Pattern.compile("[\\v]+").matcher(vws).matches())
3957 failCount++;
3958 if (Pattern.compile("\\V").matcher(vws).find() ||
3959 Pattern.compile("[\\V]").matcher(vws).find())
3960 failCount++;
3961 String prefix = "abcd";
3962 String suffix = "efgh";
3963 String ng = "A";
3964 for (int i = 0; i < hws.length(); i++) {
3965 String c = String.valueOf(hws.charAt(i));
3966 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3967 if (!m.find() || !c.equals(m.group()))
3968 failCount++;
3969 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3970 if (!m.find() || !c.equals(m.group()))
3971 failCount++;
3972
3973 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3974 if (!m.find() || !ng.equals(m.group()))
3975 failCount++;
3976 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3977 if (!m.find() || !ng.equals(m.group()))
3978 failCount++;
3979 }
3980 for (int i = 0; i < vws.length(); i++) {
3981 String c = String.valueOf(vws.charAt(i));
3982 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3983 if (!m.find() || !c.equals(m.group()))
3984 failCount++;
3985 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3986 if (!m.find() || !c.equals(m.group()))
3987 failCount++;
3988
3989 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3990 if (!m.find() || !ng.equals(m.group()))
3991 failCount++;
3992 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3993 if (!m.find() || !ng.equals(m.group()))
3994 failCount++;
3995 }
3996 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3997 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3998 failCount++;
3999 report("horizontalAndVerticalWSTest");
4000 }
4001
4002 private static void linebreakTest() throws Exception {
4003 String linebreaks = new String (new char[] {
4004 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4005 String crnl = "\r\n";
4006 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4007 !Pattern.compile("\\R").matcher(crnl).matches() ||
4008 Pattern.compile("\\R\\R").matcher(crnl).matches())
4009 failCount++;
4010 report("linebreakTest");
4011 }
4012
sherman36e2c8f2012-08-09 10:15:26 -07004013 // #7189363
4014 private static void branchTest() throws Exception {
4015 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4016 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4017 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4018 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4019 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4020 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4021 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4022 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4023 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4024 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4025 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4026 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4027 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4028 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4029 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4030 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4031 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4032 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4033 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4034 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4035 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4036 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4037 failCount++;
4038 report("branchTest");
4039 }
4040
shermanf6f35a12013-04-26 13:59:10 -07004041 // This test is for 8007395
4042 private static void groupCurlyNotFoundSuppTest() throws Exception {
4043 String input = "test this as \ud83d\ude0d";
4044 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4045 "test(.)*(@[a-zA-Z.]+)",
4046 "test([^B])+(@[a-zA-Z.]+)",
4047 "test([^B])*(@[a-zA-Z.]+)",
4048 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4049 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4050 }) {
4051 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4052 .matcher(input);
4053 try {
4054 if (m.find()) {
4055 failCount++;
4056 }
4057 } catch (Exception x) {
4058 failCount++;
4059 }
4060 }
4061 report("GroupCurly NotFoundSupp");
4062 }
4063
sherman95a939c2013-08-27 12:54:44 -07004064 // This test is for 8023647
4065 private static void groupCurlyBackoffTest() throws Exception {
4066 if (!"abc1c".matches("(\\w)+1\\1") ||
4067 "abc11".matches("(\\w)+1\\1")) {
4068 failCount++;
4069 }
4070 report("GroupCurly backoff");
4071 }
4072
psandoze9d4ac92013-05-01 18:40:31 +02004073 // This test is for 8012646
4074 private static void patternAsPredicate() throws Exception {
4075 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4076
4077 if (p.test("")) {
4078 failCount++;
4079 }
4080 if (!p.test("word")) {
4081 failCount++;
4082 }
4083 if (p.test("1234")) {
4084 failCount++;
4085 }
4086 report("Pattern.asPredicate");
4087 }
sherman0b4d42d2009-02-23 21:06:15 -08004088}