blob: 79e95772b5942a975502eb34926a86f29ef56d24 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
psandoze9d4ac92013-05-01 18:40:31 +02002 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman1242a6d2013-11-13 11:26:01 -080036 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
37 * 8027645
sherman0b4d42d2009-02-23 21:06:15 -080038 */
39
40import java.util.regex.*;
41import java.util.Random;
42import java.io.*;
43import java.util.*;
44import java.nio.CharBuffer;
psandoze9d4ac92013-05-01 18:40:31 +020045import java.util.function.Predicate;
sherman0b4d42d2009-02-23 21:06:15 -080046
47/**
48 * This is a test class created to check the operation of
49 * the Pattern and Matcher classes.
50 */
51public class RegExTest {
52
53 private static Random generator = new Random();
54 private static boolean failure = false;
55 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080056 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080057
58 /**
59 * Main to interpret arguments and run several tests.
60 *
61 */
62 public static void main(String[] args) throws Exception {
63 // Most of the tests are in a file
64 processFile("TestCases.txt");
65 //processFile("PerlCases.txt");
66 processFile("BMPTestCases.txt");
67 processFile("SupplementaryTestCases.txt");
68
69 // These test many randomly generated char patterns
70 bm();
71 slice();
72
73 // These are hard to put into the file
74 escapes();
75 blankInput();
76
77 // Substitition tests on randomly generated sequences
78 globalSubstitute();
79 stringbufferSubstitute();
80 substitutionBasher();
81
82 // Canonical Equivalence
83 ceTest();
84
85 // Anchors
86 anchorTest();
87
88 // boolean match calls
89 matchesTest();
90 lookingAtTest();
91
92 // Pattern API
93 patternMatchesTest();
94
95 // Misc
96 lookbehindTest();
97 nullArgumentTest();
98 backRefTest();
99 groupCaptureTest();
100 caretTest();
101 charClassTest();
102 emptyPatternTest();
103 findIntTest();
104 group0Test();
105 longPatternTest();
106 octalTest();
107 ampersandTest();
108 negationTest();
109 splitTest();
110 appendTest();
111 caseFoldingTest();
112 commentsTest();
113 unixLinesTest();
114 replaceFirstTest();
115 gTest();
116 zTest();
117 serializeTest();
118 reluctantRepetitionTest();
119 multilineDollarTest();
120 dollarAtEndTest();
121 caretBetweenTerminatorsTest();
122 // This RFE rejected in Tiger numOccurrencesTest();
123 javaCharClassTest();
124 nonCaptureRepetitionTest();
125 notCapturedGroupCurlyMatchTest();
126 escapedSegmentTest();
127 literalPatternTest();
128 literalReplacementTest();
129 regionTest();
130 toStringTest();
131 negatedCharClassTest();
132 findFromTest();
133 boundsTest();
134 unicodeWordBoundsTest();
135 caretAtEndTest();
136 wordSearchTest();
137 hitEndTest();
138 toMatchResultTest();
139 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800140 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800141 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800142 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700143 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800144 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700145 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700146 horizontalAndVerticalWSTest();
147 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700148 branchTest();
shermanf6f35a12013-04-26 13:59:10 -0700149 groupCurlyNotFoundSuppTest();
sherman95a939c2013-08-27 12:54:44 -0700150 groupCurlyBackoffTest();
psandoze9d4ac92013-05-01 18:40:31 +0200151 patternAsPredicate();
sherman1242a6d2013-11-13 11:26:01 -0800152
shermanb16229d2011-12-19 14:14:14 -0800153 if (failure) {
154 throw new
155 RuntimeException("RegExTest failed, 1st failure: " +
156 firstFailure);
157 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800158 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800159 }
sherman0b4d42d2009-02-23 21:06:15 -0800160 }
161
162 // Utility functions
163
164 private static String getRandomAlphaString(int length) {
165 StringBuffer buf = new StringBuffer(length);
166 for (int i=0; i<length; i++) {
167 char randChar = (char)(97 + generator.nextInt(26));
168 buf.append(randChar);
169 }
170 return buf.toString();
171 }
172
173 private static void check(Matcher m, String expected) {
174 m.find();
175 if (!m.group().equals(expected))
176 failCount++;
177 }
178
179 private static void check(Matcher m, String result, boolean expected) {
180 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800181 if (m.group().equals(result) != expected)
182 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800183 }
184
185 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800186 if (p.matcher(s).find() != expected)
187 failCount++;
188 }
189
190 private static void check(String p, String s, boolean expected) {
191 Matcher matcher = Pattern.compile(p).matcher(s);
192 if (matcher.find() != expected)
193 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800194 }
195
196 private static void check(String p, char c, boolean expected) {
197 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
198 Pattern pattern = Pattern.compile(propertyPattern);
199 char[] ca = new char[1]; ca[0] = c;
200 Matcher matcher = pattern.matcher(new String(ca));
201 if (!matcher.find())
202 failCount++;
203 }
204
205 private static void check(String p, int codePoint, boolean expected) {
206 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
207 Pattern pattern = Pattern.compile(propertyPattern);
208 char[] ca = Character.toChars(codePoint);
209 Matcher matcher = pattern.matcher(new String(ca));
210 if (!matcher.find())
211 failCount++;
212 }
213
214 private static void check(String p, int flag, String input, String s,
215 boolean expected)
216 {
217 Pattern pattern = Pattern.compile(p, flag);
218 Matcher matcher = pattern.matcher(input);
219 if (expected)
220 check(matcher, s, expected);
221 else
222 check(pattern, input, false);
223 }
224
225 private static void report(String testName) {
226 int spacesToAdd = 30 - testName.length();
227 StringBuffer paddedNameBuffer = new StringBuffer(testName);
228 for (int i=0; i<spacesToAdd; i++)
229 paddedNameBuffer.append(" ");
230 String paddedName = paddedNameBuffer.toString();
231 System.err.println(paddedName + ": " +
232 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800233 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800234 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800235
236 if (firstFailure == null) {
237 firstFailure = testName;
238 }
239 }
240
sherman0b4d42d2009-02-23 21:06:15 -0800241 failCount = 0;
242 }
243
244 /**
245 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
246 * supplementary characters. This method does NOT fully take care
247 * of the regex syntax.
248 */
249 private static String toSupplementaries(String s) {
250 int length = s.length();
251 StringBuffer sb = new StringBuffer(length * 2);
252
253 for (int i = 0; i < length; ) {
254 char c = s.charAt(i++);
255 if (c == '\\') {
256 sb.append(c);
257 if (i < length) {
258 c = s.charAt(i++);
259 sb.append(c);
260 if (c == 'u') {
261 // assume no syntax error
262 sb.append(s.charAt(i++));
263 sb.append(s.charAt(i++));
264 sb.append(s.charAt(i++));
265 sb.append(s.charAt(i++));
266 }
267 }
268 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
269 sb.append('\ud800').append((char)('\udc00'+c));
270 } else {
271 sb.append(c);
272 }
273 }
274 return sb.toString();
275 }
276
277 // Regular expression tests
278
279 // This is for bug 6178785
280 // Test if an expected NPE gets thrown when passing in a null argument
281 private static boolean check(Runnable test) {
282 try {
283 test.run();
284 failCount++;
285 return false;
286 } catch (NullPointerException npe) {
287 return true;
288 }
289 }
290
291 private static void nullArgumentTest() {
292 check(new Runnable() { public void run() { Pattern.compile(null); }});
293 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
294 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
295 check(new Runnable() { public void run() { Pattern.quote(null);}});
296 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
297 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
298
299 final Matcher m = Pattern.compile("xyz").matcher("xyz");
300 m.matches();
301 check(new Runnable() { public void run() { m.appendTail(null);}});
302 check(new Runnable() { public void run() { m.replaceAll(null);}});
303 check(new Runnable() { public void run() { m.replaceFirst(null);}});
304 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
305 check(new Runnable() { public void run() { m.reset(null);}});
306 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
307 //check(new Runnable() { public void run() { m.usePattern(null);}});
308
309 report("Null Argument");
310 }
311
312 // This is for bug6635133
313 // Test if surrogate pair in Unicode escapes can be handled correctly.
314 private static void surrogatesInClassTest() throws Exception {
315 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
316 Matcher matcher = pattern.matcher("\ud834\udd22");
317 if (!matcher.find())
318 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800319
320 report("Surrogate pair in Unicode escape");
321 }
322
323 // This is for bug6990617
324 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
325 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
326 // char is an octal digit.
327 private static void removeQEQuotingTest() throws Exception {
328 Pattern pattern =
329 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
330 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
331 if (!matcher.find())
332 failCount++;
333
334 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800335 }
336
337 // This is for bug 4988891
338 // Test toMatchResult to see that it is a copy of the Matcher
339 // that is not affected by subsequent operations on the original
340 private static void toMatchResultTest() throws Exception {
341 Pattern pattern = Pattern.compile("squid");
342 Matcher matcher = pattern.matcher(
343 "agiantsquidofdestinyasmallsquidoffate");
344 matcher.find();
345 int matcherStart1 = matcher.start();
346 MatchResult mr = matcher.toMatchResult();
347 if (mr == matcher)
348 failCount++;
349 int resultStart1 = mr.start();
350 if (matcherStart1 != resultStart1)
351 failCount++;
352 matcher.find();
353 int matcherStart2 = matcher.start();
354 int resultStart2 = mr.start();
355 if (matcherStart2 == resultStart2)
356 failCount++;
357 if (resultStart1 != resultStart2)
358 failCount++;
359 MatchResult mr2 = matcher.toMatchResult();
360 if (mr == mr2)
361 failCount++;
362 if (mr2.start() != matcherStart2)
363 failCount++;
364 report("toMatchResult is a copy");
365 }
366
367 // This is for bug 5013885
368 // Must test a slice to see if it reports hitEnd correctly
369 private static void hitEndTest() throws Exception {
370 // Basic test of Slice node
371 Pattern p = Pattern.compile("^squidattack");
372 Matcher m = p.matcher("squack");
373 m.find();
374 if (m.hitEnd())
375 failCount++;
376 m.reset("squid");
377 m.find();
378 if (!m.hitEnd())
379 failCount++;
380
381 // Test Slice, SliceA and SliceU nodes
382 for (int i=0; i<3; i++) {
383 int flags = 0;
384 if (i==1) flags = Pattern.CASE_INSENSITIVE;
385 if (i==2) flags = Pattern.UNICODE_CASE;
386 p = Pattern.compile("^abc", flags);
387 m = p.matcher("ad");
388 m.find();
389 if (m.hitEnd())
390 failCount++;
391 m.reset("ab");
392 m.find();
393 if (!m.hitEnd())
394 failCount++;
395 }
396
397 // Test Boyer-Moore node
398 p = Pattern.compile("catattack");
399 m = p.matcher("attack");
400 m.find();
401 if (!m.hitEnd())
402 failCount++;
403
404 p = Pattern.compile("catattack");
405 m = p.matcher("attackattackattackcatatta");
406 m.find();
407 if (!m.hitEnd())
408 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800409 report("hitEnd from a Slice");
410 }
411
412 // This is for bug 4997476
413 // It is weird code submitted by customer demonstrating a regression
414 private static void wordSearchTest() throws Exception {
415 String testString = new String("word1 word2 word3");
416 Pattern p = Pattern.compile("\\b");
417 Matcher m = p.matcher(testString);
418 int position = 0;
419 int start = 0;
420 while (m.find(position)) {
421 start = m.start();
422 if (start == testString.length())
423 break;
424 if (m.find(start+1)) {
425 position = m.start();
426 } else {
427 position = testString.length();
428 }
429 if (testString.substring(start, position).equals(" "))
430 continue;
431 if (!testString.substring(start, position-1).startsWith("word"))
432 failCount++;
433 }
434 report("Customer word search");
435 }
436
437 // This is for bug 4994840
438 private static void caretAtEndTest() throws Exception {
439 // Problem only occurs with multiline patterns
440 // containing a beginning-of-line caret "^" followed
441 // by an expression that also matches the empty string.
442 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
443 Matcher matcher = pattern.matcher("\r");
444 matcher.find();
445 matcher.find();
446 report("Caret at end");
447 }
448
449 // This test is for 4979006
450 // Check to see if word boundary construct properly handles unicode
451 // non spacing marks
452 private static void unicodeWordBoundsTest() throws Exception {
453 String spaces = " ";
454 String wordChar = "a";
455 String nsm = "\u030a";
456
457 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
458
459 Pattern pattern = Pattern.compile("\\b");
460 Matcher matcher = pattern.matcher("");
461 // S=other B=word character N=non spacing mark .=word boundary
462 // SS.BB.SS
463 String input = spaces + wordChar + wordChar + spaces;
464 twoFindIndexes(input, matcher, 2, 4);
465 // SS.BBN.SS
466 input = spaces + wordChar +wordChar + nsm + spaces;
467 twoFindIndexes(input, matcher, 2, 5);
468 // SS.BN.SS
469 input = spaces + wordChar + nsm + spaces;
470 twoFindIndexes(input, matcher, 2, 4);
471 // SS.BNN.SS
472 input = spaces + wordChar + nsm + nsm + spaces;
473 twoFindIndexes(input, matcher, 2, 5);
474 // SSN.BB.SS
475 input = spaces + nsm + wordChar + wordChar + spaces;
476 twoFindIndexes(input, matcher, 3, 5);
477 // SS.BNB.SS
478 input = spaces + wordChar + nsm + wordChar + spaces;
479 twoFindIndexes(input, matcher, 2, 5);
480 // SSNNSS
481 input = spaces + nsm + nsm + spaces;
482 matcher.reset(input);
483 if (matcher.find())
484 failCount++;
485 // SSN.BBN.SS
486 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
487 twoFindIndexes(input, matcher, 3, 6);
488
489 report("Unicode word boundary");
490 }
491
492 private static void twoFindIndexes(String input, Matcher matcher, int a,
493 int b) throws Exception
494 {
495 matcher.reset(input);
496 matcher.find();
497 if (matcher.start() != a)
498 failCount++;
499 matcher.find();
500 if (matcher.start() != b)
501 failCount++;
502 }
503
504 // This test is for 6284152
505 static void check(String regex, String input, String[] expected) {
506 List<String> result = new ArrayList<String>();
507 Pattern p = Pattern.compile(regex);
508 Matcher m = p.matcher(input);
509 while (m.find()) {
510 result.add(m.group());
511 }
512 if (!Arrays.asList(expected).equals(result))
513 failCount++;
514 }
515
516 private static void lookbehindTest() throws Exception {
517 //Positive
518 check("(?<=%.{0,5})foo\\d",
519 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
520 new String[]{"foo1", "foo2", "foo3"});
521
522 //boundary at end of the lookbehind sub-regex should work consistently
523 //with the boundary just after the lookbehind sub-regex
524 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
525 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
526 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
527 check("(?<!abc \\b)foo", "abc foo", new String[0]);
528
529 //Negative
530 check("(?<!%.{0,5})foo\\d",
531 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
532 new String[] {"foo4", "foo5"});
533
534 //Positive greedy
535 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
536
537 //Positive reluctant
538 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
539
540 //supplementary
541 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
542 new String[] {"fo\ud800\udc00o"});
543 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
544 new String[] {"fo\ud800\udc00o"});
545 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
546 new String[] {"fo\ud800\udc00o"});
547 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
548 new String[] {"fo\ud800\udc00o"});
549 report("Lookbehind");
550 }
551
552 // This test is for 4938995
553 // Check to see if weak region boundaries are transparent to
554 // lookahead and lookbehind constructs
555 private static void boundsTest() throws Exception {
556 String fullMessage = "catdogcat";
557 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
558 Matcher matcher = pattern.matcher("catdogca");
559 matcher.useTransparentBounds(true);
560 if (matcher.find())
561 failCount++;
562 matcher.reset("atdogcat");
563 if (matcher.find())
564 failCount++;
565 matcher.reset(fullMessage);
566 if (!matcher.find())
567 failCount++;
568 matcher.reset(fullMessage);
569 matcher.region(0,9);
570 if (!matcher.find())
571 failCount++;
572 matcher.reset(fullMessage);
573 matcher.region(0,6);
574 if (!matcher.find())
575 failCount++;
576 matcher.reset(fullMessage);
577 matcher.region(3,6);
578 if (!matcher.find())
579 failCount++;
580 matcher.useTransparentBounds(false);
581 if (matcher.find())
582 failCount++;
583
584 // Negative lookahead/lookbehind
585 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
586 matcher = pattern.matcher("dogcat");
587 matcher.useTransparentBounds(true);
588 matcher.region(0,3);
589 if (matcher.find())
590 failCount++;
591 matcher.reset("catdog");
592 matcher.region(3,6);
593 if (matcher.find())
594 failCount++;
595 matcher.useTransparentBounds(false);
596 matcher.reset("dogcat");
597 matcher.region(0,3);
598 if (!matcher.find())
599 failCount++;
600 matcher.reset("catdog");
601 matcher.region(3,6);
602 if (!matcher.find())
603 failCount++;
604
605 report("Region bounds transparency");
606 }
607
608 // This test is for 4945394
609 private static void findFromTest() throws Exception {
610 String message = "This is 40 $0 message.";
611 Pattern pat = Pattern.compile("\\$0");
612 Matcher match = pat.matcher(message);
613 if (!match.find())
614 failCount++;
615 if (match.find())
616 failCount++;
617 if (match.find())
618 failCount++;
619 report("Check for alternating find");
620 }
621
622 // This test is for 4872664 and 4892980
623 private static void negatedCharClassTest() throws Exception {
624 Pattern pattern = Pattern.compile("[^>]");
625 Matcher matcher = pattern.matcher("\u203A");
626 if (!matcher.matches())
627 failCount++;
628 pattern = Pattern.compile("[^fr]");
629 matcher = pattern.matcher("a");
630 if (!matcher.find())
631 failCount++;
632 matcher.reset("\u203A");
633 if (!matcher.find())
634 failCount++;
635 String s = "for";
636 String result[] = s.split("[^fr]");
637 if (!result[0].equals("f"))
638 failCount++;
639 if (!result[1].equals("r"))
640 failCount++;
641 s = "f\u203Ar";
642 result = s.split("[^fr]");
643 if (!result[0].equals("f"))
644 failCount++;
645 if (!result[1].equals("r"))
646 failCount++;
647
648 // Test adding to bits, subtracting a node, then adding to bits again
649 pattern = Pattern.compile("[^f\u203Ar]");
650 matcher = pattern.matcher("a");
651 if (!matcher.find())
652 failCount++;
653 matcher.reset("f");
654 if (matcher.find())
655 failCount++;
656 matcher.reset("\u203A");
657 if (matcher.find())
658 failCount++;
659 matcher.reset("r");
660 if (matcher.find())
661 failCount++;
662 matcher.reset("\u203B");
663 if (!matcher.find())
664 failCount++;
665
666 // Test subtracting a node, adding to bits, subtracting again
667 pattern = Pattern.compile("[^\u203Ar\u203B]");
668 matcher = pattern.matcher("a");
669 if (!matcher.find())
670 failCount++;
671 matcher.reset("\u203A");
672 if (matcher.find())
673 failCount++;
674 matcher.reset("r");
675 if (matcher.find())
676 failCount++;
677 matcher.reset("\u203B");
678 if (matcher.find())
679 failCount++;
680 matcher.reset("\u203C");
681 if (!matcher.find())
682 failCount++;
683
684 report("Negated Character Class");
685 }
686
687 // This test is for 4628291
688 private static void toStringTest() throws Exception {
689 Pattern pattern = Pattern.compile("b+");
690 if (pattern.toString() != "b+")
691 failCount++;
692 Matcher matcher = pattern.matcher("aaabbbccc");
693 String matcherString = matcher.toString(); // unspecified
694 matcher.find();
695 matcherString = matcher.toString(); // unspecified
696 matcher.region(0,3);
697 matcherString = matcher.toString(); // unspecified
698 matcher.reset();
699 matcherString = matcher.toString(); // unspecified
700 report("toString");
701 }
702
703 // This test is for 4808962
704 private static void literalPatternTest() throws Exception {
705 int flags = Pattern.LITERAL;
706
707 Pattern pattern = Pattern.compile("abc\\t$^", flags);
708 check(pattern, "abc\\t$^", true);
709
710 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
711 check(pattern, "abc\\t$^", true);
712
713 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
714 check(pattern, "\\Qa^$bcabc\\E", true);
715 check(pattern, "a^$bcabc", false);
716
717 pattern = Pattern.compile("\\\\Q\\\\E");
718 check(pattern, "\\Q\\E", true);
719
720 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
721 check(pattern, "abcefg\\Q\\Ehij", true);
722
723 pattern = Pattern.compile("\\\\\\Q\\\\E");
724 check(pattern, "\\\\\\\\", true);
725
726 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
727 check(pattern, "\\Qa^$bcabc\\E", true);
728 check(pattern, "a^$bcabc", false);
729
730 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
731 check(pattern, "\\Qabc\\Edef", true);
732 check(pattern, "abcdef", false);
733
734 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
735 check(pattern, "abc\\Edef", true);
736 check(pattern, "abcdef", false);
737
738 pattern = Pattern.compile(Pattern.quote("\\E"));
739 check(pattern, "\\E", true);
740
741 pattern = Pattern.compile("((((abc.+?:)", flags);
742 check(pattern, "((((abc.+?:)", true);
743
744 flags |= Pattern.MULTILINE;
745
746 pattern = Pattern.compile("^cat$", flags);
747 check(pattern, "abc^cat$def", true);
748 check(pattern, "cat", false);
749
750 flags |= Pattern.CASE_INSENSITIVE;
751
752 pattern = Pattern.compile("abcdef", flags);
753 check(pattern, "ABCDEF", true);
754 check(pattern, "AbCdEf", true);
755
756 flags |= Pattern.DOTALL;
757
758 pattern = Pattern.compile("a...b", flags);
759 check(pattern, "A...b", true);
760 check(pattern, "Axxxb", false);
761
762 flags |= Pattern.CANON_EQ;
763
764 Pattern p = Pattern.compile("testa\u030a", flags);
765 check(pattern, "testa\u030a", false);
766 check(pattern, "test\u00e5", false);
767
768 // Supplementary character test
769 flags = Pattern.LITERAL;
770
771 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
772 check(pattern, toSupplementaries("abc\\t$^"), true);
773
774 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
775 check(pattern, toSupplementaries("abc\\t$^"), true);
776
777 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
778 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
779 check(pattern, toSupplementaries("a^$bcabc"), false);
780
781 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
782 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
783 check(pattern, toSupplementaries("a^$bcabc"), false);
784
785 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
786 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
787 check(pattern, toSupplementaries("abcdef"), false);
788
789 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
790 check(pattern, toSupplementaries("abc\\Edef"), true);
791 check(pattern, toSupplementaries("abcdef"), false);
792
793 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
794 check(pattern, toSupplementaries("((((abc.+?:)"), true);
795
796 flags |= Pattern.MULTILINE;
797
798 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
799 check(pattern, toSupplementaries("abc^cat$def"), true);
800 check(pattern, toSupplementaries("cat"), false);
801
802 flags |= Pattern.DOTALL;
803
804 // note: this is case-sensitive.
805 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
806 check(pattern, toSupplementaries("a...b"), true);
807 check(pattern, toSupplementaries("axxxb"), false);
808
809 flags |= Pattern.CANON_EQ;
810
811 String t = toSupplementaries("test");
812 p = Pattern.compile(t + "a\u030a", flags);
813 check(pattern, t + "a\u030a", false);
814 check(pattern, t + "\u00e5", false);
815
816 report("Literal pattern");
817 }
818
819 // This test is for 4803179
820 // This test is also for 4808962, replacement parts
821 private static void literalReplacementTest() throws Exception {
822 int flags = Pattern.LITERAL;
823
824 Pattern pattern = Pattern.compile("abc", flags);
825 Matcher matcher = pattern.matcher("zzzabczzz");
826 String replaceTest = "$0";
827 String result = matcher.replaceAll(replaceTest);
828 if (!result.equals("zzzabczzz"))
829 failCount++;
830
831 matcher.reset();
832 String literalReplacement = matcher.quoteReplacement(replaceTest);
833 result = matcher.replaceAll(literalReplacement);
834 if (!result.equals("zzz$0zzz"))
835 failCount++;
836
837 matcher.reset();
838 replaceTest = "\\t$\\$";
839 literalReplacement = matcher.quoteReplacement(replaceTest);
840 result = matcher.replaceAll(literalReplacement);
841 if (!result.equals("zzz\\t$\\$zzz"))
842 failCount++;
843
844 // Supplementary character test
845 pattern = Pattern.compile(toSupplementaries("abc"), flags);
846 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
847 replaceTest = "$0";
848 result = matcher.replaceAll(replaceTest);
849 if (!result.equals(toSupplementaries("zzzabczzz")))
850 failCount++;
851
852 matcher.reset();
853 literalReplacement = matcher.quoteReplacement(replaceTest);
854 result = matcher.replaceAll(literalReplacement);
855 if (!result.equals(toSupplementaries("zzz$0zzz")))
856 failCount++;
857
858 matcher.reset();
859 replaceTest = "\\t$\\$";
860 literalReplacement = matcher.quoteReplacement(replaceTest);
861 result = matcher.replaceAll(literalReplacement);
862 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
863 failCount++;
864
sherman5c8f3492012-04-12 15:01:41 -0700865 // IAE should be thrown if backslash or '$' is the last character
866 // in replacement string
867 try {
868 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700869 failCount++;
870 } catch (IllegalArgumentException iie) {
871 } catch (Exception e) {
872 failCount++;
873 }
874 try {
sherman5c8f3492012-04-12 15:01:41 -0700875 "\uac00".replaceAll("\uac00", "\\");
876 failCount++;
877 } catch (IllegalArgumentException iie) {
878 } catch (Exception e) {
879 failCount++;
880 }
sherman0b4d42d2009-02-23 21:06:15 -0800881 report("Literal replacement");
882 }
883
884 // This test is for 4757029
885 private static void regionTest() throws Exception {
886 Pattern pattern = Pattern.compile("abc");
887 Matcher matcher = pattern.matcher("abcdefabc");
888
889 matcher.region(0,9);
890 if (!matcher.find())
891 failCount++;
892 if (!matcher.find())
893 failCount++;
894 matcher.region(0,3);
895 if (!matcher.find())
896 failCount++;
897 matcher.region(3,6);
898 if (matcher.find())
899 failCount++;
900 matcher.region(0,2);
901 if (matcher.find())
902 failCount++;
903
904 expectRegionFail(matcher, 1, -1);
905 expectRegionFail(matcher, -1, -1);
906 expectRegionFail(matcher, -1, 1);
907 expectRegionFail(matcher, 5, 3);
908 expectRegionFail(matcher, 5, 12);
909 expectRegionFail(matcher, 12, 12);
910
911 pattern = Pattern.compile("^abc$");
912 matcher = pattern.matcher("zzzabczzz");
913 matcher.region(0,9);
914 if (matcher.find())
915 failCount++;
916 matcher.region(3,6);
917 if (!matcher.find())
918 failCount++;
919 matcher.region(3,6);
920 matcher.useAnchoringBounds(false);
921 if (matcher.find())
922 failCount++;
923
924 // Supplementary character test
925 pattern = Pattern.compile(toSupplementaries("abc"));
926 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
927 matcher.region(0,9*2);
928 if (!matcher.find())
929 failCount++;
930 if (!matcher.find())
931 failCount++;
932 matcher.region(0,3*2);
933 if (!matcher.find())
934 failCount++;
935 matcher.region(1,3*2);
936 if (matcher.find())
937 failCount++;
938 matcher.region(3*2,6*2);
939 if (matcher.find())
940 failCount++;
941 matcher.region(0,2*2);
942 if (matcher.find())
943 failCount++;
944 matcher.region(0,2*2+1);
945 if (matcher.find())
946 failCount++;
947
948 expectRegionFail(matcher, 1*2, -1);
949 expectRegionFail(matcher, -1, -1);
950 expectRegionFail(matcher, -1, 1*2);
951 expectRegionFail(matcher, 5*2, 3*2);
952 expectRegionFail(matcher, 5*2, 12*2);
953 expectRegionFail(matcher, 12*2, 12*2);
954
955 pattern = Pattern.compile(toSupplementaries("^abc$"));
956 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
957 matcher.region(0,9*2);
958 if (matcher.find())
959 failCount++;
960 matcher.region(3*2,6*2);
961 if (!matcher.find())
962 failCount++;
963 matcher.region(3*2+1,6*2);
964 if (matcher.find())
965 failCount++;
966 matcher.region(3*2,6*2-1);
967 if (matcher.find())
968 failCount++;
969 matcher.region(3*2,6*2);
970 matcher.useAnchoringBounds(false);
971 if (matcher.find())
972 failCount++;
973 report("Regions");
974 }
975
976 private static void expectRegionFail(Matcher matcher, int index1,
977 int index2)
978 {
979 try {
980 matcher.region(index1, index2);
981 failCount++;
982 } catch (IndexOutOfBoundsException ioobe) {
983 // Correct result
984 } catch (IllegalStateException ise) {
985 // Correct result
986 }
987 }
988
989 // This test is for 4803197
990 private static void escapedSegmentTest() throws Exception {
991
992 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
993 check(pattern, "dir1\\dir2", true);
994
995 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
996 check(pattern, "dir1\\dir2\\", true);
997
998 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
999 check(pattern, "dir1\\dir2\\", true);
1000
1001 // Supplementary character test
1002 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1003 check(pattern, toSupplementaries("dir1\\dir2"), true);
1004
1005 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1006 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1007
1008 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1009 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1010
1011 report("Escaped segment");
1012 }
1013
1014 // This test is for 4792284
1015 private static void nonCaptureRepetitionTest() throws Exception {
1016 String input = "abcdefgh;";
1017
1018 String[] patterns = new String[] {
1019 "(?:\\w{4})+;",
1020 "(?:\\w{8})*;",
1021 "(?:\\w{2}){2,4};",
1022 "(?:\\w{4}){2,};", // only matches the
1023 ".*?(?:\\w{5})+;", // specified minimum
1024 ".*?(?:\\w{9})*;", // number of reps - OK
1025 "(?:\\w{4})+?;", // lazy repetition - OK
1026 "(?:\\w{4})++;", // possessive repetition - OK
1027 "(?:\\w{2,}?)+;", // non-deterministic - OK
1028 "(\\w{4})+;", // capturing group - OK
1029 };
1030
1031 for (int i = 0; i < patterns.length; i++) {
1032 // Check find()
1033 check(patterns[i], 0, input, input, true);
1034 // Check matches()
1035 Pattern p = Pattern.compile(patterns[i]);
1036 Matcher m = p.matcher(input);
1037
1038 if (m.matches()) {
1039 if (!m.group(0).equals(input))
1040 failCount++;
1041 } else {
1042 failCount++;
1043 }
1044 }
1045
1046 report("Non capturing repetition");
1047 }
1048
1049 // This test is for 6358731
1050 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1051 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1052 Matcher matcher = pattern.matcher("abcd");
1053 if (!matcher.matches() ||
1054 matcher.group(1) != null ||
1055 !matcher.group(2).equals("abcd")) {
1056 failCount++;
1057 }
1058 report("Not captured GroupCurly");
1059 }
1060
1061 // This test is for 4706545
1062 private static void javaCharClassTest() throws Exception {
1063 for (int i=0; i<1000; i++) {
1064 char c = (char)generator.nextInt();
1065 check("{javaLowerCase}", c, Character.isLowerCase(c));
1066 check("{javaUpperCase}", c, Character.isUpperCase(c));
1067 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1068 check("{javaTitleCase}", c, Character.isTitleCase(c));
1069 check("{javaDigit}", c, Character.isDigit(c));
1070 check("{javaDefined}", c, Character.isDefined(c));
1071 check("{javaLetter}", c, Character.isLetter(c));
1072 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1073 check("{javaJavaIdentifierStart}", c,
1074 Character.isJavaIdentifierStart(c));
1075 check("{javaJavaIdentifierPart}", c,
1076 Character.isJavaIdentifierPart(c));
1077 check("{javaUnicodeIdentifierStart}", c,
1078 Character.isUnicodeIdentifierStart(c));
1079 check("{javaUnicodeIdentifierPart}", c,
1080 Character.isUnicodeIdentifierPart(c));
1081 check("{javaIdentifierIgnorable}", c,
1082 Character.isIdentifierIgnorable(c));
1083 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1084 check("{javaWhitespace}", c, Character.isWhitespace(c));
1085 check("{javaISOControl}", c, Character.isISOControl(c));
1086 check("{javaMirrored}", c, Character.isMirrored(c));
1087
1088 }
1089
1090 // Supplementary character test
1091 for (int i=0; i<1000; i++) {
1092 int c = generator.nextInt(Character.MAX_CODE_POINT
1093 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1094 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1095 check("{javaLowerCase}", c, Character.isLowerCase(c));
1096 check("{javaUpperCase}", c, Character.isUpperCase(c));
1097 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1098 check("{javaTitleCase}", c, Character.isTitleCase(c));
1099 check("{javaDigit}", c, Character.isDigit(c));
1100 check("{javaDefined}", c, Character.isDefined(c));
1101 check("{javaLetter}", c, Character.isLetter(c));
1102 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1103 check("{javaJavaIdentifierStart}", c,
1104 Character.isJavaIdentifierStart(c));
1105 check("{javaJavaIdentifierPart}", c,
1106 Character.isJavaIdentifierPart(c));
1107 check("{javaUnicodeIdentifierStart}", c,
1108 Character.isUnicodeIdentifierStart(c));
1109 check("{javaUnicodeIdentifierPart}", c,
1110 Character.isUnicodeIdentifierPart(c));
1111 check("{javaIdentifierIgnorable}", c,
1112 Character.isIdentifierIgnorable(c));
1113 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1114 check("{javaWhitespace}", c, Character.isWhitespace(c));
1115 check("{javaISOControl}", c, Character.isISOControl(c));
1116 check("{javaMirrored}", c, Character.isMirrored(c));
1117 }
1118
1119 report("Java character classes");
1120 }
1121
1122 // This test is for 4523620
1123 /*
1124 private static void numOccurrencesTest() throws Exception {
1125 Pattern pattern = Pattern.compile("aaa");
1126
1127 if (pattern.numOccurrences("aaaaaa", false) != 2)
1128 failCount++;
1129 if (pattern.numOccurrences("aaaaaa", true) != 4)
1130 failCount++;
1131
1132 pattern = Pattern.compile("^");
1133 if (pattern.numOccurrences("aaaaaa", false) != 1)
1134 failCount++;
1135 if (pattern.numOccurrences("aaaaaa", true) != 1)
1136 failCount++;
1137
1138 report("Number of Occurrences");
1139 }
1140 */
1141
1142 // This test is for 4776374
1143 private static void caretBetweenTerminatorsTest() throws Exception {
1144 int flags1 = Pattern.DOTALL;
1145 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1146 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1147 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1148
1149 check("^....", flags1, "test\ntest", "test", true);
1150 check(".....^", flags1, "test\ntest", "test", false);
1151 check(".....^", flags1, "test\n", "test", false);
1152 check("....^", flags1, "test\r\n", "test", false);
1153
1154 check("^....", flags2, "test\ntest", "test", true);
1155 check("....^", flags2, "test\ntest", "test", false);
1156 check(".....^", flags2, "test\n", "test", false);
1157 check("....^", flags2, "test\r\n", "test", false);
1158
1159 check("^....", flags3, "test\ntest", "test", true);
1160 check(".....^", flags3, "test\ntest", "test\n", true);
1161 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1162 check(".....^", flags3, "test\n", "test", false);
1163 check(".....^", flags3, "test\r\n", "test", false);
1164 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1165
1166 check("^....", flags4, "test\ntest", "test", true);
1167 check(".....^", flags3, "test\ntest", "test\n", true);
1168 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1169 check(".....^", flags4, "test\n", "test\n", false);
1170 check(".....^", flags4, "test\r\n", "test\r", false);
1171
1172 // Supplementary character test
1173 String t = toSupplementaries("test");
1174 check("^....", flags1, t+"\n"+t, t, true);
1175 check(".....^", flags1, t+"\n"+t, t, false);
1176 check(".....^", flags1, t+"\n", t, false);
1177 check("....^", flags1, t+"\r\n", t, false);
1178
1179 check("^....", flags2, t+"\n"+t, t, true);
1180 check("....^", flags2, t+"\n"+t, t, false);
1181 check(".....^", flags2, t+"\n", t, false);
1182 check("....^", flags2, t+"\r\n", t, false);
1183
1184 check("^....", flags3, t+"\n"+t, t, true);
1185 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1186 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1187 check(".....^", flags3, t+"\n", t, false);
1188 check(".....^", flags3, t+"\r\n", t, false);
1189 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1190
1191 check("^....", flags4, t+"\n"+t, t, true);
1192 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1193 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1194 check(".....^", flags4, t+"\n", t+"\n", false);
1195 check(".....^", flags4, t+"\r\n", t+"\r", false);
1196
1197 report("Caret between terminators");
1198 }
1199
1200 // This test is for 4727935
1201 private static void dollarAtEndTest() throws Exception {
1202 int flags1 = Pattern.DOTALL;
1203 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1204 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1205
1206 check("....$", flags1, "test\n", "test", true);
1207 check("....$", flags1, "test\r\n", "test", true);
1208 check(".....$", flags1, "test\n", "test\n", true);
1209 check(".....$", flags1, "test\u0085", "test\u0085", true);
1210 check("....$", flags1, "test\u0085", "test", true);
1211
1212 check("....$", flags2, "test\n", "test", true);
1213 check(".....$", flags2, "test\n", "test\n", true);
1214 check(".....$", flags2, "test\u0085", "test\u0085", true);
1215 check("....$", flags2, "test\u0085", "est\u0085", true);
1216
1217 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1218 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1219 check("....$blah", flags3, "test\nblah", "!!!!", false);
1220 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1221
1222 // Supplementary character test
1223 String t = toSupplementaries("test");
1224 String b = toSupplementaries("blah");
1225 check("....$", flags1, t+"\n", t, true);
1226 check("....$", flags1, t+"\r\n", t, true);
1227 check(".....$", flags1, t+"\n", t+"\n", true);
1228 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1229 check("....$", flags1, t+"\u0085", t, true);
1230
1231 check("....$", flags2, t+"\n", t, true);
1232 check(".....$", flags2, t+"\n", t+"\n", true);
1233 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1234 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1235
1236 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1237 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1238 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1239 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1240
1241 report("Dollar at End");
1242 }
1243
1244 // This test is for 4711773
1245 private static void multilineDollarTest() throws Exception {
1246 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1247 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1248 matcher.find();
1249 if (matcher.start(0) != 9)
1250 failCount++;
1251 matcher.find();
1252 if (matcher.start(0) != 20)
1253 failCount++;
1254
1255 // Supplementary character test
1256 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1257 matcher.find();
1258 if (matcher.start(0) != 9*2)
1259 failCount++;
1260 matcher.find();
1261 if (matcher.start(0) != 20*2)
1262 failCount++;
1263
1264 report("Multiline Dollar");
1265 }
1266
1267 private static void reluctantRepetitionTest() throws Exception {
1268 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1269 check(p, "1 word word word 2", true);
1270 check(p, "1 wor wo w 2", true);
1271 check(p, "1 word word 2", true);
1272 check(p, "1 word 2", true);
1273 check(p, "1 wo w w 2", true);
1274 check(p, "1 wo w 2", true);
1275 check(p, "1 wor w 2", true);
1276
1277 p = Pattern.compile("([a-z])+?c");
1278 Matcher m = p.matcher("ababcdefdec");
1279 check(m, "ababc");
1280
1281 // Supplementary character test
1282 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1283 m = p.matcher(toSupplementaries("ababcdefdec"));
1284 check(m, toSupplementaries("ababc"));
1285
1286 report("Reluctant Repetition");
1287 }
1288
1289 private static void serializeTest() throws Exception {
1290 String patternStr = "(b)";
1291 String matchStr = "b";
1292 Pattern pattern = Pattern.compile(patternStr);
1293 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1294 ObjectOutputStream oos = new ObjectOutputStream(baos);
1295 oos.writeObject(pattern);
1296 oos.close();
1297 ObjectInputStream ois = new ObjectInputStream(
1298 new ByteArrayInputStream(baos.toByteArray()));
1299 Pattern serializedPattern = (Pattern)ois.readObject();
1300 ois.close();
1301 Matcher matcher = serializedPattern.matcher(matchStr);
1302 if (!matcher.matches())
1303 failCount++;
1304 if (matcher.groupCount() != 1)
1305 failCount++;
1306
1307 report("Serialization");
1308 }
1309
1310 private static void gTest() {
1311 Pattern pattern = Pattern.compile("\\G\\w");
1312 Matcher matcher = pattern.matcher("abc#x#x");
1313 matcher.find();
1314 matcher.find();
1315 matcher.find();
1316 if (matcher.find())
1317 failCount++;
1318
1319 pattern = Pattern.compile("\\GA*");
1320 matcher = pattern.matcher("1A2AA3");
1321 matcher.find();
1322 if (matcher.find())
1323 failCount++;
1324
1325 pattern = Pattern.compile("\\GA*");
1326 matcher = pattern.matcher("1A2AA3");
1327 if (!matcher.find(1))
1328 failCount++;
1329 matcher.find();
1330 if (matcher.find())
1331 failCount++;
1332
1333 report("\\G");
1334 }
1335
1336 private static void zTest() {
1337 Pattern pattern = Pattern.compile("foo\\Z");
1338 // Positives
1339 check(pattern, "foo\u0085", true);
1340 check(pattern, "foo\u2028", true);
1341 check(pattern, "foo\u2029", true);
1342 check(pattern, "foo\n", true);
1343 check(pattern, "foo\r", true);
1344 check(pattern, "foo\r\n", true);
1345 // Negatives
1346 check(pattern, "fooo", false);
1347 check(pattern, "foo\n\r", false);
1348
1349 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1350 // Positives
1351 check(pattern, "foo", true);
1352 check(pattern, "foo\n", true);
1353 // Negatives
1354 check(pattern, "foo\r", false);
1355 check(pattern, "foo\u0085", false);
1356 check(pattern, "foo\u2028", false);
1357 check(pattern, "foo\u2029", false);
1358
1359 report("\\Z");
1360 }
1361
1362 private static void replaceFirstTest() {
1363 Pattern pattern = Pattern.compile("(ab)(c*)");
1364 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1365 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1366 failCount++;
1367
1368 matcher.reset("zzzabccczzzabcczzzabccczzz");
1369 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1370 failCount++;
1371
1372 matcher.reset("zzzabccczzzabcczzzabccczzz");
1373 String result = matcher.replaceFirst("$1");
1374 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1375 failCount++;
1376
1377 matcher.reset("zzzabccczzzabcczzzabccczzz");
1378 result = matcher.replaceFirst("$2");
1379 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1380 failCount++;
1381
1382 pattern = Pattern.compile("a*");
1383 matcher = pattern.matcher("aaaaaaaaaa");
1384 if (!matcher.replaceFirst("test").equals("test"))
1385 failCount++;
1386
1387 pattern = Pattern.compile("a+");
1388 matcher = pattern.matcher("zzzaaaaaaaaaa");
1389 if (!matcher.replaceFirst("test").equals("zzztest"))
1390 failCount++;
1391
1392 // Supplementary character test
1393 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1394 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1395 if (!matcher.replaceFirst(toSupplementaries("test"))
1396 .equals(toSupplementaries("testzzzabcczzzabccc")))
1397 failCount++;
1398
1399 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1400 if (!matcher.replaceFirst(toSupplementaries("test")).
1401 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1402 failCount++;
1403
1404 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1405 result = matcher.replaceFirst("$1");
1406 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1407 failCount++;
1408
1409 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1410 result = matcher.replaceFirst("$2");
1411 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1412 failCount++;
1413
1414 pattern = Pattern.compile(toSupplementaries("a*"));
1415 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1416 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1417 failCount++;
1418
1419 pattern = Pattern.compile(toSupplementaries("a+"));
1420 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1421 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1422 failCount++;
1423
1424 report("Replace First");
1425 }
1426
1427 private static void unixLinesTest() {
1428 Pattern pattern = Pattern.compile(".*");
1429 Matcher matcher = pattern.matcher("aa\u2028blah");
1430 matcher.find();
1431 if (!matcher.group(0).equals("aa"))
1432 failCount++;
1433
1434 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1435 matcher = pattern.matcher("aa\u2028blah");
1436 matcher.find();
1437 if (!matcher.group(0).equals("aa\u2028blah"))
1438 failCount++;
1439
1440 pattern = Pattern.compile("[az]$",
1441 Pattern.MULTILINE | Pattern.UNIX_LINES);
1442 matcher = pattern.matcher("aa\u2028zz");
1443 check(matcher, "a\u2028", false);
1444
1445 // Supplementary character test
1446 pattern = Pattern.compile(".*");
1447 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1448 matcher.find();
1449 if (!matcher.group(0).equals(toSupplementaries("aa")))
1450 failCount++;
1451
1452 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1453 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1454 matcher.find();
1455 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1456 failCount++;
1457
1458 pattern = Pattern.compile(toSupplementaries("[az]$"),
1459 Pattern.MULTILINE | Pattern.UNIX_LINES);
1460 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1461 check(matcher, toSupplementaries("a\u2028"), false);
1462
1463 report("Unix Lines");
1464 }
1465
1466 private static void commentsTest() {
1467 int flags = Pattern.COMMENTS;
1468
1469 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1470 Matcher matcher = pattern.matcher("aa#aa");
1471 if (!matcher.matches())
1472 failCount++;
1473
1474 pattern = Pattern.compile("aa # blah", flags);
1475 matcher = pattern.matcher("aa");
1476 if (!matcher.matches())
1477 failCount++;
1478
1479 pattern = Pattern.compile("aa blah", flags);
1480 matcher = pattern.matcher("aablah");
1481 if (!matcher.matches())
1482 failCount++;
1483
1484 pattern = Pattern.compile("aa # blah blech ", flags);
1485 matcher = pattern.matcher("aa");
1486 if (!matcher.matches())
1487 failCount++;
1488
1489 pattern = Pattern.compile("aa # blah\n ", flags);
1490 matcher = pattern.matcher("aa");
1491 if (!matcher.matches())
1492 failCount++;
1493
1494 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1495 matcher = pattern.matcher("aabc");
1496 if (!matcher.matches())
1497 failCount++;
1498
1499 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1500 matcher = pattern.matcher("aabc");
1501 if (!matcher.matches())
1502 failCount++;
1503
1504 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1505 matcher = pattern.matcher("aabc#blech");
1506 if (!matcher.matches())
1507 failCount++;
1508
1509 // Supplementary character test
1510 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1511 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1512 if (!matcher.matches())
1513 failCount++;
1514
1515 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1516 matcher = pattern.matcher(toSupplementaries("aa"));
1517 if (!matcher.matches())
1518 failCount++;
1519
1520 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1521 matcher = pattern.matcher(toSupplementaries("aablah"));
1522 if (!matcher.matches())
1523 failCount++;
1524
1525 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1526 matcher = pattern.matcher(toSupplementaries("aa"));
1527 if (!matcher.matches())
1528 failCount++;
1529
1530 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1531 matcher = pattern.matcher(toSupplementaries("aa"));
1532 if (!matcher.matches())
1533 failCount++;
1534
1535 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1536 matcher = pattern.matcher(toSupplementaries("aabc"));
1537 if (!matcher.matches())
1538 failCount++;
1539
1540 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1541 matcher = pattern.matcher(toSupplementaries("aabc"));
1542 if (!matcher.matches())
1543 failCount++;
1544
1545 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1546 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1547 if (!matcher.matches())
1548 failCount++;
1549
1550 report("Comments");
1551 }
1552
1553 private static void caseFoldingTest() { // bug 4504687
1554 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1555 Pattern pattern = Pattern.compile("aa", flags);
1556 Matcher matcher = pattern.matcher("ab");
1557 if (matcher.matches())
1558 failCount++;
1559
1560 pattern = Pattern.compile("aA", flags);
1561 matcher = pattern.matcher("ab");
1562 if (matcher.matches())
1563 failCount++;
1564
1565 pattern = Pattern.compile("aa", flags);
1566 matcher = pattern.matcher("aB");
1567 if (matcher.matches())
1568 failCount++;
1569 matcher = pattern.matcher("Ab");
1570 if (matcher.matches())
1571 failCount++;
1572
1573 // ASCII "a"
1574 // Latin-1 Supplement "a" + grave
1575 // Cyrillic "a"
1576 String[] patterns = new String[] {
1577 //single
1578 "a", "\u00e0", "\u0430",
1579 //slice
1580 "ab", "\u00e0\u00e1", "\u0430\u0431",
1581 //class single
1582 "[a]", "[\u00e0]", "[\u0430]",
1583 //class range
1584 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1585 //back reference
1586 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1587 };
1588
1589 String[] texts = new String[] {
1590 "A", "\u00c0", "\u0410",
1591 "AB", "\u00c0\u00c1", "\u0410\u0411",
1592 "A", "\u00c0", "\u0410",
1593 "B", "\u00c2", "\u0411",
1594 "aA", "\u00e0\u00c0", "\u0430\u0410"
1595 };
1596
1597 boolean[] expected = new boolean[] {
1598 true, false, false,
1599 true, false, false,
1600 true, false, false,
1601 true, false, false,
1602 true, false, false
1603 };
1604
1605 flags = Pattern.CASE_INSENSITIVE;
1606 for (int i = 0; i < patterns.length; i++) {
1607 pattern = Pattern.compile(patterns[i], flags);
1608 matcher = pattern.matcher(texts[i]);
1609 if (matcher.matches() != expected[i]) {
1610 System.out.println("<1> Failed at " + i);
1611 failCount++;
1612 }
1613 }
1614
1615 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1616 for (int i = 0; i < patterns.length; i++) {
1617 pattern = Pattern.compile(patterns[i], flags);
1618 matcher = pattern.matcher(texts[i]);
1619 if (!matcher.matches()) {
1620 System.out.println("<2> Failed at " + i);
1621 failCount++;
1622 }
1623 }
1624 // flag unicode_case alone should do nothing
1625 flags = Pattern.UNICODE_CASE;
1626 for (int i = 0; i < patterns.length; i++) {
1627 pattern = Pattern.compile(patterns[i], flags);
1628 matcher = pattern.matcher(texts[i]);
1629 if (matcher.matches()) {
1630 System.out.println("<3> Failed at " + i);
1631 failCount++;
1632 }
1633 }
1634
1635 // Special cases: i, I, u+0131 and u+0130
1636 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1637 pattern = Pattern.compile("[h-j]+", flags);
1638 if (!pattern.matcher("\u0131\u0130").matches())
1639 failCount++;
1640 report("Case Folding");
1641 }
1642
1643 private static void appendTest() {
1644 Pattern pattern = Pattern.compile("(ab)(cd)");
1645 Matcher matcher = pattern.matcher("abcd");
1646 String result = matcher.replaceAll("$2$1");
1647 if (!result.equals("cdab"))
1648 failCount++;
1649
1650 String s1 = "Swap all: first = 123, second = 456";
1651 String s2 = "Swap one: first = 123, second = 456";
1652 String r = "$3$2$1";
1653 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1654 matcher = pattern.matcher(s1);
1655
1656 result = matcher.replaceAll(r);
1657 if (!result.equals("Swap all: 123 = first, 456 = second"))
1658 failCount++;
1659
1660 matcher = pattern.matcher(s2);
1661
1662 if (matcher.find()) {
1663 StringBuffer sb = new StringBuffer();
1664 matcher.appendReplacement(sb, r);
1665 matcher.appendTail(sb);
1666 result = sb.toString();
1667 if (!result.equals("Swap one: 123 = first, second = 456"))
1668 failCount++;
1669 }
1670
1671 // Supplementary character test
1672 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1673 matcher = pattern.matcher(toSupplementaries("abcd"));
1674 result = matcher.replaceAll("$2$1");
1675 if (!result.equals(toSupplementaries("cdab")))
1676 failCount++;
1677
1678 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1679 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1680 r = toSupplementaries("$3$2$1");
1681 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1682 matcher = pattern.matcher(s1);
1683
1684 result = matcher.replaceAll(r);
1685 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1686 failCount++;
1687
1688 matcher = pattern.matcher(s2);
1689
1690 if (matcher.find()) {
1691 StringBuffer sb = new StringBuffer();
1692 matcher.appendReplacement(sb, r);
1693 matcher.appendTail(sb);
1694 result = sb.toString();
1695 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1696 failCount++;
1697 }
1698 report("Append");
1699 }
1700
1701 private static void splitTest() {
1702 Pattern pattern = Pattern.compile(":");
1703 String[] result = pattern.split("foo:and:boo", 2);
1704 if (!result[0].equals("foo"))
1705 failCount++;
1706 if (!result[1].equals("and:boo"))
1707 failCount++;
1708 // Supplementary character test
1709 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1710 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1711 if (!result[0].equals(toSupplementaries("foo")))
1712 failCount++;
1713 if (!result[1].equals(toSupplementaries("andXboo")))
1714 failCount++;
1715
1716 CharBuffer cb = CharBuffer.allocate(100);
1717 cb.put("foo:and:boo");
1718 cb.flip();
1719 result = pattern.split(cb);
1720 if (!result[0].equals("foo"))
1721 failCount++;
1722 if (!result[1].equals("and"))
1723 failCount++;
1724 if (!result[2].equals("boo"))
1725 failCount++;
1726
1727 // Supplementary character test
1728 CharBuffer cbs = CharBuffer.allocate(100);
1729 cbs.put(toSupplementaries("fooXandXboo"));
1730 cbs.flip();
1731 result = patternX.split(cbs);
1732 if (!result[0].equals(toSupplementaries("foo")))
1733 failCount++;
1734 if (!result[1].equals(toSupplementaries("and")))
1735 failCount++;
1736 if (!result[2].equals(toSupplementaries("boo")))
1737 failCount++;
1738
1739 String source = "0123456789";
1740 for (int limit=-2; limit<3; limit++) {
1741 for (int x=0; x<10; x++) {
1742 result = source.split(Integer.toString(x), limit);
1743 int expectedLength = limit < 1 ? 2 : limit;
1744
1745 if ((limit == 0) && (x == 9)) {
1746 // expected dropping of ""
1747 if (result.length != 1)
1748 failCount++;
1749 if (!result[0].equals("012345678")) {
1750 failCount++;
1751 }
1752 } else {
1753 if (result.length != expectedLength) {
1754 failCount++;
1755 }
1756 if (!result[0].equals(source.substring(0,x))) {
1757 if (limit != 1) {
1758 failCount++;
1759 } else {
1760 if (!result[0].equals(source.substring(0,10))) {
1761 failCount++;
1762 }
1763 }
1764 }
1765 if (expectedLength > 1) { // Check segment 2
1766 if (!result[1].equals(source.substring(x+1,10)))
1767 failCount++;
1768 }
1769 }
1770 }
1771 }
1772 // Check the case for no match found
1773 for (int limit=-2; limit<3; limit++) {
1774 result = source.split("e", limit);
1775 if (result.length != 1)
1776 failCount++;
1777 if (!result[0].equals(source))
1778 failCount++;
1779 }
1780 // Check the case for limit == 0, source = "";
sherman1242a6d2013-11-13 11:26:01 -08001781 // split() now returns 0-length for empty source "" see #6559590
sherman0b4d42d2009-02-23 21:06:15 -08001782 source = "";
1783 result = source.split("e", 0);
sherman12888112013-11-13 22:22:28 -08001784 if (result.length != 1)
1785 failCount++;
1786 if (!result[0].equals(source))
sherman0b4d42d2009-02-23 21:06:15 -08001787 failCount++;
1788
sherman1242a6d2013-11-13 11:26:01 -08001789 // Check both split() and splitAsStraem(), especially for zero-lenth
1790 // input and zero-lenth match cases
1791 String[][] input = new String[][] {
1792 { " ", "Abc Efg Hij" }, // normal non-zero-match
1793 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1794 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1795 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1796 { "(?=\\p{Lu})", "AbcEfg" },
1797 { "(?=\\p{Lu})", "Abc" },
1798 { " ", "" }, // zero-length input
1799 { ".*", "" },
1800
1801 // some tests from PatternStreamTest.java
1802 { "4", "awgqwefg1fefw4vssv1vvv1" },
1803 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1804 { "1", "awgqwefg1fefw4vssv1vvv1" },
1805 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1806 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1807 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1808 { "\u56da", "" },
1809 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1810 { "o", "boo:and:foo" },
1811 { "o", "booooo:and:fooooo" },
1812 { "o", "fooooo:" },
1813 };
1814
1815 String[][] expected = new String[][] {
1816 { "Abc", "Efg", "Hij" },
1817 { "", "Abc", "Efg", "Hij" },
1818 { "Abc", "", "Efg", "Hij" },
1819 { "Abc", "Efg", "Hij" },
1820 { "Abc", "Efg" },
1821 { "Abc" },
sherman12888112013-11-13 22:22:28 -08001822 { "" },
1823 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001824
1825 { "awgqwefg1fefw", "vssv1vvv1" },
1826 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1827 { "awgqwefg", "fefw4vssv", "vvv" },
1828 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1829 { "1", "23", "456", "7890" },
1830 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
sherman12888112013-11-13 22:22:28 -08001831 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001832 { "This", "is", "testing", "", "with", "different", "separators" },
1833 { "b", "", ":and:f" },
1834 { "b", "", "", "", "", ":and:f" },
1835 { "f", "", "", "", "", ":" },
1836 };
1837 for (int i = 0; i < input.length; i++) {
1838 pattern = Pattern.compile(input[i][0]);
sherman12888112013-11-13 22:22:28 -08001839 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001840 failCount++;
sherman12888112013-11-13 22:22:28 -08001841 }
1842 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting
1843 // array for zero-length input for now
1844 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1845 expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001846 failCount++;
sherman12888112013-11-13 22:22:28 -08001847 }
sherman1242a6d2013-11-13 11:26:01 -08001848 }
sherman0b4d42d2009-02-23 21:06:15 -08001849 report("Split");
1850 }
1851
1852 private static void negationTest() {
1853 Pattern pattern = Pattern.compile("[\\[@^]+");
1854 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1855 if (!matcher.find())
1856 failCount++;
1857 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1858 failCount++;
1859 pattern = Pattern.compile("[@\\[^]+");
1860 matcher = pattern.matcher("@@@@[[[[^^^^");
1861 if (!matcher.find())
1862 failCount++;
1863 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1864 failCount++;
1865 pattern = Pattern.compile("[@\\[^@]+");
1866 matcher = pattern.matcher("@@@@[[[[^^^^");
1867 if (!matcher.find())
1868 failCount++;
1869 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1870 failCount++;
1871
1872 pattern = Pattern.compile("\\)");
1873 matcher = pattern.matcher("xxx)xxx");
1874 if (!matcher.find())
1875 failCount++;
1876
1877 report("Negation");
1878 }
1879
1880 private static void ampersandTest() {
1881 Pattern pattern = Pattern.compile("[&@]+");
1882 check(pattern, "@@@@&&&&", true);
1883
1884 pattern = Pattern.compile("[@&]+");
1885 check(pattern, "@@@@&&&&", true);
1886
1887 pattern = Pattern.compile("[@\\&]+");
1888 check(pattern, "@@@@&&&&", true);
1889
1890 report("Ampersand");
1891 }
1892
1893 private static void octalTest() throws Exception {
1894 Pattern pattern = Pattern.compile("\\u0007");
1895 Matcher matcher = pattern.matcher("\u0007");
1896 if (!matcher.matches())
1897 failCount++;
1898 pattern = Pattern.compile("\\07");
1899 matcher = pattern.matcher("\u0007");
1900 if (!matcher.matches())
1901 failCount++;
1902 pattern = Pattern.compile("\\007");
1903 matcher = pattern.matcher("\u0007");
1904 if (!matcher.matches())
1905 failCount++;
1906 pattern = Pattern.compile("\\0007");
1907 matcher = pattern.matcher("\u0007");
1908 if (!matcher.matches())
1909 failCount++;
1910 pattern = Pattern.compile("\\040");
1911 matcher = pattern.matcher("\u0020");
1912 if (!matcher.matches())
1913 failCount++;
1914 pattern = Pattern.compile("\\0403");
1915 matcher = pattern.matcher("\u00203");
1916 if (!matcher.matches())
1917 failCount++;
1918 pattern = Pattern.compile("\\0103");
1919 matcher = pattern.matcher("\u0043");
1920 if (!matcher.matches())
1921 failCount++;
1922
1923 report("Octal");
1924 }
1925
1926 private static void longPatternTest() throws Exception {
1927 try {
1928 Pattern pattern = Pattern.compile(
1929 "a 32-character-long pattern xxxx");
1930 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1931 pattern = Pattern.compile("a thirty four character long regex");
1932 StringBuffer patternToBe = new StringBuffer(101);
1933 for (int i=0; i<100; i++)
1934 patternToBe.append((char)(97 + i%26));
1935 pattern = Pattern.compile(patternToBe.toString());
1936 } catch (PatternSyntaxException e) {
1937 failCount++;
1938 }
1939
1940 // Supplementary character test
1941 try {
1942 Pattern pattern = Pattern.compile(
1943 toSupplementaries("a 32-character-long pattern xxxx"));
1944 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1945 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1946 StringBuffer patternToBe = new StringBuffer(101*2);
1947 for (int i=0; i<100; i++)
1948 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1949 + 97 + i%26));
1950 pattern = Pattern.compile(patternToBe.toString());
1951 } catch (PatternSyntaxException e) {
1952 failCount++;
1953 }
1954 report("LongPattern");
1955 }
1956
1957 private static void group0Test() throws Exception {
1958 Pattern pattern = Pattern.compile("(tes)ting");
1959 Matcher matcher = pattern.matcher("testing");
1960 check(matcher, "testing");
1961
1962 matcher.reset("testing");
1963 if (matcher.lookingAt()) {
1964 if (!matcher.group(0).equals("testing"))
1965 failCount++;
1966 } else {
1967 failCount++;
1968 }
1969
1970 matcher.reset("testing");
1971 if (matcher.matches()) {
1972 if (!matcher.group(0).equals("testing"))
1973 failCount++;
1974 } else {
1975 failCount++;
1976 }
1977
1978 pattern = Pattern.compile("(tes)ting");
1979 matcher = pattern.matcher("testing");
1980 if (matcher.lookingAt()) {
1981 if (!matcher.group(0).equals("testing"))
1982 failCount++;
1983 } else {
1984 failCount++;
1985 }
1986
1987 pattern = Pattern.compile("^(tes)ting");
1988 matcher = pattern.matcher("testing");
1989 if (matcher.matches()) {
1990 if (!matcher.group(0).equals("testing"))
1991 failCount++;
1992 } else {
1993 failCount++;
1994 }
1995
1996 // Supplementary character test
1997 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1998 matcher = pattern.matcher(toSupplementaries("testing"));
1999 check(matcher, toSupplementaries("testing"));
2000
2001 matcher.reset(toSupplementaries("testing"));
2002 if (matcher.lookingAt()) {
2003 if (!matcher.group(0).equals(toSupplementaries("testing")))
2004 failCount++;
2005 } else {
2006 failCount++;
2007 }
2008
2009 matcher.reset(toSupplementaries("testing"));
2010 if (matcher.matches()) {
2011 if (!matcher.group(0).equals(toSupplementaries("testing")))
2012 failCount++;
2013 } else {
2014 failCount++;
2015 }
2016
2017 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2018 matcher = pattern.matcher(toSupplementaries("testing"));
2019 if (matcher.lookingAt()) {
2020 if (!matcher.group(0).equals(toSupplementaries("testing")))
2021 failCount++;
2022 } else {
2023 failCount++;
2024 }
2025
2026 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2027 matcher = pattern.matcher(toSupplementaries("testing"));
2028 if (matcher.matches()) {
2029 if (!matcher.group(0).equals(toSupplementaries("testing")))
2030 failCount++;
2031 } else {
2032 failCount++;
2033 }
2034
2035 report("Group0");
2036 }
2037
2038 private static void findIntTest() throws Exception {
2039 Pattern p = Pattern.compile("blah");
2040 Matcher m = p.matcher("zzzzblahzzzzzblah");
2041 boolean result = m.find(2);
2042 if (!result)
2043 failCount++;
2044
2045 p = Pattern.compile("$");
2046 m = p.matcher("1234567890");
2047 result = m.find(10);
2048 if (!result)
2049 failCount++;
2050 try {
2051 result = m.find(11);
2052 failCount++;
2053 } catch (IndexOutOfBoundsException e) {
2054 // correct result
2055 }
2056
2057 // Supplementary character test
2058 p = Pattern.compile(toSupplementaries("blah"));
2059 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2060 result = m.find(2);
2061 if (!result)
2062 failCount++;
2063
2064 report("FindInt");
2065 }
2066
2067 private static void emptyPatternTest() throws Exception {
2068 Pattern p = Pattern.compile("");
2069 Matcher m = p.matcher("foo");
2070
2071 // Should find empty pattern at beginning of input
2072 boolean result = m.find();
2073 if (result != true)
2074 failCount++;
2075 if (m.start() != 0)
2076 failCount++;
2077
2078 // Should not match entire input if input is not empty
2079 m.reset();
2080 result = m.matches();
2081 if (result == true)
2082 failCount++;
2083
2084 try {
2085 m.start(0);
2086 failCount++;
2087 } catch (IllegalStateException e) {
2088 // Correct result
2089 }
2090
2091 // Should match entire input if input is empty
2092 m.reset("");
2093 result = m.matches();
2094 if (result != true)
2095 failCount++;
2096
2097 result = Pattern.matches("", "");
2098 if (result != true)
2099 failCount++;
2100
2101 result = Pattern.matches("", "foo");
2102 if (result == true)
2103 failCount++;
2104 report("EmptyPattern");
2105 }
2106
2107 private static void charClassTest() throws Exception {
2108 Pattern pattern = Pattern.compile("blah[ab]]blech");
2109 check(pattern, "blahb]blech", true);
2110
2111 pattern = Pattern.compile("[abc[def]]");
2112 check(pattern, "b", true);
2113
2114 // Supplementary character tests
2115 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2116 check(pattern, toSupplementaries("blahb]blech"), true);
2117
2118 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2119 check(pattern, toSupplementaries("b"), true);
2120
2121 try {
2122 // u00ff when UNICODE_CASE
2123 pattern = Pattern.compile("[ab\u00ffcd]",
2124 Pattern.CASE_INSENSITIVE|
2125 Pattern.UNICODE_CASE);
2126 check(pattern, "ab\u00ffcd", true);
2127 check(pattern, "Ab\u0178Cd", true);
2128
2129 // u00b5 when UNICODE_CASE
2130 pattern = Pattern.compile("[ab\u00b5cd]",
2131 Pattern.CASE_INSENSITIVE|
2132 Pattern.UNICODE_CASE);
2133 check(pattern, "ab\u00b5cd", true);
2134 check(pattern, "Ab\u039cCd", true);
2135 } catch (Exception e) { failCount++; }
2136
2137 /* Special cases
2138 (1)LatinSmallLetterLongS u+017f
2139 (2)LatinSmallLetterDotlessI u+0131
2140 (3)LatineCapitalLetterIWithDotAbove u+0130
2141 (4)KelvinSign u+212a
2142 (5)AngstromSign u+212b
2143 */
2144 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2145 pattern = Pattern.compile("[sik\u00c5]+", flags);
2146 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2147 failCount++;
2148
2149 report("CharClass");
2150 }
2151
2152 private static void caretTest() throws Exception {
2153 Pattern pattern = Pattern.compile("\\w*");
2154 Matcher matcher = pattern.matcher("a#bc#def##g");
2155 check(matcher, "a");
2156 check(matcher, "");
2157 check(matcher, "bc");
2158 check(matcher, "");
2159 check(matcher, "def");
2160 check(matcher, "");
2161 check(matcher, "");
2162 check(matcher, "g");
2163 check(matcher, "");
2164 if (matcher.find())
2165 failCount++;
2166
2167 pattern = Pattern.compile("^\\w*");
2168 matcher = pattern.matcher("a#bc#def##g");
2169 check(matcher, "a");
2170 if (matcher.find())
2171 failCount++;
2172
2173 pattern = Pattern.compile("\\w");
2174 matcher = pattern.matcher("abc##x");
2175 check(matcher, "a");
2176 check(matcher, "b");
2177 check(matcher, "c");
2178 check(matcher, "x");
2179 if (matcher.find())
2180 failCount++;
2181
2182 pattern = Pattern.compile("^\\w");
2183 matcher = pattern.matcher("abc##x");
2184 check(matcher, "a");
2185 if (matcher.find())
2186 failCount++;
2187
2188 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2189 matcher = pattern.matcher("abcdef-ghi\njklmno");
2190 check(matcher, "abc");
2191 if (matcher.find())
2192 failCount++;
2193
2194 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2195 matcher = pattern.matcher("abcdef-ghi\njklmno");
2196 check(matcher, "abc");
2197 check(matcher, "jkl");
2198 if (matcher.find())
2199 failCount++;
2200
2201 pattern = Pattern.compile("^", Pattern.MULTILINE);
2202 matcher = pattern.matcher("this is some text");
2203 String result = matcher.replaceAll("X");
2204 if (!result.equals("Xthis is some text"))
2205 failCount++;
2206
2207 pattern = Pattern.compile("^");
2208 matcher = pattern.matcher("this is some text");
2209 result = matcher.replaceAll("X");
2210 if (!result.equals("Xthis is some text"))
2211 failCount++;
2212
2213 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2214 matcher = pattern.matcher("this is some text\n");
2215 result = matcher.replaceAll("X");
2216 if (!result.equals("Xthis is some text\n"))
2217 failCount++;
2218
2219 report("Caret");
2220 }
2221
2222 private static void groupCaptureTest() throws Exception {
2223 // Independent group
2224 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2225 Matcher matcher = pattern.matcher("xxxyyyzzz");
2226 matcher.find();
2227 try {
2228 String blah = matcher.group(1);
2229 failCount++;
2230 } catch (IndexOutOfBoundsException ioobe) {
2231 // Good result
2232 }
2233 // Pure group
2234 pattern = Pattern.compile("x+(?:y+)z+");
2235 matcher = pattern.matcher("xxxyyyzzz");
2236 matcher.find();
2237 try {
2238 String blah = matcher.group(1);
2239 failCount++;
2240 } catch (IndexOutOfBoundsException ioobe) {
2241 // Good result
2242 }
2243
2244 // Supplementary character tests
2245 // Independent group
2246 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2247 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2248 matcher.find();
2249 try {
2250 String blah = matcher.group(1);
2251 failCount++;
2252 } catch (IndexOutOfBoundsException ioobe) {
2253 // Good result
2254 }
2255 // Pure group
2256 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2257 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2258 matcher.find();
2259 try {
2260 String blah = matcher.group(1);
2261 failCount++;
2262 } catch (IndexOutOfBoundsException ioobe) {
2263 // Good result
2264 }
2265
2266 report("GroupCapture");
2267 }
2268
2269 private static void backRefTest() throws Exception {
2270 Pattern pattern = Pattern.compile("(a*)bc\\1");
2271 check(pattern, "zzzaabcazzz", true);
2272
2273 pattern = Pattern.compile("(a*)bc\\1");
2274 check(pattern, "zzzaabcaazzz", true);
2275
2276 pattern = Pattern.compile("(abc)(def)\\1");
2277 check(pattern, "abcdefabc", true);
2278
2279 pattern = Pattern.compile("(abc)(def)\\3");
2280 check(pattern, "abcdefabc", false);
2281
2282 try {
2283 for (int i = 1; i < 10; i++) {
2284 // Make sure backref 1-9 are always accepted
2285 pattern = Pattern.compile("abcdef\\" + i);
2286 // and fail to match if the target group does not exit
2287 check(pattern, "abcdef", false);
2288 }
2289 } catch(PatternSyntaxException e) {
2290 failCount++;
2291 }
2292
2293 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2294 check(pattern, "abcdefghija", false);
2295 check(pattern, "abcdefghija1", true);
2296
2297 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2298 check(pattern, "abcdefghijkk", true);
2299
2300 pattern = Pattern.compile("(a)bcdefghij\\11");
2301 check(pattern, "abcdefghija1", true);
2302
2303 // Supplementary character tests
2304 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2305 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2306
2307 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2308 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2309
2310 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2311 check(pattern, toSupplementaries("abcdefabc"), true);
2312
2313 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2314 check(pattern, toSupplementaries("abcdefabc"), false);
2315
2316 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2317 check(pattern, toSupplementaries("abcdefghija"), false);
2318 check(pattern, toSupplementaries("abcdefghija1"), true);
2319
2320 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2321 check(pattern, toSupplementaries("abcdefghijkk"), true);
2322
2323 report("BackRef");
2324 }
2325
2326 /**
2327 * Unicode Technical Report #18, section 2.6 End of Line
2328 * There is no empty line to be matched in the sequence \u000D\u000A
2329 * but there is an empty line in the sequence \u000A\u000D.
2330 */
2331 private static void anchorTest() throws Exception {
2332 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2333 Matcher m = p.matcher("blah1\r\nblah2");
2334 m.find();
2335 m.find();
2336 if (!m.group().equals("blah2"))
2337 failCount++;
2338
2339 m.reset("blah1\n\rblah2");
2340 m.find();
2341 m.find();
2342 m.find();
2343 if (!m.group().equals("blah2"))
2344 failCount++;
2345
2346 // Test behavior of $ with \r\n at end of input
2347 p = Pattern.compile(".+$");
2348 m = p.matcher("blah1\r\n");
2349 if (!m.find())
2350 failCount++;
2351 if (!m.group().equals("blah1"))
2352 failCount++;
2353 if (m.find())
2354 failCount++;
2355
2356 // Test behavior of $ with \r\n at end of input in multiline
2357 p = Pattern.compile(".+$", Pattern.MULTILINE);
2358 m = p.matcher("blah1\r\n");
2359 if (!m.find())
2360 failCount++;
2361 if (m.find())
2362 failCount++;
2363
2364 // Test for $ recognition of \u0085 for bug 4527731
2365 p = Pattern.compile(".+$", Pattern.MULTILINE);
2366 m = p.matcher("blah1\u0085");
2367 if (!m.find())
2368 failCount++;
2369
2370 // Supplementary character test
2371 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2372 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2373 m.find();
2374 m.find();
2375 if (!m.group().equals(toSupplementaries("blah2")))
2376 failCount++;
2377
2378 m.reset(toSupplementaries("blah1\n\rblah2"));
2379 m.find();
2380 m.find();
2381 m.find();
2382 if (!m.group().equals(toSupplementaries("blah2")))
2383 failCount++;
2384
2385 // Test behavior of $ with \r\n at end of input
2386 p = Pattern.compile(".+$");
2387 m = p.matcher(toSupplementaries("blah1\r\n"));
2388 if (!m.find())
2389 failCount++;
2390 if (!m.group().equals(toSupplementaries("blah1")))
2391 failCount++;
2392 if (m.find())
2393 failCount++;
2394
2395 // Test behavior of $ with \r\n at end of input in multiline
2396 p = Pattern.compile(".+$", Pattern.MULTILINE);
2397 m = p.matcher(toSupplementaries("blah1\r\n"));
2398 if (!m.find())
2399 failCount++;
2400 if (m.find())
2401 failCount++;
2402
2403 // Test for $ recognition of \u0085 for bug 4527731
2404 p = Pattern.compile(".+$", Pattern.MULTILINE);
2405 m = p.matcher(toSupplementaries("blah1\u0085"));
2406 if (!m.find())
2407 failCount++;
2408
2409 report("Anchors");
2410 }
2411
2412 /**
2413 * A basic sanity test of Matcher.lookingAt().
2414 */
2415 private static void lookingAtTest() throws Exception {
2416 Pattern p = Pattern.compile("(ab)(c*)");
2417 Matcher m = p.matcher("abccczzzabcczzzabccc");
2418
2419 if (!m.lookingAt())
2420 failCount++;
2421
2422 if (!m.group().equals(m.group(0)))
2423 failCount++;
2424
2425 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2426 if (m.lookingAt())
2427 failCount++;
2428
2429 // Supplementary character test
2430 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2431 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2432
2433 if (!m.lookingAt())
2434 failCount++;
2435
2436 if (!m.group().equals(m.group(0)))
2437 failCount++;
2438
2439 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2440 if (m.lookingAt())
2441 failCount++;
2442
2443 report("Looking At");
2444 }
2445
2446 /**
2447 * A basic sanity test of Matcher.matches().
2448 */
2449 private static void matchesTest() throws Exception {
2450 // matches()
2451 Pattern p = Pattern.compile("ulb(c*)");
2452 Matcher m = p.matcher("ulbcccccc");
2453 if (!m.matches())
2454 failCount++;
2455
2456 // find() but not matches()
2457 m.reset("zzzulbcccccc");
2458 if (m.matches())
2459 failCount++;
2460
2461 // lookingAt() but not matches()
2462 m.reset("ulbccccccdef");
2463 if (m.matches())
2464 failCount++;
2465
2466 // matches()
2467 p = Pattern.compile("a|ad");
2468 m = p.matcher("ad");
2469 if (!m.matches())
2470 failCount++;
2471
2472 // Supplementary character test
2473 // matches()
2474 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2475 m = p.matcher(toSupplementaries("ulbcccccc"));
2476 if (!m.matches())
2477 failCount++;
2478
2479 // find() but not matches()
2480 m.reset(toSupplementaries("zzzulbcccccc"));
2481 if (m.matches())
2482 failCount++;
2483
2484 // lookingAt() but not matches()
2485 m.reset(toSupplementaries("ulbccccccdef"));
2486 if (m.matches())
2487 failCount++;
2488
2489 // matches()
2490 p = Pattern.compile(toSupplementaries("a|ad"));
2491 m = p.matcher(toSupplementaries("ad"));
2492 if (!m.matches())
2493 failCount++;
2494
2495 report("Matches");
2496 }
2497
2498 /**
2499 * A basic sanity test of Pattern.matches().
2500 */
2501 private static void patternMatchesTest() throws Exception {
2502 // matches()
2503 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2504 toSupplementaries("ulbcccccc")))
2505 failCount++;
2506
2507 // find() but not matches()
2508 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2509 toSupplementaries("zzzulbcccccc")))
2510 failCount++;
2511
2512 // lookingAt() but not matches()
2513 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2514 toSupplementaries("ulbccccccdef")))
2515 failCount++;
2516
2517 // Supplementary character test
2518 // matches()
2519 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2520 toSupplementaries("ulbcccccc")))
2521 failCount++;
2522
2523 // find() but not matches()
2524 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2525 toSupplementaries("zzzulbcccccc")))
2526 failCount++;
2527
2528 // lookingAt() but not matches()
2529 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2530 toSupplementaries("ulbccccccdef")))
2531 failCount++;
2532
2533 report("Pattern Matches");
2534 }
2535
2536 /**
2537 * Canonical equivalence testing. Tests the ability of the engine
2538 * to match sequences that are not explicitly specified in the
2539 * pattern when they are considered equivalent by the Unicode Standard.
2540 */
2541 private static void ceTest() throws Exception {
2542 // Decomposed char outside char classes
2543 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2544 Matcher m = p.matcher("test\u00e5");
2545 if (!m.matches())
2546 failCount++;
2547
2548 m.reset("testa\u030a");
2549 if (!m.matches())
2550 failCount++;
2551
2552 // Composed char outside char classes
2553 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2554 m = p.matcher("test\u00e5");
2555 if (!m.matches())
2556 failCount++;
2557
2558 m.reset("testa\u030a");
2559 if (!m.find())
2560 failCount++;
2561
2562 // Decomposed char inside a char class
2563 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2564 m = p.matcher("test\u00e5");
2565 if (!m.find())
2566 failCount++;
2567
2568 m.reset("testa\u030a");
2569 if (!m.find())
2570 failCount++;
2571
2572 // Composed char inside a char class
2573 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2574 m = p.matcher("test\u00e5");
2575 if (!m.find())
2576 failCount++;
2577
2578 m.reset("testa\u0300");
2579 if (!m.find())
2580 failCount++;
2581
2582 m.reset("testa\u030a");
2583 if (!m.find())
2584 failCount++;
2585
2586 // Marks that cannot legally change order and be equivalent
2587 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2588 check(p, "testa\u0308\u0300", true);
2589 check(p, "testa\u0300\u0308", false);
2590
2591 // Marks that can legally change order and be equivalent
2592 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2593 check(p, "testa\u0308\u0323", true);
2594 check(p, "testa\u0323\u0308", true);
2595
2596 // Test all equivalences of the sequence a\u0308\u0323\u0300
2597 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2598 check(p, "testa\u0308\u0323\u0300", true);
2599 check(p, "testa\u0323\u0308\u0300", true);
2600 check(p, "testa\u0308\u0300\u0323", true);
2601 check(p, "test\u00e4\u0323\u0300", true);
2602 check(p, "test\u00e4\u0300\u0323", true);
2603
2604 /*
2605 * The following canonical equivalence tests don't work. Bug id: 4916384.
2606 *
2607 // Decomposed hangul (jamos)
2608 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2609 m = p.matcher("\u1100\u1161");
2610 if (!m.matches())
2611 failCount++;
2612
2613 m.reset("\uac00");
2614 if (!m.matches())
2615 failCount++;
2616
2617 // Composed hangul
2618 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2619 m = p.matcher("\u1100\u1161");
2620 if (!m.matches())
2621 failCount++;
2622
2623 m.reset("\uac00");
2624 if (!m.matches())
2625 failCount++;
2626
2627 // Decomposed supplementary outside char classes
2628 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2629 m = p.matcher("test\ud834\uddc0");
2630 if (!m.matches())
2631 failCount++;
2632
2633 m.reset("test\ud834\uddbc\ud834\udd6f");
2634 if (!m.matches())
2635 failCount++;
2636
2637 // Composed supplementary outside char classes
2638 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2639 m.reset("test\ud834\uddbc\ud834\udd6f");
2640 if (!m.matches())
2641 failCount++;
2642
2643 m = p.matcher("test\ud834\uddc0");
2644 if (!m.matches())
2645 failCount++;
2646
2647 */
2648
2649 report("Canonical Equivalence");
2650 }
2651
2652 /**
2653 * A basic sanity test of Matcher.replaceAll().
2654 */
2655 private static void globalSubstitute() throws Exception {
2656 // Global substitution with a literal
2657 Pattern p = Pattern.compile("(ab)(c*)");
2658 Matcher m = p.matcher("abccczzzabcczzzabccc");
2659 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2660 failCount++;
2661
2662 m.reset("zzzabccczzzabcczzzabccczzz");
2663 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2664 failCount++;
2665
2666 // Global substitution with groups
2667 m.reset("zzzabccczzzabcczzzabccczzz");
2668 String result = m.replaceAll("$1");
2669 if (!result.equals("zzzabzzzabzzzabzzz"))
2670 failCount++;
2671
2672 // Supplementary character test
2673 // Global substitution with a literal
2674 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2675 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2676 if (!m.replaceAll(toSupplementaries("test")).
2677 equals(toSupplementaries("testzzztestzzztest")))
2678 failCount++;
2679
2680 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2681 if (!m.replaceAll(toSupplementaries("test")).
2682 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2683 failCount++;
2684
2685 // Global substitution with groups
2686 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2687 result = m.replaceAll("$1");
2688 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2689 failCount++;
2690
2691 report("Global Substitution");
2692 }
2693
2694 /**
2695 * Tests the usage of Matcher.appendReplacement() with literal
2696 * and group substitutions.
2697 */
2698 private static void stringbufferSubstitute() throws Exception {
2699 // SB substitution with literal
2700 String blah = "zzzblahzzz";
2701 Pattern p = Pattern.compile("blah");
2702 Matcher m = p.matcher(blah);
2703 StringBuffer result = new StringBuffer();
2704 try {
2705 m.appendReplacement(result, "blech");
2706 failCount++;
2707 } catch (IllegalStateException e) {
2708 }
2709 m.find();
2710 m.appendReplacement(result, "blech");
2711 if (!result.toString().equals("zzzblech"))
2712 failCount++;
2713
2714 m.appendTail(result);
2715 if (!result.toString().equals("zzzblechzzz"))
2716 failCount++;
2717
2718 // SB substitution with groups
2719 blah = "zzzabcdzzz";
2720 p = Pattern.compile("(ab)(cd)*");
2721 m = p.matcher(blah);
2722 result = new StringBuffer();
2723 try {
2724 m.appendReplacement(result, "$1");
2725 failCount++;
2726 } catch (IllegalStateException e) {
2727 }
2728 m.find();
2729 m.appendReplacement(result, "$1");
2730 if (!result.toString().equals("zzzab"))
2731 failCount++;
2732
2733 m.appendTail(result);
2734 if (!result.toString().equals("zzzabzzz"))
2735 failCount++;
2736
2737 // SB substitution with 3 groups
2738 blah = "zzzabcdcdefzzz";
2739 p = Pattern.compile("(ab)(cd)*(ef)");
2740 m = p.matcher(blah);
2741 result = new StringBuffer();
2742 try {
2743 m.appendReplacement(result, "$1w$2w$3");
2744 failCount++;
2745 } catch (IllegalStateException e) {
2746 }
2747 m.find();
2748 m.appendReplacement(result, "$1w$2w$3");
2749 if (!result.toString().equals("zzzabwcdwef"))
2750 failCount++;
2751
2752 m.appendTail(result);
2753 if (!result.toString().equals("zzzabwcdwefzzz"))
2754 failCount++;
2755
2756 // SB substitution with groups and three matches
2757 // skipping middle match
2758 blah = "zzzabcdzzzabcddzzzabcdzzz";
2759 p = Pattern.compile("(ab)(cd*)");
2760 m = p.matcher(blah);
2761 result = new StringBuffer();
2762 try {
2763 m.appendReplacement(result, "$1");
2764 failCount++;
2765 } catch (IllegalStateException e) {
2766 }
2767 m.find();
2768 m.appendReplacement(result, "$1");
2769 if (!result.toString().equals("zzzab"))
2770 failCount++;
2771
2772 m.find();
2773 m.find();
2774 m.appendReplacement(result, "$2");
2775 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2776 failCount++;
2777
2778 m.appendTail(result);
2779 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2780 failCount++;
2781
2782 // Check to make sure escaped $ is ignored
2783 blah = "zzzabcdcdefzzz";
2784 p = Pattern.compile("(ab)(cd)*(ef)");
2785 m = p.matcher(blah);
2786 result = new StringBuffer();
2787 m.find();
2788 m.appendReplacement(result, "$1w\\$2w$3");
2789 if (!result.toString().equals("zzzabw$2wef"))
2790 failCount++;
2791
2792 m.appendTail(result);
2793 if (!result.toString().equals("zzzabw$2wefzzz"))
2794 failCount++;
2795
2796 // Check to make sure a reference to nonexistent group causes error
2797 blah = "zzzabcdcdefzzz";
2798 p = Pattern.compile("(ab)(cd)*(ef)");
2799 m = p.matcher(blah);
2800 result = new StringBuffer();
2801 m.find();
2802 try {
2803 m.appendReplacement(result, "$1w$5w$3");
2804 failCount++;
2805 } catch (IndexOutOfBoundsException ioobe) {
2806 // Correct result
2807 }
2808
2809 // Check double digit group references
2810 blah = "zzz123456789101112zzz";
2811 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2812 m = p.matcher(blah);
2813 result = new StringBuffer();
2814 m.find();
2815 m.appendReplacement(result, "$1w$11w$3");
2816 if (!result.toString().equals("zzz1w11w3"))
2817 failCount++;
2818
2819 // Check to make sure it backs off $15 to $1 if only three groups
2820 blah = "zzzabcdcdefzzz";
2821 p = Pattern.compile("(ab)(cd)*(ef)");
2822 m = p.matcher(blah);
2823 result = new StringBuffer();
2824 m.find();
2825 m.appendReplacement(result, "$1w$15w$3");
2826 if (!result.toString().equals("zzzabwab5wef"))
2827 failCount++;
2828
2829
2830 // Supplementary character test
2831 // SB substitution with literal
2832 blah = toSupplementaries("zzzblahzzz");
2833 p = Pattern.compile(toSupplementaries("blah"));
2834 m = p.matcher(blah);
2835 result = new StringBuffer();
2836 try {
2837 m.appendReplacement(result, toSupplementaries("blech"));
2838 failCount++;
2839 } catch (IllegalStateException e) {
2840 }
2841 m.find();
2842 m.appendReplacement(result, toSupplementaries("blech"));
2843 if (!result.toString().equals(toSupplementaries("zzzblech")))
2844 failCount++;
2845
2846 m.appendTail(result);
2847 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2848 failCount++;
2849
2850 // SB substitution with groups
2851 blah = toSupplementaries("zzzabcdzzz");
2852 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2853 m = p.matcher(blah);
2854 result = new StringBuffer();
2855 try {
2856 m.appendReplacement(result, "$1");
2857 failCount++;
2858 } catch (IllegalStateException e) {
2859 }
2860 m.find();
2861 m.appendReplacement(result, "$1");
2862 if (!result.toString().equals(toSupplementaries("zzzab")))
2863 failCount++;
2864
2865 m.appendTail(result);
2866 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2867 failCount++;
2868
2869 // SB substitution with 3 groups
2870 blah = toSupplementaries("zzzabcdcdefzzz");
2871 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2872 m = p.matcher(blah);
2873 result = new StringBuffer();
2874 try {
2875 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2876 failCount++;
2877 } catch (IllegalStateException e) {
2878 }
2879 m.find();
2880 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2881 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2882 failCount++;
2883
2884 m.appendTail(result);
2885 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2886 failCount++;
2887
2888 // SB substitution with groups and three matches
2889 // skipping middle match
2890 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2891 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2892 m = p.matcher(blah);
2893 result = new StringBuffer();
2894 try {
2895 m.appendReplacement(result, "$1");
2896 failCount++;
2897 } catch (IllegalStateException e) {
2898 }
2899 m.find();
2900 m.appendReplacement(result, "$1");
2901 if (!result.toString().equals(toSupplementaries("zzzab")))
2902 failCount++;
2903
2904 m.find();
2905 m.find();
2906 m.appendReplacement(result, "$2");
2907 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2908 failCount++;
2909
2910 m.appendTail(result);
2911 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2912 failCount++;
2913
2914 // Check to make sure escaped $ is ignored
2915 blah = toSupplementaries("zzzabcdcdefzzz");
2916 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2917 m = p.matcher(blah);
2918 result = new StringBuffer();
2919 m.find();
2920 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2921 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2922 failCount++;
2923
2924 m.appendTail(result);
2925 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2926 failCount++;
2927
2928 // Check to make sure a reference to nonexistent group causes error
2929 blah = toSupplementaries("zzzabcdcdefzzz");
2930 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2931 m = p.matcher(blah);
2932 result = new StringBuffer();
2933 m.find();
2934 try {
2935 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2936 failCount++;
2937 } catch (IndexOutOfBoundsException ioobe) {
2938 // Correct result
2939 }
2940
2941 // Check double digit group references
2942 blah = toSupplementaries("zzz123456789101112zzz");
2943 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2944 m = p.matcher(blah);
2945 result = new StringBuffer();
2946 m.find();
2947 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2948 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2949 failCount++;
2950
2951 // Check to make sure it backs off $15 to $1 if only three groups
2952 blah = toSupplementaries("zzzabcdcdefzzz");
2953 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2954 m = p.matcher(blah);
2955 result = new StringBuffer();
2956 m.find();
2957 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2958 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2959 failCount++;
2960
2961 // Check nothing has been appended into the output buffer if
2962 // the replacement string triggers IllegalArgumentException.
2963 p = Pattern.compile("(abc)");
2964 m = p.matcher("abcd");
2965 result = new StringBuffer();
2966 m.find();
2967 try {
2968 m.appendReplacement(result, ("xyz$g"));
2969 failCount++;
2970 } catch (IllegalArgumentException iae) {
2971 if (result.length() != 0)
2972 failCount++;
2973 }
2974
2975 report("SB Substitution");
2976 }
2977
2978 /*
2979 * 5 groups of characters are created to make a substitution string.
2980 * A base string will be created including random lead chars, the
2981 * substitution string, and random trailing chars.
2982 * A pattern containing the 5 groups is searched for and replaced with:
2983 * random group + random string + random group.
2984 * The results are checked for correctness.
2985 */
2986 private static void substitutionBasher() {
2987 for (int runs = 0; runs<1000; runs++) {
2988 // Create a base string to work in
2989 int leadingChars = generator.nextInt(10);
2990 StringBuffer baseBuffer = new StringBuffer(100);
2991 String leadingString = getRandomAlphaString(leadingChars);
2992 baseBuffer.append(leadingString);
2993
2994 // Create 5 groups of random number of random chars
2995 // Create the string to substitute
2996 // Create the pattern string to search for
2997 StringBuffer bufferToSub = new StringBuffer(25);
2998 StringBuffer bufferToPat = new StringBuffer(50);
2999 String[] groups = new String[5];
3000 for(int i=0; i<5; i++) {
3001 int aGroupSize = generator.nextInt(5)+1;
3002 groups[i] = getRandomAlphaString(aGroupSize);
3003 bufferToSub.append(groups[i]);
3004 bufferToPat.append('(');
3005 bufferToPat.append(groups[i]);
3006 bufferToPat.append(')');
3007 }
3008 String stringToSub = bufferToSub.toString();
3009 String pattern = bufferToPat.toString();
3010
3011 // Place sub string into working string at random index
3012 baseBuffer.append(stringToSub);
3013
3014 // Append random chars to end
3015 int trailingChars = generator.nextInt(10);
3016 String trailingString = getRandomAlphaString(trailingChars);
3017 baseBuffer.append(trailingString);
3018 String baseString = baseBuffer.toString();
3019
3020 // Create test pattern and matcher
3021 Pattern p = Pattern.compile(pattern);
3022 Matcher m = p.matcher(baseString);
3023
3024 // Reject candidate if pattern happens to start early
3025 m.find();
3026 if (m.start() < leadingChars)
3027 continue;
3028
3029 // Reject candidate if more than one match
3030 if (m.find())
3031 continue;
3032
3033 // Construct a replacement string with :
3034 // random group + random string + random group
3035 StringBuffer bufferToRep = new StringBuffer();
3036 int groupIndex1 = generator.nextInt(5);
3037 bufferToRep.append("$" + (groupIndex1 + 1));
3038 String randomMidString = getRandomAlphaString(5);
3039 bufferToRep.append(randomMidString);
3040 int groupIndex2 = generator.nextInt(5);
3041 bufferToRep.append("$" + (groupIndex2 + 1));
3042 String replacement = bufferToRep.toString();
3043
3044 // Do the replacement
3045 String result = m.replaceAll(replacement);
3046
3047 // Construct expected result
3048 StringBuffer bufferToRes = new StringBuffer();
3049 bufferToRes.append(leadingString);
3050 bufferToRes.append(groups[groupIndex1]);
3051 bufferToRes.append(randomMidString);
3052 bufferToRes.append(groups[groupIndex2]);
3053 bufferToRes.append(trailingString);
3054 String expectedResult = bufferToRes.toString();
3055
3056 // Check results
3057 if (!result.equals(expectedResult))
3058 failCount++;
3059 }
3060
3061 report("Substitution Basher");
3062 }
3063
3064 /**
3065 * Checks the handling of some escape sequences that the Pattern
3066 * class should process instead of the java compiler. These are
3067 * not in the file because the escapes should be be processed
3068 * by the Pattern class when the regex is compiled.
3069 */
3070 private static void escapes() throws Exception {
3071 Pattern p = Pattern.compile("\\043");
3072 Matcher m = p.matcher("#");
3073 if (!m.find())
3074 failCount++;
3075
3076 p = Pattern.compile("\\x23");
3077 m = p.matcher("#");
3078 if (!m.find())
3079 failCount++;
3080
3081 p = Pattern.compile("\\u0023");
3082 m = p.matcher("#");
3083 if (!m.find())
3084 failCount++;
3085
3086 report("Escape sequences");
3087 }
3088
3089 /**
3090 * Checks the handling of blank input situations. These
3091 * tests are incompatible with my test file format.
3092 */
3093 private static void blankInput() throws Exception {
3094 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3095 Matcher m = p.matcher("");
3096 if (m.find())
3097 failCount++;
3098
3099 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3100 m = p.matcher("");
3101 if (!m.find())
3102 failCount++;
3103
3104 p = Pattern.compile("abc");
3105 m = p.matcher("");
3106 if (m.find())
3107 failCount++;
3108
3109 p = Pattern.compile("a*");
3110 m = p.matcher("");
3111 if (!m.find())
3112 failCount++;
3113
3114 report("Blank input");
3115 }
3116
3117 /**
3118 * Tests the Boyer-Moore pattern matching of a character sequence
3119 * on randomly generated patterns.
3120 */
3121 private static void bm() throws Exception {
3122 doBnM('a');
3123 report("Boyer Moore (ASCII)");
3124
3125 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3126 report("Boyer Moore (Supplementary)");
3127 }
3128
3129 private static void doBnM(int baseCharacter) throws Exception {
3130 int achar=0;
3131
3132 for (int i=0; i<100; i++) {
3133 // Create a short pattern to search for
3134 int patternLength = generator.nextInt(7) + 4;
3135 StringBuffer patternBuffer = new StringBuffer(patternLength);
3136 for (int x=0; x<patternLength; x++) {
3137 int ch = baseCharacter + generator.nextInt(26);
3138 if (Character.isSupplementaryCodePoint(ch)) {
3139 patternBuffer.append(Character.toChars(ch));
3140 } else {
3141 patternBuffer.append((char)ch);
3142 }
3143 }
3144 String pattern = patternBuffer.toString();
3145 Pattern p = Pattern.compile(pattern);
3146
3147 // Create a buffer with random ASCII chars that does
3148 // not match the sample
3149 String toSearch = null;
3150 StringBuffer s = null;
3151 Matcher m = p.matcher("");
3152 do {
3153 s = new StringBuffer(100);
3154 for (int x=0; x<100; x++) {
3155 int ch = baseCharacter + generator.nextInt(26);
3156 if (Character.isSupplementaryCodePoint(ch)) {
3157 s.append(Character.toChars(ch));
3158 } else {
3159 s.append((char)ch);
3160 }
3161 }
3162 toSearch = s.toString();
3163 m.reset(toSearch);
3164 } while (m.find());
3165
3166 // Insert the pattern at a random spot
3167 int insertIndex = generator.nextInt(99);
3168 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3169 insertIndex++;
3170 s = s.insert(insertIndex, pattern);
3171 toSearch = s.toString();
3172
3173 // Make sure that the pattern is found
3174 m.reset(toSearch);
3175 if (!m.find())
3176 failCount++;
3177
3178 // Make sure that the match text is the pattern
3179 if (!m.group().equals(pattern))
3180 failCount++;
3181
3182 // Make sure match occured at insertion point
3183 if (m.start() != insertIndex)
3184 failCount++;
3185 }
3186 }
3187
3188 /**
3189 * Tests the matching of slices on randomly generated patterns.
3190 * The Boyer-Moore optimization is not done on these patterns
3191 * because it uses unicode case folding.
3192 */
3193 private static void slice() throws Exception {
3194 doSlice(Character.MAX_VALUE);
3195 report("Slice");
3196
3197 doSlice(Character.MAX_CODE_POINT);
3198 report("Slice (Supplementary)");
3199 }
3200
3201 private static void doSlice(int maxCharacter) throws Exception {
3202 Random generator = new Random();
3203 int achar=0;
3204
3205 for (int i=0; i<100; i++) {
3206 // Create a short pattern to search for
3207 int patternLength = generator.nextInt(7) + 4;
3208 StringBuffer patternBuffer = new StringBuffer(patternLength);
3209 for (int x=0; x<patternLength; x++) {
3210 int randomChar = 0;
3211 while (!Character.isLetterOrDigit(randomChar))
3212 randomChar = generator.nextInt(maxCharacter);
3213 if (Character.isSupplementaryCodePoint(randomChar)) {
3214 patternBuffer.append(Character.toChars(randomChar));
3215 } else {
3216 patternBuffer.append((char) randomChar);
3217 }
3218 }
3219 String pattern = patternBuffer.toString();
3220 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3221
3222 // Create a buffer with random chars that does not match the sample
3223 String toSearch = null;
3224 StringBuffer s = null;
3225 Matcher m = p.matcher("");
3226 do {
3227 s = new StringBuffer(100);
3228 for (int x=0; x<100; x++) {
3229 int randomChar = 0;
3230 while (!Character.isLetterOrDigit(randomChar))
3231 randomChar = generator.nextInt(maxCharacter);
3232 if (Character.isSupplementaryCodePoint(randomChar)) {
3233 s.append(Character.toChars(randomChar));
3234 } else {
3235 s.append((char) randomChar);
3236 }
3237 }
3238 toSearch = s.toString();
3239 m.reset(toSearch);
3240 } while (m.find());
3241
3242 // Insert the pattern at a random spot
3243 int insertIndex = generator.nextInt(99);
3244 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3245 insertIndex++;
3246 s = s.insert(insertIndex, pattern);
3247 toSearch = s.toString();
3248
3249 // Make sure that the pattern is found
3250 m.reset(toSearch);
3251 if (!m.find())
3252 failCount++;
3253
3254 // Make sure that the match text is the pattern
3255 if (!m.group().equals(pattern))
3256 failCount++;
3257
3258 // Make sure match occured at insertion point
3259 if (m.start() != insertIndex)
3260 failCount++;
3261 }
3262 }
3263
3264 private static void explainFailure(String pattern, String data,
3265 String expected, String actual) {
3266 System.err.println("----------------------------------------");
3267 System.err.println("Pattern = "+pattern);
3268 System.err.println("Data = "+data);
3269 System.err.println("Expected = " + expected);
3270 System.err.println("Actual = " + actual);
3271 }
3272
3273 private static void explainFailure(String pattern, String data,
3274 Throwable t) {
3275 System.err.println("----------------------------------------");
3276 System.err.println("Pattern = "+pattern);
3277 System.err.println("Data = "+data);
3278 t.printStackTrace(System.err);
3279 }
3280
3281 // Testing examples from a file
3282
3283 /**
3284 * Goes through the file "TestCases.txt" and creates many patterns
3285 * described in the file, matching the patterns against input lines in
3286 * the file, and comparing the results against the correct results
3287 * also found in the file. The file format is described in comments
3288 * at the head of the file.
3289 */
3290 private static void processFile(String fileName) throws Exception {
3291 File testCases = new File(System.getProperty("test.src", "."),
3292 fileName);
3293 FileInputStream in = new FileInputStream(testCases);
3294 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3295
3296 // Process next test case.
3297 String aLine;
3298 while((aLine = r.readLine()) != null) {
3299 // Read a line for pattern
3300 String patternString = grabLine(r);
3301 Pattern p = null;
3302 try {
3303 p = compileTestPattern(patternString);
3304 } catch (PatternSyntaxException e) {
3305 String dataString = grabLine(r);
3306 String expectedResult = grabLine(r);
3307 if (expectedResult.startsWith("error"))
3308 continue;
3309 explainFailure(patternString, dataString, e);
3310 failCount++;
3311 continue;
3312 }
3313
3314 // Read a line for input string
3315 String dataString = grabLine(r);
3316 Matcher m = p.matcher(dataString);
3317 StringBuffer result = new StringBuffer();
3318
3319 // Check for IllegalStateExceptions before a match
3320 failCount += preMatchInvariants(m);
3321
3322 boolean found = m.find();
3323
3324 if (found)
3325 failCount += postTrueMatchInvariants(m);
3326 else
3327 failCount += postFalseMatchInvariants(m);
3328
3329 if (found) {
3330 result.append("true ");
3331 result.append(m.group(0) + " ");
3332 } else {
3333 result.append("false ");
3334 }
3335
3336 result.append(m.groupCount());
3337
3338 if (found) {
3339 for (int i=1; i<m.groupCount()+1; i++)
3340 if (m.group(i) != null)
3341 result.append(" " +m.group(i));
3342 }
3343
3344 // Read a line for the expected result
3345 String expectedResult = grabLine(r);
3346
3347 if (!result.toString().equals(expectedResult)) {
3348 explainFailure(patternString, dataString, expectedResult, result.toString());
3349 failCount++;
3350 }
3351 }
3352
3353 report(fileName);
3354 }
3355
3356 private static int preMatchInvariants(Matcher m) {
3357 int failCount = 0;
3358 try {
3359 m.start();
3360 failCount++;
3361 } catch (IllegalStateException ise) {}
3362 try {
3363 m.end();
3364 failCount++;
3365 } catch (IllegalStateException ise) {}
3366 try {
3367 m.group();
3368 failCount++;
3369 } catch (IllegalStateException ise) {}
3370 return failCount;
3371 }
3372
3373 private static int postFalseMatchInvariants(Matcher m) {
3374 int failCount = 0;
3375 try {
3376 m.group();
3377 failCount++;
3378 } catch (IllegalStateException ise) {}
3379 try {
3380 m.start();
3381 failCount++;
3382 } catch (IllegalStateException ise) {}
3383 try {
3384 m.end();
3385 failCount++;
3386 } catch (IllegalStateException ise) {}
3387 return failCount;
3388 }
3389
3390 private static int postTrueMatchInvariants(Matcher m) {
3391 int failCount = 0;
3392 //assert(m.start() = m.start(0);
3393 if (m.start() != m.start(0))
3394 failCount++;
3395 //assert(m.end() = m.end(0);
3396 if (m.start() != m.start(0))
3397 failCount++;
3398 //assert(m.group() = m.group(0);
3399 if (!m.group().equals(m.group(0)))
3400 failCount++;
3401 try {
3402 m.group(50);
3403 failCount++;
3404 } catch (IndexOutOfBoundsException ise) {}
3405
3406 return failCount;
3407 }
3408
3409 private static Pattern compileTestPattern(String patternString) {
3410 if (!patternString.startsWith("'")) {
3411 return Pattern.compile(patternString);
3412 }
3413
3414 int break1 = patternString.lastIndexOf("'");
3415 String flagString = patternString.substring(
3416 break1+1, patternString.length());
3417 patternString = patternString.substring(1, break1);
3418
3419 if (flagString.equals("i"))
3420 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3421
3422 if (flagString.equals("m"))
3423 return Pattern.compile(patternString, Pattern.MULTILINE);
3424
3425 return Pattern.compile(patternString);
3426 }
3427
3428 /**
3429 * Reads a line from the input file. Keeps reading lines until a non
3430 * empty non comment line is read. If the line contains a \n then
3431 * these two characters are replaced by a newline char. If a \\uxxxx
3432 * sequence is read then the sequence is replaced by the unicode char.
3433 */
3434 private static String grabLine(BufferedReader r) throws Exception {
3435 int index = 0;
3436 String line = r.readLine();
3437 while (line.startsWith("//") || line.length() < 1)
3438 line = r.readLine();
3439 while ((index = line.indexOf("\\n")) != -1) {
3440 StringBuffer temp = new StringBuffer(line);
3441 temp.replace(index, index+2, "\n");
3442 line = temp.toString();
3443 }
3444 while ((index = line.indexOf("\\u")) != -1) {
3445 StringBuffer temp = new StringBuffer(line);
3446 String value = temp.substring(index+2, index+6);
3447 char aChar = (char)Integer.parseInt(value, 16);
3448 String unicodeChar = "" + aChar;
3449 temp.replace(index, index+6, unicodeChar);
3450 line = temp.toString();
3451 }
3452
3453 return line;
3454 }
3455
3456 private static void check(Pattern p, String s, String g, String expected) {
3457 Matcher m = p.matcher(s);
3458 m.find();
shermana244eb52013-05-06 21:24:37 -07003459 if (!m.group(g).equals(expected) ||
3460 s.charAt(m.start(g)) != expected.charAt(0) ||
3461 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
sherman0b4d42d2009-02-23 21:06:15 -08003462 failCount++;
3463 }
3464
3465 private static void checkReplaceFirst(String p, String s, String r, String expected)
3466 {
3467 if (!expected.equals(Pattern.compile(p)
3468 .matcher(s)
3469 .replaceFirst(r)))
3470 failCount++;
3471 }
3472
3473 private static void checkReplaceAll(String p, String s, String r, String expected)
3474 {
3475 if (!expected.equals(Pattern.compile(p)
3476 .matcher(s)
3477 .replaceAll(r)))
3478 failCount++;
3479 }
3480
3481 private static void checkExpectedFail(String p) {
3482 try {
3483 Pattern.compile(p);
3484 } catch (PatternSyntaxException pse) {
3485 //pse.printStackTrace();
3486 return;
3487 }
3488 failCount++;
3489 }
3490
shermana244eb52013-05-06 21:24:37 -07003491 private static void checkExpectedIAE(Matcher m, String g) {
sherman0b4d42d2009-02-23 21:06:15 -08003492 m.find();
3493 try {
3494 m.group(g);
shermana244eb52013-05-06 21:24:37 -07003495 } catch (IllegalArgumentException x) {
sherman0b4d42d2009-02-23 21:06:15 -08003496 //iae.printStackTrace();
shermana244eb52013-05-06 21:24:37 -07003497 try {
3498 m.start(g);
3499 } catch (IllegalArgumentException xx) {
3500 try {
3501 m.start(g);
3502 } catch (IllegalArgumentException xxx) {
3503 return;
3504 }
3505 }
sherman0b4d42d2009-02-23 21:06:15 -08003506 }
3507 failCount++;
3508 }
3509
shermana244eb52013-05-06 21:24:37 -07003510 private static void checkExpectedNPE(Matcher m) {
3511 m.find();
3512 try {
3513 m.group(null);
3514 } catch (NullPointerException x) {
3515 try {
3516 m.start(null);
3517 } catch (NullPointerException xx) {
3518 try {
3519 m.end(null);
3520 } catch (NullPointerException xxx) {
3521 return;
3522 }
3523 }
3524 }
3525 failCount++;
3526 }
sherman0b4d42d2009-02-23 21:06:15 -08003527
3528 private static void namedGroupCaptureTest() throws Exception {
3529 check(Pattern.compile("x+(?<gname>y+)z+"),
3530 "xxxyyyzzz",
3531 "gname",
3532 "yyy");
3533
shermand9337e02009-10-21 11:40:40 -07003534 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003535 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003536 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003537 "yyy");
3538
sherman0b4d42d2009-02-23 21:06:15 -08003539 //backref
3540 Pattern pattern = Pattern.compile("(a*)bc\\1");
3541 check(pattern, "zzzaabcazzz", true); // found "abca"
3542
3543 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3544 "zzzaabcaazzz", true);
3545
3546 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3547 "abcdefabc", true);
3548
3549 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3550 "abcdefghijkk", true);
3551
3552 // Supplementary character tests
3553 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3554 toSupplementaries("zzzaabcazzz"), true);
3555
3556 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3557 toSupplementaries("zzzaabcaazzz"), true);
3558
3559 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3560 toSupplementaries("abcdefabc"), true);
3561
3562 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3563 "(?<gname>" +
3564 toSupplementaries("k)") + "\\k<gname>"),
3565 toSupplementaries("abcdefghijkk"), true);
3566
3567 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3568 "xxxyyyzzzyyy",
3569 "gname",
3570 "yyy");
3571
3572 //replaceFirst/All
3573 checkReplaceFirst("(?<gn>ab)(c*)",
3574 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003575 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003576 "abzzzabcczzzabccc");
3577
3578 checkReplaceAll("(?<gn>ab)(c*)",
3579 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003580 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003581 "abzzzabzzzab");
3582
3583
3584 checkReplaceFirst("(?<gn>ab)(c*)",
3585 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003586 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003587 "zzzabzzzabcczzzabccczzz");
3588
3589 checkReplaceAll("(?<gn>ab)(c*)",
3590 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003591 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003592 "zzzabzzzabzzzabzzz");
3593
3594 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3595 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003596 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003597 "zzzccczzzabcczzzabccczzz");
3598
3599 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3600 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003601 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003602 "zzzccczzzcczzzccczzz");
3603
3604 //toSupplementaries("(ab)(c*)"));
3605 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3606 ")(?<gn2>" + toSupplementaries("c") + "*)",
3607 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003608 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003609 toSupplementaries("abzzzabcczzzabccc"));
3610
3611
3612 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3613 ")(?<gn2>" + toSupplementaries("c") + "*)",
3614 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003615 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003616 toSupplementaries("abzzzabzzzab"));
3617
3618 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3619 ")(?<gn2>" + toSupplementaries("c") + "*)",
3620 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003621 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003622 toSupplementaries("ccczzzabcczzzabccc"));
3623
3624
3625 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3626 ")(?<gn2>" + toSupplementaries("c") + "*)",
3627 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003628 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003629 toSupplementaries("ccczzzcczzzccc"));
3630
3631 checkReplaceFirst("(?<dog>Dog)AndCat",
3632 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003633 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003634 "zzzDogzzzDogAndCatzzz");
3635
3636
3637 checkReplaceAll("(?<dog>Dog)AndCat",
3638 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003639 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003640 "zzzDogzzzDogzzz");
3641
3642 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003643 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3644 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003645 failCount++;
3646
3647 // negative
3648 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3649 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003650 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003651 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3652 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
shermana244eb52013-05-06 21:24:37 -07003653 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3654 "gnameX");
3655 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
sherman0b4d42d2009-02-23 21:06:15 -08003656 report("NamedGroupCapture");
3657 }
sherman6782c962010-02-05 00:10:42 -08003658
shermancc01ef52010-05-18 15:36:47 -07003659 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003660 private static void nonBmpClassComplementTest() throws Exception {
3661 Pattern p = Pattern.compile("\\P{Lu}");
3662 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3663 if (m.find() && m.start() == 1)
3664 failCount++;
3665
3666 // from a unicode category
3667 p = Pattern.compile("\\P{Lu}");
3668 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3669 if (m.find())
3670 failCount++;
3671 if (!m.hitEnd())
3672 failCount++;
3673
3674 // block
3675 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3676 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3677 if (m.find() && m.start() == 1)
3678 failCount++;
3679
3680 report("NonBmpClassComplement");
3681 }
3682
shermancc01ef52010-05-18 15:36:47 -07003683 private static void unicodePropertiesTest() throws Exception {
3684 // different forms
3685 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3686 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3687 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3688 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3689 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3690 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3691 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3692 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3693 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3694 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3695 failCount++;
3696
3697 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3698 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3699 Matcher lastSM = common;
3700 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3701
3702 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3703 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3704 Matcher lastBM = latin;
3705 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3706
3707 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3708 if (cp >= 0x30000 && (cp & 0x70) == 0){
3709 continue; // only pick couple code points, they are the same
3710 }
3711
3712 // Unicode Script
3713 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3714 Matcher m;
3715 String str = new String(Character.toChars(cp));
3716 if (script == lastScript) {
3717 m = lastSM;
3718 m.reset(str);
3719 } else {
3720 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3721 }
3722 if (!m.matches()) {
3723 failCount++;
3724 }
3725 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3726 other.reset(str);
3727 if (other.matches()) {
3728 failCount++;
3729 }
3730 lastSM = m;
3731 lastScript = script;
3732
3733 // Unicode Block
3734 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3735 if (block == null) {
3736 //System.out.printf("Not a Block: cp=%x%n", cp);
3737 continue;
3738 }
3739 if (block == lastBlock) {
3740 m = lastBM;
3741 m.reset(str);
3742 } else {
3743 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3744 }
3745 if (!m.matches()) {
3746 failCount++;
3747 }
3748 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3749 other.reset(str);
3750 if (other.matches()) {
3751 failCount++;
3752 }
3753 lastBM = m;
3754 lastBlock = block;
3755 }
3756 report("unicodeProperties");
3757 }
shermanf03c78b2011-02-03 13:49:25 -08003758
3759 private static void unicodeHexNotationTest() throws Exception {
3760
3761 // negative
3762 checkExpectedFail("\\x{-23}");
3763 checkExpectedFail("\\x{110000}");
3764 checkExpectedFail("\\x{}");
3765 checkExpectedFail("\\x{AB[ef]");
3766
3767 // codepoint
3768 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3769 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3770 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3771 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3772
3773 // in class
3774 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3775 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3776 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3777 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3778 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3779 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3780
3781 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3782 String s = "A" + new String(Character.toChars(cp)) + "B";
3783 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3784 : String.format("\\u%04x\\u%04x",
3785 (int) Character.toChars(cp)[0],
3786 (int) Character.toChars(cp)[1]);
3787 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3788 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3789 failCount++;
3790 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3791 failCount++;
3792 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3793 failCount++;
3794 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3795 failCount++;
3796 }
3797 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003798 }
3799
3800 private static void unicodeClassesTest() throws Exception {
3801
3802 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3803 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3804 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3805 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3806 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3807 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3808 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3809 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3810 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3811 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3812 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3813 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3814 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3815 Matcher bound = Pattern.compile("\\b").matcher("");
3816 Matcher word = Pattern.compile("\\w++").matcher("");
3817 // UNICODE_CHARACTER_CLASS
3818 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3819 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3820 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3821 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3822 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3823 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3824 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3825 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3826 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3827 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3828 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3829 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3830 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3831 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3832 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3833 // embedded flag (?U)
3834 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3835 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3836 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3837
3838 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3839 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3840 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3841 // properties
3842 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3843 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3844 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3845 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3846 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3847 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3848 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3849 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3850 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3851 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
shermana244eb52013-05-06 21:24:37 -07003852 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
sherman85bbd8b2011-04-28 20:48:36 -07003853
3854 // javaMethod
3855 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3856 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3857 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3858 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3859
3860 for (int cp = 1; cp < 0x30000; cp++) {
3861 String str = new String(Character.toChars(cp));
3862 int type = Character.getType(cp);
3863 if (// lower
3864 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3865 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3866 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3867 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3868 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3869 // upper
3870 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3871 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3872 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3873 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3874 // alpha
3875 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3876 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3877 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3878 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3879 // digit
3880 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3881 Character.isDigit(cp) != digitU.reset(str).matches() ||
3882 // alnum
3883 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3884 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3885 // punct
3886 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3887 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3888 // graph
3889 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3890 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3891 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3892 // blank
3893 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3894 != blank.reset(str).matches() ||
3895 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3896 // print
3897 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3898 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3899 // cntrl
3900 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3901 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3902 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3903 // hexdigit
3904 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3905 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3906 // space
3907 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3908 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3909 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3910 // word
3911 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3912 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3913 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3914 // bwordb
3915 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3916 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3917 // properties
3918 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3919 Character.isLetter(cp) != letterP.reset(str).matches()||
3920 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3921 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3922 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
shermana244eb52013-05-06 21:24:37 -07003923 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3924 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
sherman85bbd8b2011-04-28 20:48:36 -07003925 failCount++;
3926 }
3927
3928 // bounds/word align
3929 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3930 if (!bwbU.reset("\u0180sherman\u0400").matches())
3931 failCount++;
3932 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3933 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3934 failCount++;
3935 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3936 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3937 failCount++;
3938 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3939 failCount++;
3940 report("unicodePredefinedClasses");
3941 }
shermanecb65472012-05-08 10:57:13 -07003942
3943 private static void horizontalAndVerticalWSTest() throws Exception {
3944 String hws = new String (new char[] {
3945 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3946 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3947 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3948 0x202f, 0x205f, 0x3000 });
3949 String vws = new String (new char[] {
3950 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3951 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3952 !Pattern.compile("[\\h]+").matcher(hws).matches())
3953 failCount++;
3954 if (Pattern.compile("\\H").matcher(hws).find() ||
3955 Pattern.compile("[\\H]").matcher(hws).find())
3956 failCount++;
3957 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3958 !Pattern.compile("[\\v]+").matcher(vws).matches())
3959 failCount++;
3960 if (Pattern.compile("\\V").matcher(vws).find() ||
3961 Pattern.compile("[\\V]").matcher(vws).find())
3962 failCount++;
3963 String prefix = "abcd";
3964 String suffix = "efgh";
3965 String ng = "A";
3966 for (int i = 0; i < hws.length(); i++) {
3967 String c = String.valueOf(hws.charAt(i));
3968 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3969 if (!m.find() || !c.equals(m.group()))
3970 failCount++;
3971 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3972 if (!m.find() || !c.equals(m.group()))
3973 failCount++;
3974
3975 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3976 if (!m.find() || !ng.equals(m.group()))
3977 failCount++;
3978 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3979 if (!m.find() || !ng.equals(m.group()))
3980 failCount++;
3981 }
3982 for (int i = 0; i < vws.length(); i++) {
3983 String c = String.valueOf(vws.charAt(i));
3984 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3985 if (!m.find() || !c.equals(m.group()))
3986 failCount++;
3987 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3988 if (!m.find() || !c.equals(m.group()))
3989 failCount++;
3990
3991 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3992 if (!m.find() || !ng.equals(m.group()))
3993 failCount++;
3994 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3995 if (!m.find() || !ng.equals(m.group()))
3996 failCount++;
3997 }
3998 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3999 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4000 failCount++;
4001 report("horizontalAndVerticalWSTest");
4002 }
4003
4004 private static void linebreakTest() throws Exception {
4005 String linebreaks = new String (new char[] {
4006 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4007 String crnl = "\r\n";
4008 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4009 !Pattern.compile("\\R").matcher(crnl).matches() ||
4010 Pattern.compile("\\R\\R").matcher(crnl).matches())
4011 failCount++;
4012 report("linebreakTest");
4013 }
4014
// This test is for 7189363
4016 private static void branchTest() throws Exception {
4017 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4018 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4019 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4020 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4021 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4022 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4023 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4024 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4025 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4026 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4027 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4028 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4029 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4030 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4031 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4032 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4033 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4034 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4035 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4036 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4037 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4038 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4039 failCount++;
4040 report("branchTest");
4041 }
4042
// This test is for 8007395
4044 private static void groupCurlyNotFoundSuppTest() throws Exception {
4045 String input = "test this as \ud83d\ude0d";
4046 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4047 "test(.)*(@[a-zA-Z.]+)",
4048 "test([^B])+(@[a-zA-Z.]+)",
4049 "test([^B])*(@[a-zA-Z.]+)",
4050 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4051 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4052 }) {
4053 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4054 .matcher(input);
4055 try {
4056 if (m.find()) {
4057 failCount++;
4058 }
4059 } catch (Exception x) {
4060 failCount++;
4061 }
4062 }
4063 report("GroupCurly NotFoundSupp");
4064 }
4065
// This test is for 8023647
4067 private static void groupCurlyBackoffTest() throws Exception {
4068 if (!"abc1c".matches("(\\w)+1\\1") ||
4069 "abc11".matches("(\\w)+1\\1")) {
4070 failCount++;
4071 }
4072 report("GroupCurly backoff");
4073 }
4074
// This test is for 8012646
4076 private static void patternAsPredicate() throws Exception {
4077 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4078
4079 if (p.test("")) {
4080 failCount++;
4081 }
4082 if (!p.test("word")) {
4083 failCount++;
4084 }
4085 if (p.test("1234")) {
4086 failCount++;
4087 }
4088 report("Pattern.asPredicate");
4089 }
sherman0b4d42d2009-02-23 21:06:15 -08004090}