blob: 7ca2f99026ebe5c388faab1bec6334523b7be5d5 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
igerasime69462b2015-08-03 22:36:28 +03002 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
serb9adafbe2013-11-12 20:24:25 +04007 * published by the Free Software Foundation.
sherman0b4d42d2009-02-23 21:06:15 -08008 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
ohair2283b9d2010-05-25 15:58:33 -070019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080022 */
23
24/**
25 * @test
26 * @summary tests RegExp framework
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080033 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman1242a6d2013-11-13 11:26:01 -080034 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
igerasime69462b2015-08-03 22:36:28 +030035 * 8027645 6854417
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
psandoze9d4ac92013-05-01 18:40:31 +020043import java.util.function.Predicate;
sherman0b4d42d2009-02-23 21:06:15 -080044
45/**
46 * This is a test class created to check the operation of
47 * the Pattern and Matcher classes.
48 */
49public class RegExTest {
50
    // Random source used by getRandomAlphaString().
    private static Random generator = new Random();
    // Set to true by report() once any test has recorded a failure.
    private static boolean failure = false;
    // Failures recorded since the last report() call; report() resets it to 0.
    private static int failCount = 0;
    // Name of the first test that report() saw failing; null until then.
    private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080055
56 /**
57 * Main to interpret arguments and run several tests.
58 *
59 */
60 public static void main(String[] args) throws Exception {
61 // Most of the tests are in a file
62 processFile("TestCases.txt");
63 //processFile("PerlCases.txt");
64 processFile("BMPTestCases.txt");
65 processFile("SupplementaryTestCases.txt");
66
67 // These test many randomly generated char patterns
68 bm();
69 slice();
70
71 // These are hard to put into the file
72 escapes();
73 blankInput();
74
75 // Substitition tests on randomly generated sequences
76 globalSubstitute();
77 stringbufferSubstitute();
78 substitutionBasher();
79
80 // Canonical Equivalence
81 ceTest();
82
83 // Anchors
84 anchorTest();
85
86 // boolean match calls
87 matchesTest();
88 lookingAtTest();
89
90 // Pattern API
91 patternMatchesTest();
92
93 // Misc
94 lookbehindTest();
95 nullArgumentTest();
96 backRefTest();
97 groupCaptureTest();
98 caretTest();
99 charClassTest();
100 emptyPatternTest();
101 findIntTest();
102 group0Test();
103 longPatternTest();
104 octalTest();
105 ampersandTest();
106 negationTest();
107 splitTest();
108 appendTest();
109 caseFoldingTest();
110 commentsTest();
111 unixLinesTest();
112 replaceFirstTest();
113 gTest();
114 zTest();
115 serializeTest();
116 reluctantRepetitionTest();
117 multilineDollarTest();
118 dollarAtEndTest();
119 caretBetweenTerminatorsTest();
120 // This RFE rejected in Tiger numOccurrencesTest();
121 javaCharClassTest();
122 nonCaptureRepetitionTest();
123 notCapturedGroupCurlyMatchTest();
124 escapedSegmentTest();
125 literalPatternTest();
126 literalReplacementTest();
127 regionTest();
128 toStringTest();
129 negatedCharClassTest();
130 findFromTest();
131 boundsTest();
132 unicodeWordBoundsTest();
133 caretAtEndTest();
134 wordSearchTest();
135 hitEndTest();
136 toMatchResultTest();
137 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800138 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800139 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800140 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700141 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800142 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700143 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700144 horizontalAndVerticalWSTest();
145 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700146 branchTest();
shermanf6f35a12013-04-26 13:59:10 -0700147 groupCurlyNotFoundSuppTest();
sherman95a939c2013-08-27 12:54:44 -0700148 groupCurlyBackoffTest();
psandoze9d4ac92013-05-01 18:40:31 +0200149 patternAsPredicate();
sherman1242a6d2013-11-13 11:26:01 -0800150
shermanb16229d2011-12-19 14:14:14 -0800151 if (failure) {
152 throw new
153 RuntimeException("RegExTest failed, 1st failure: " +
154 firstFailure);
155 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800156 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800157 }
sherman0b4d42d2009-02-23 21:06:15 -0800158 }
159
160 // Utility functions
161
162 private static String getRandomAlphaString(int length) {
163 StringBuffer buf = new StringBuffer(length);
164 for (int i=0; i<length; i++) {
165 char randChar = (char)(97 + generator.nextInt(26));
166 buf.append(randChar);
167 }
168 return buf.toString();
169 }
170
171 private static void check(Matcher m, String expected) {
172 m.find();
173 if (!m.group().equals(expected))
174 failCount++;
175 }
176
177 private static void check(Matcher m, String result, boolean expected) {
178 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800179 if (m.group().equals(result) != expected)
180 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800181 }
182
183 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800184 if (p.matcher(s).find() != expected)
185 failCount++;
186 }
187
188 private static void check(String p, String s, boolean expected) {
189 Matcher matcher = Pattern.compile(p).matcher(s);
190 if (matcher.find() != expected)
191 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800192 }
193
194 private static void check(String p, char c, boolean expected) {
195 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
196 Pattern pattern = Pattern.compile(propertyPattern);
197 char[] ca = new char[1]; ca[0] = c;
198 Matcher matcher = pattern.matcher(new String(ca));
199 if (!matcher.find())
200 failCount++;
201 }
202
203 private static void check(String p, int codePoint, boolean expected) {
204 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
205 Pattern pattern = Pattern.compile(propertyPattern);
206 char[] ca = Character.toChars(codePoint);
207 Matcher matcher = pattern.matcher(new String(ca));
208 if (!matcher.find())
209 failCount++;
210 }
211
212 private static void check(String p, int flag, String input, String s,
213 boolean expected)
214 {
215 Pattern pattern = Pattern.compile(p, flag);
216 Matcher matcher = pattern.matcher(input);
217 if (expected)
218 check(matcher, s, expected);
219 else
220 check(pattern, input, false);
221 }
222
223 private static void report(String testName) {
224 int spacesToAdd = 30 - testName.length();
225 StringBuffer paddedNameBuffer = new StringBuffer(testName);
226 for (int i=0; i<spacesToAdd; i++)
227 paddedNameBuffer.append(" ");
228 String paddedName = paddedNameBuffer.toString();
229 System.err.println(paddedName + ": " +
230 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800231 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800232 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800233
234 if (firstFailure == null) {
235 firstFailure = testName;
236 }
237 }
238
sherman0b4d42d2009-02-23 21:06:15 -0800239 failCount = 0;
240 }
241
242 /**
243 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
244 * supplementary characters. This method does NOT fully take care
245 * of the regex syntax.
246 */
247 private static String toSupplementaries(String s) {
248 int length = s.length();
249 StringBuffer sb = new StringBuffer(length * 2);
250
251 for (int i = 0; i < length; ) {
252 char c = s.charAt(i++);
253 if (c == '\\') {
254 sb.append(c);
255 if (i < length) {
256 c = s.charAt(i++);
257 sb.append(c);
258 if (c == 'u') {
259 // assume no syntax error
260 sb.append(s.charAt(i++));
261 sb.append(s.charAt(i++));
262 sb.append(s.charAt(i++));
263 sb.append(s.charAt(i++));
264 }
265 }
266 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
267 sb.append('\ud800').append((char)('\udc00'+c));
268 } else {
269 sb.append(c);
270 }
271 }
272 return sb.toString();
273 }
274
275 // Regular expression tests
276
277 // This is for bug 6178785
278 // Test if an expected NPE gets thrown when passing in a null argument
279 private static boolean check(Runnable test) {
280 try {
281 test.run();
282 failCount++;
283 return false;
284 } catch (NullPointerException npe) {
285 return true;
286 }
287 }
288
289 private static void nullArgumentTest() {
290 check(new Runnable() { public void run() { Pattern.compile(null); }});
291 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
292 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
293 check(new Runnable() { public void run() { Pattern.quote(null);}});
294 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
295 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
296
297 final Matcher m = Pattern.compile("xyz").matcher("xyz");
298 m.matches();
299 check(new Runnable() { public void run() { m.appendTail(null);}});
300 check(new Runnable() { public void run() { m.replaceAll(null);}});
301 check(new Runnable() { public void run() { m.replaceFirst(null);}});
302 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
303 check(new Runnable() { public void run() { m.reset(null);}});
304 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
305 //check(new Runnable() { public void run() { m.usePattern(null);}});
306
307 report("Null Argument");
308 }
309
310 // This is for bug6635133
311 // Test if surrogate pair in Unicode escapes can be handled correctly.
312 private static void surrogatesInClassTest() throws Exception {
313 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
314 Matcher matcher = pattern.matcher("\ud834\udd22");
315 if (!matcher.find())
316 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800317
318 report("Surrogate pair in Unicode escape");
319 }
320
321 // This is for bug6990617
322 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
323 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
324 // char is an octal digit.
325 private static void removeQEQuotingTest() throws Exception {
326 Pattern pattern =
327 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
328 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
329 if (!matcher.find())
330 failCount++;
331
332 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800333 }
334
335 // This is for bug 4988891
336 // Test toMatchResult to see that it is a copy of the Matcher
337 // that is not affected by subsequent operations on the original
338 private static void toMatchResultTest() throws Exception {
339 Pattern pattern = Pattern.compile("squid");
340 Matcher matcher = pattern.matcher(
341 "agiantsquidofdestinyasmallsquidoffate");
342 matcher.find();
343 int matcherStart1 = matcher.start();
344 MatchResult mr = matcher.toMatchResult();
345 if (mr == matcher)
346 failCount++;
347 int resultStart1 = mr.start();
348 if (matcherStart1 != resultStart1)
349 failCount++;
350 matcher.find();
351 int matcherStart2 = matcher.start();
352 int resultStart2 = mr.start();
353 if (matcherStart2 == resultStart2)
354 failCount++;
355 if (resultStart1 != resultStart2)
356 failCount++;
357 MatchResult mr2 = matcher.toMatchResult();
358 if (mr == mr2)
359 failCount++;
360 if (mr2.start() != matcherStart2)
361 failCount++;
362 report("toMatchResult is a copy");
363 }
364
365 // This is for bug 5013885
366 // Must test a slice to see if it reports hitEnd correctly
367 private static void hitEndTest() throws Exception {
368 // Basic test of Slice node
369 Pattern p = Pattern.compile("^squidattack");
370 Matcher m = p.matcher("squack");
371 m.find();
372 if (m.hitEnd())
373 failCount++;
374 m.reset("squid");
375 m.find();
376 if (!m.hitEnd())
377 failCount++;
378
379 // Test Slice, SliceA and SliceU nodes
380 for (int i=0; i<3; i++) {
381 int flags = 0;
382 if (i==1) flags = Pattern.CASE_INSENSITIVE;
383 if (i==2) flags = Pattern.UNICODE_CASE;
384 p = Pattern.compile("^abc", flags);
385 m = p.matcher("ad");
386 m.find();
387 if (m.hitEnd())
388 failCount++;
389 m.reset("ab");
390 m.find();
391 if (!m.hitEnd())
392 failCount++;
393 }
394
395 // Test Boyer-Moore node
396 p = Pattern.compile("catattack");
397 m = p.matcher("attack");
398 m.find();
399 if (!m.hitEnd())
400 failCount++;
401
402 p = Pattern.compile("catattack");
403 m = p.matcher("attackattackattackcatatta");
404 m.find();
405 if (!m.hitEnd())
406 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800407 report("hitEnd from a Slice");
408 }
409
410 // This is for bug 4997476
411 // It is weird code submitted by customer demonstrating a regression
412 private static void wordSearchTest() throws Exception {
413 String testString = new String("word1 word2 word3");
414 Pattern p = Pattern.compile("\\b");
415 Matcher m = p.matcher(testString);
416 int position = 0;
417 int start = 0;
418 while (m.find(position)) {
419 start = m.start();
420 if (start == testString.length())
421 break;
422 if (m.find(start+1)) {
423 position = m.start();
424 } else {
425 position = testString.length();
426 }
427 if (testString.substring(start, position).equals(" "))
428 continue;
429 if (!testString.substring(start, position-1).startsWith("word"))
430 failCount++;
431 }
432 report("Customer word search");
433 }
434
435 // This is for bug 4994840
436 private static void caretAtEndTest() throws Exception {
437 // Problem only occurs with multiline patterns
438 // containing a beginning-of-line caret "^" followed
439 // by an expression that also matches the empty string.
440 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
441 Matcher matcher = pattern.matcher("\r");
442 matcher.find();
443 matcher.find();
444 report("Caret at end");
445 }
446
447 // This test is for 4979006
448 // Check to see if word boundary construct properly handles unicode
449 // non spacing marks
450 private static void unicodeWordBoundsTest() throws Exception {
451 String spaces = " ";
452 String wordChar = "a";
453 String nsm = "\u030a";
454
455 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
456
457 Pattern pattern = Pattern.compile("\\b");
458 Matcher matcher = pattern.matcher("");
459 // S=other B=word character N=non spacing mark .=word boundary
460 // SS.BB.SS
461 String input = spaces + wordChar + wordChar + spaces;
462 twoFindIndexes(input, matcher, 2, 4);
463 // SS.BBN.SS
464 input = spaces + wordChar +wordChar + nsm + spaces;
465 twoFindIndexes(input, matcher, 2, 5);
466 // SS.BN.SS
467 input = spaces + wordChar + nsm + spaces;
468 twoFindIndexes(input, matcher, 2, 4);
469 // SS.BNN.SS
470 input = spaces + wordChar + nsm + nsm + spaces;
471 twoFindIndexes(input, matcher, 2, 5);
472 // SSN.BB.SS
473 input = spaces + nsm + wordChar + wordChar + spaces;
474 twoFindIndexes(input, matcher, 3, 5);
475 // SS.BNB.SS
476 input = spaces + wordChar + nsm + wordChar + spaces;
477 twoFindIndexes(input, matcher, 2, 5);
478 // SSNNSS
479 input = spaces + nsm + nsm + spaces;
480 matcher.reset(input);
481 if (matcher.find())
482 failCount++;
483 // SSN.BBN.SS
484 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
485 twoFindIndexes(input, matcher, 3, 6);
486
487 report("Unicode word boundary");
488 }
489
490 private static void twoFindIndexes(String input, Matcher matcher, int a,
491 int b) throws Exception
492 {
493 matcher.reset(input);
494 matcher.find();
495 if (matcher.start() != a)
496 failCount++;
497 matcher.find();
498 if (matcher.start() != b)
499 failCount++;
500 }
501
502 // This test is for 6284152
503 static void check(String regex, String input, String[] expected) {
504 List<String> result = new ArrayList<String>();
505 Pattern p = Pattern.compile(regex);
506 Matcher m = p.matcher(input);
507 while (m.find()) {
508 result.add(m.group());
509 }
510 if (!Arrays.asList(expected).equals(result))
511 failCount++;
512 }
513
514 private static void lookbehindTest() throws Exception {
515 //Positive
516 check("(?<=%.{0,5})foo\\d",
517 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
518 new String[]{"foo1", "foo2", "foo3"});
519
520 //boundary at end of the lookbehind sub-regex should work consistently
521 //with the boundary just after the lookbehind sub-regex
522 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
523 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
524 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
525 check("(?<!abc \\b)foo", "abc foo", new String[0]);
526
527 //Negative
528 check("(?<!%.{0,5})foo\\d",
529 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
530 new String[] {"foo4", "foo5"});
531
532 //Positive greedy
533 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
534
535 //Positive reluctant
536 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
537
538 //supplementary
539 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
540 new String[] {"fo\ud800\udc00o"});
541 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
542 new String[] {"fo\ud800\udc00o"});
543 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
544 new String[] {"fo\ud800\udc00o"});
545 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
546 new String[] {"fo\ud800\udc00o"});
547 report("Lookbehind");
548 }
549
550 // This test is for 4938995
551 // Check to see if weak region boundaries are transparent to
552 // lookahead and lookbehind constructs
553 private static void boundsTest() throws Exception {
554 String fullMessage = "catdogcat";
555 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
556 Matcher matcher = pattern.matcher("catdogca");
557 matcher.useTransparentBounds(true);
558 if (matcher.find())
559 failCount++;
560 matcher.reset("atdogcat");
561 if (matcher.find())
562 failCount++;
563 matcher.reset(fullMessage);
564 if (!matcher.find())
565 failCount++;
566 matcher.reset(fullMessage);
567 matcher.region(0,9);
568 if (!matcher.find())
569 failCount++;
570 matcher.reset(fullMessage);
571 matcher.region(0,6);
572 if (!matcher.find())
573 failCount++;
574 matcher.reset(fullMessage);
575 matcher.region(3,6);
576 if (!matcher.find())
577 failCount++;
578 matcher.useTransparentBounds(false);
579 if (matcher.find())
580 failCount++;
581
582 // Negative lookahead/lookbehind
583 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
584 matcher = pattern.matcher("dogcat");
585 matcher.useTransparentBounds(true);
586 matcher.region(0,3);
587 if (matcher.find())
588 failCount++;
589 matcher.reset("catdog");
590 matcher.region(3,6);
591 if (matcher.find())
592 failCount++;
593 matcher.useTransparentBounds(false);
594 matcher.reset("dogcat");
595 matcher.region(0,3);
596 if (!matcher.find())
597 failCount++;
598 matcher.reset("catdog");
599 matcher.region(3,6);
600 if (!matcher.find())
601 failCount++;
602
603 report("Region bounds transparency");
604 }
605
606 // This test is for 4945394
607 private static void findFromTest() throws Exception {
608 String message = "This is 40 $0 message.";
609 Pattern pat = Pattern.compile("\\$0");
610 Matcher match = pat.matcher(message);
611 if (!match.find())
612 failCount++;
613 if (match.find())
614 failCount++;
615 if (match.find())
616 failCount++;
617 report("Check for alternating find");
618 }
619
620 // This test is for 4872664 and 4892980
621 private static void negatedCharClassTest() throws Exception {
622 Pattern pattern = Pattern.compile("[^>]");
623 Matcher matcher = pattern.matcher("\u203A");
624 if (!matcher.matches())
625 failCount++;
626 pattern = Pattern.compile("[^fr]");
627 matcher = pattern.matcher("a");
628 if (!matcher.find())
629 failCount++;
630 matcher.reset("\u203A");
631 if (!matcher.find())
632 failCount++;
633 String s = "for";
634 String result[] = s.split("[^fr]");
635 if (!result[0].equals("f"))
636 failCount++;
637 if (!result[1].equals("r"))
638 failCount++;
639 s = "f\u203Ar";
640 result = s.split("[^fr]");
641 if (!result[0].equals("f"))
642 failCount++;
643 if (!result[1].equals("r"))
644 failCount++;
645
646 // Test adding to bits, subtracting a node, then adding to bits again
647 pattern = Pattern.compile("[^f\u203Ar]");
648 matcher = pattern.matcher("a");
649 if (!matcher.find())
650 failCount++;
651 matcher.reset("f");
652 if (matcher.find())
653 failCount++;
654 matcher.reset("\u203A");
655 if (matcher.find())
656 failCount++;
657 matcher.reset("r");
658 if (matcher.find())
659 failCount++;
660 matcher.reset("\u203B");
661 if (!matcher.find())
662 failCount++;
663
664 // Test subtracting a node, adding to bits, subtracting again
665 pattern = Pattern.compile("[^\u203Ar\u203B]");
666 matcher = pattern.matcher("a");
667 if (!matcher.find())
668 failCount++;
669 matcher.reset("\u203A");
670 if (matcher.find())
671 failCount++;
672 matcher.reset("r");
673 if (matcher.find())
674 failCount++;
675 matcher.reset("\u203B");
676 if (matcher.find())
677 failCount++;
678 matcher.reset("\u203C");
679 if (!matcher.find())
680 failCount++;
681
682 report("Negated Character Class");
683 }
684
685 // This test is for 4628291
686 private static void toStringTest() throws Exception {
687 Pattern pattern = Pattern.compile("b+");
688 if (pattern.toString() != "b+")
689 failCount++;
690 Matcher matcher = pattern.matcher("aaabbbccc");
691 String matcherString = matcher.toString(); // unspecified
692 matcher.find();
693 matcherString = matcher.toString(); // unspecified
694 matcher.region(0,3);
695 matcherString = matcher.toString(); // unspecified
696 matcher.reset();
697 matcherString = matcher.toString(); // unspecified
698 report("toString");
699 }
700
701 // This test is for 4808962
702 private static void literalPatternTest() throws Exception {
703 int flags = Pattern.LITERAL;
704
705 Pattern pattern = Pattern.compile("abc\\t$^", flags);
706 check(pattern, "abc\\t$^", true);
707
708 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
709 check(pattern, "abc\\t$^", true);
710
711 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
712 check(pattern, "\\Qa^$bcabc\\E", true);
713 check(pattern, "a^$bcabc", false);
714
715 pattern = Pattern.compile("\\\\Q\\\\E");
716 check(pattern, "\\Q\\E", true);
717
718 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
719 check(pattern, "abcefg\\Q\\Ehij", true);
720
721 pattern = Pattern.compile("\\\\\\Q\\\\E");
722 check(pattern, "\\\\\\\\", true);
723
724 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
725 check(pattern, "\\Qa^$bcabc\\E", true);
726 check(pattern, "a^$bcabc", false);
727
728 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
729 check(pattern, "\\Qabc\\Edef", true);
730 check(pattern, "abcdef", false);
731
732 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
733 check(pattern, "abc\\Edef", true);
734 check(pattern, "abcdef", false);
735
736 pattern = Pattern.compile(Pattern.quote("\\E"));
737 check(pattern, "\\E", true);
738
739 pattern = Pattern.compile("((((abc.+?:)", flags);
740 check(pattern, "((((abc.+?:)", true);
741
742 flags |= Pattern.MULTILINE;
743
744 pattern = Pattern.compile("^cat$", flags);
745 check(pattern, "abc^cat$def", true);
746 check(pattern, "cat", false);
747
748 flags |= Pattern.CASE_INSENSITIVE;
749
750 pattern = Pattern.compile("abcdef", flags);
751 check(pattern, "ABCDEF", true);
752 check(pattern, "AbCdEf", true);
753
754 flags |= Pattern.DOTALL;
755
756 pattern = Pattern.compile("a...b", flags);
757 check(pattern, "A...b", true);
758 check(pattern, "Axxxb", false);
759
760 flags |= Pattern.CANON_EQ;
761
762 Pattern p = Pattern.compile("testa\u030a", flags);
763 check(pattern, "testa\u030a", false);
764 check(pattern, "test\u00e5", false);
765
766 // Supplementary character test
767 flags = Pattern.LITERAL;
768
769 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
770 check(pattern, toSupplementaries("abc\\t$^"), true);
771
772 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
773 check(pattern, toSupplementaries("abc\\t$^"), true);
774
775 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
776 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
777 check(pattern, toSupplementaries("a^$bcabc"), false);
778
779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
780 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
781 check(pattern, toSupplementaries("a^$bcabc"), false);
782
783 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
784 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
785 check(pattern, toSupplementaries("abcdef"), false);
786
787 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
788 check(pattern, toSupplementaries("abc\\Edef"), true);
789 check(pattern, toSupplementaries("abcdef"), false);
790
791 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
792 check(pattern, toSupplementaries("((((abc.+?:)"), true);
793
794 flags |= Pattern.MULTILINE;
795
796 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
797 check(pattern, toSupplementaries("abc^cat$def"), true);
798 check(pattern, toSupplementaries("cat"), false);
799
800 flags |= Pattern.DOTALL;
801
802 // note: this is case-sensitive.
803 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
804 check(pattern, toSupplementaries("a...b"), true);
805 check(pattern, toSupplementaries("axxxb"), false);
806
807 flags |= Pattern.CANON_EQ;
808
809 String t = toSupplementaries("test");
810 p = Pattern.compile(t + "a\u030a", flags);
811 check(pattern, t + "a\u030a", false);
812 check(pattern, t + "\u00e5", false);
813
814 report("Literal pattern");
815 }
816
817 // This test is for 4803179
818 // This test is also for 4808962, replacement parts
819 private static void literalReplacementTest() throws Exception {
820 int flags = Pattern.LITERAL;
821
822 Pattern pattern = Pattern.compile("abc", flags);
823 Matcher matcher = pattern.matcher("zzzabczzz");
824 String replaceTest = "$0";
825 String result = matcher.replaceAll(replaceTest);
826 if (!result.equals("zzzabczzz"))
827 failCount++;
828
829 matcher.reset();
830 String literalReplacement = matcher.quoteReplacement(replaceTest);
831 result = matcher.replaceAll(literalReplacement);
832 if (!result.equals("zzz$0zzz"))
833 failCount++;
834
835 matcher.reset();
836 replaceTest = "\\t$\\$";
837 literalReplacement = matcher.quoteReplacement(replaceTest);
838 result = matcher.replaceAll(literalReplacement);
839 if (!result.equals("zzz\\t$\\$zzz"))
840 failCount++;
841
842 // Supplementary character test
843 pattern = Pattern.compile(toSupplementaries("abc"), flags);
844 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
845 replaceTest = "$0";
846 result = matcher.replaceAll(replaceTest);
847 if (!result.equals(toSupplementaries("zzzabczzz")))
848 failCount++;
849
850 matcher.reset();
851 literalReplacement = matcher.quoteReplacement(replaceTest);
852 result = matcher.replaceAll(literalReplacement);
853 if (!result.equals(toSupplementaries("zzz$0zzz")))
854 failCount++;
855
856 matcher.reset();
857 replaceTest = "\\t$\\$";
858 literalReplacement = matcher.quoteReplacement(replaceTest);
859 result = matcher.replaceAll(literalReplacement);
860 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
861 failCount++;
862
sherman5c8f3492012-04-12 15:01:41 -0700863 // IAE should be thrown if backslash or '$' is the last character
864 // in replacement string
865 try {
866 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700867 failCount++;
868 } catch (IllegalArgumentException iie) {
869 } catch (Exception e) {
870 failCount++;
871 }
872 try {
sherman5c8f3492012-04-12 15:01:41 -0700873 "\uac00".replaceAll("\uac00", "\\");
874 failCount++;
875 } catch (IllegalArgumentException iie) {
876 } catch (Exception e) {
877 failCount++;
878 }
sherman0b4d42d2009-02-23 21:06:15 -0800879 report("Literal replacement");
880 }
881
882 // This test is for 4757029
883 private static void regionTest() throws Exception {
884 Pattern pattern = Pattern.compile("abc");
885 Matcher matcher = pattern.matcher("abcdefabc");
886
887 matcher.region(0,9);
888 if (!matcher.find())
889 failCount++;
890 if (!matcher.find())
891 failCount++;
892 matcher.region(0,3);
893 if (!matcher.find())
894 failCount++;
895 matcher.region(3,6);
896 if (matcher.find())
897 failCount++;
898 matcher.region(0,2);
899 if (matcher.find())
900 failCount++;
901
902 expectRegionFail(matcher, 1, -1);
903 expectRegionFail(matcher, -1, -1);
904 expectRegionFail(matcher, -1, 1);
905 expectRegionFail(matcher, 5, 3);
906 expectRegionFail(matcher, 5, 12);
907 expectRegionFail(matcher, 12, 12);
908
909 pattern = Pattern.compile("^abc$");
910 matcher = pattern.matcher("zzzabczzz");
911 matcher.region(0,9);
912 if (matcher.find())
913 failCount++;
914 matcher.region(3,6);
915 if (!matcher.find())
916 failCount++;
917 matcher.region(3,6);
918 matcher.useAnchoringBounds(false);
919 if (matcher.find())
920 failCount++;
921
922 // Supplementary character test
923 pattern = Pattern.compile(toSupplementaries("abc"));
924 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
925 matcher.region(0,9*2);
926 if (!matcher.find())
927 failCount++;
928 if (!matcher.find())
929 failCount++;
930 matcher.region(0,3*2);
931 if (!matcher.find())
932 failCount++;
933 matcher.region(1,3*2);
934 if (matcher.find())
935 failCount++;
936 matcher.region(3*2,6*2);
937 if (matcher.find())
938 failCount++;
939 matcher.region(0,2*2);
940 if (matcher.find())
941 failCount++;
942 matcher.region(0,2*2+1);
943 if (matcher.find())
944 failCount++;
945
946 expectRegionFail(matcher, 1*2, -1);
947 expectRegionFail(matcher, -1, -1);
948 expectRegionFail(matcher, -1, 1*2);
949 expectRegionFail(matcher, 5*2, 3*2);
950 expectRegionFail(matcher, 5*2, 12*2);
951 expectRegionFail(matcher, 12*2, 12*2);
952
953 pattern = Pattern.compile(toSupplementaries("^abc$"));
954 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
955 matcher.region(0,9*2);
956 if (matcher.find())
957 failCount++;
958 matcher.region(3*2,6*2);
959 if (!matcher.find())
960 failCount++;
961 matcher.region(3*2+1,6*2);
962 if (matcher.find())
963 failCount++;
964 matcher.region(3*2,6*2-1);
965 if (matcher.find())
966 failCount++;
967 matcher.region(3*2,6*2);
968 matcher.useAnchoringBounds(false);
969 if (matcher.find())
970 failCount++;
971 report("Regions");
972 }
973
974 private static void expectRegionFail(Matcher matcher, int index1,
975 int index2)
976 {
977 try {
978 matcher.region(index1, index2);
979 failCount++;
980 } catch (IndexOutOfBoundsException ioobe) {
981 // Correct result
982 } catch (IllegalStateException ise) {
983 // Correct result
984 }
985 }
986
987 // This test is for 4803197
988 private static void escapedSegmentTest() throws Exception {
989
990 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
991 check(pattern, "dir1\\dir2", true);
992
993 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
994 check(pattern, "dir1\\dir2\\", true);
995
996 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
997 check(pattern, "dir1\\dir2\\", true);
998
999 // Supplementary character test
1000 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1001 check(pattern, toSupplementaries("dir1\\dir2"), true);
1002
1003 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1004 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1005
1006 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1007 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1008
1009 report("Escaped segment");
1010 }
1011
1012 // This test is for 4792284
1013 private static void nonCaptureRepetitionTest() throws Exception {
1014 String input = "abcdefgh;";
1015
1016 String[] patterns = new String[] {
1017 "(?:\\w{4})+;",
1018 "(?:\\w{8})*;",
1019 "(?:\\w{2}){2,4};",
1020 "(?:\\w{4}){2,};", // only matches the
1021 ".*?(?:\\w{5})+;", // specified minimum
1022 ".*?(?:\\w{9})*;", // number of reps - OK
1023 "(?:\\w{4})+?;", // lazy repetition - OK
1024 "(?:\\w{4})++;", // possessive repetition - OK
1025 "(?:\\w{2,}?)+;", // non-deterministic - OK
1026 "(\\w{4})+;", // capturing group - OK
1027 };
1028
1029 for (int i = 0; i < patterns.length; i++) {
1030 // Check find()
1031 check(patterns[i], 0, input, input, true);
1032 // Check matches()
1033 Pattern p = Pattern.compile(patterns[i]);
1034 Matcher m = p.matcher(input);
1035
1036 if (m.matches()) {
1037 if (!m.group(0).equals(input))
1038 failCount++;
1039 } else {
1040 failCount++;
1041 }
1042 }
1043
1044 report("Non capturing repetition");
1045 }
1046
1047 // This test is for 6358731
1048 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1049 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1050 Matcher matcher = pattern.matcher("abcd");
1051 if (!matcher.matches() ||
1052 matcher.group(1) != null ||
1053 !matcher.group(2).equals("abcd")) {
1054 failCount++;
1055 }
1056 report("Not captured GroupCurly");
1057 }
1058
1059 // This test is for 4706545
1060 private static void javaCharClassTest() throws Exception {
1061 for (int i=0; i<1000; i++) {
1062 char c = (char)generator.nextInt();
1063 check("{javaLowerCase}", c, Character.isLowerCase(c));
1064 check("{javaUpperCase}", c, Character.isUpperCase(c));
1065 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1066 check("{javaTitleCase}", c, Character.isTitleCase(c));
1067 check("{javaDigit}", c, Character.isDigit(c));
1068 check("{javaDefined}", c, Character.isDefined(c));
1069 check("{javaLetter}", c, Character.isLetter(c));
1070 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1071 check("{javaJavaIdentifierStart}", c,
1072 Character.isJavaIdentifierStart(c));
1073 check("{javaJavaIdentifierPart}", c,
1074 Character.isJavaIdentifierPart(c));
1075 check("{javaUnicodeIdentifierStart}", c,
1076 Character.isUnicodeIdentifierStart(c));
1077 check("{javaUnicodeIdentifierPart}", c,
1078 Character.isUnicodeIdentifierPart(c));
1079 check("{javaIdentifierIgnorable}", c,
1080 Character.isIdentifierIgnorable(c));
1081 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1082 check("{javaWhitespace}", c, Character.isWhitespace(c));
1083 check("{javaISOControl}", c, Character.isISOControl(c));
1084 check("{javaMirrored}", c, Character.isMirrored(c));
1085
1086 }
1087
1088 // Supplementary character test
1089 for (int i=0; i<1000; i++) {
1090 int c = generator.nextInt(Character.MAX_CODE_POINT
1091 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1092 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1093 check("{javaLowerCase}", c, Character.isLowerCase(c));
1094 check("{javaUpperCase}", c, Character.isUpperCase(c));
1095 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1096 check("{javaTitleCase}", c, Character.isTitleCase(c));
1097 check("{javaDigit}", c, Character.isDigit(c));
1098 check("{javaDefined}", c, Character.isDefined(c));
1099 check("{javaLetter}", c, Character.isLetter(c));
1100 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1101 check("{javaJavaIdentifierStart}", c,
1102 Character.isJavaIdentifierStart(c));
1103 check("{javaJavaIdentifierPart}", c,
1104 Character.isJavaIdentifierPart(c));
1105 check("{javaUnicodeIdentifierStart}", c,
1106 Character.isUnicodeIdentifierStart(c));
1107 check("{javaUnicodeIdentifierPart}", c,
1108 Character.isUnicodeIdentifierPart(c));
1109 check("{javaIdentifierIgnorable}", c,
1110 Character.isIdentifierIgnorable(c));
1111 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1112 check("{javaWhitespace}", c, Character.isWhitespace(c));
1113 check("{javaISOControl}", c, Character.isISOControl(c));
1114 check("{javaMirrored}", c, Character.isMirrored(c));
1115 }
1116
1117 report("Java character classes");
1118 }
1119
1120 // This test is for 4523620
1121 /*
1122 private static void numOccurrencesTest() throws Exception {
1123 Pattern pattern = Pattern.compile("aaa");
1124
1125 if (pattern.numOccurrences("aaaaaa", false) != 2)
1126 failCount++;
1127 if (pattern.numOccurrences("aaaaaa", true) != 4)
1128 failCount++;
1129
1130 pattern = Pattern.compile("^");
1131 if (pattern.numOccurrences("aaaaaa", false) != 1)
1132 failCount++;
1133 if (pattern.numOccurrences("aaaaaa", true) != 1)
1134 failCount++;
1135
1136 report("Number of Occurrences");
1137 }
1138 */
1139
1140 // This test is for 4776374
1141 private static void caretBetweenTerminatorsTest() throws Exception {
1142 int flags1 = Pattern.DOTALL;
1143 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1144 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1145 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1146
1147 check("^....", flags1, "test\ntest", "test", true);
1148 check(".....^", flags1, "test\ntest", "test", false);
1149 check(".....^", flags1, "test\n", "test", false);
1150 check("....^", flags1, "test\r\n", "test", false);
1151
1152 check("^....", flags2, "test\ntest", "test", true);
1153 check("....^", flags2, "test\ntest", "test", false);
1154 check(".....^", flags2, "test\n", "test", false);
1155 check("....^", flags2, "test\r\n", "test", false);
1156
1157 check("^....", flags3, "test\ntest", "test", true);
1158 check(".....^", flags3, "test\ntest", "test\n", true);
1159 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1160 check(".....^", flags3, "test\n", "test", false);
1161 check(".....^", flags3, "test\r\n", "test", false);
1162 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1163
1164 check("^....", flags4, "test\ntest", "test", true);
1165 check(".....^", flags3, "test\ntest", "test\n", true);
1166 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1167 check(".....^", flags4, "test\n", "test\n", false);
1168 check(".....^", flags4, "test\r\n", "test\r", false);
1169
1170 // Supplementary character test
1171 String t = toSupplementaries("test");
1172 check("^....", flags1, t+"\n"+t, t, true);
1173 check(".....^", flags1, t+"\n"+t, t, false);
1174 check(".....^", flags1, t+"\n", t, false);
1175 check("....^", flags1, t+"\r\n", t, false);
1176
1177 check("^....", flags2, t+"\n"+t, t, true);
1178 check("....^", flags2, t+"\n"+t, t, false);
1179 check(".....^", flags2, t+"\n", t, false);
1180 check("....^", flags2, t+"\r\n", t, false);
1181
1182 check("^....", flags3, t+"\n"+t, t, true);
1183 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1184 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1185 check(".....^", flags3, t+"\n", t, false);
1186 check(".....^", flags3, t+"\r\n", t, false);
1187 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1188
1189 check("^....", flags4, t+"\n"+t, t, true);
1190 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1191 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1192 check(".....^", flags4, t+"\n", t+"\n", false);
1193 check(".....^", flags4, t+"\r\n", t+"\r", false);
1194
1195 report("Caret between terminators");
1196 }
1197
1198 // This test is for 4727935
1199 private static void dollarAtEndTest() throws Exception {
1200 int flags1 = Pattern.DOTALL;
1201 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1202 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1203
1204 check("....$", flags1, "test\n", "test", true);
1205 check("....$", flags1, "test\r\n", "test", true);
1206 check(".....$", flags1, "test\n", "test\n", true);
1207 check(".....$", flags1, "test\u0085", "test\u0085", true);
1208 check("....$", flags1, "test\u0085", "test", true);
1209
1210 check("....$", flags2, "test\n", "test", true);
1211 check(".....$", flags2, "test\n", "test\n", true);
1212 check(".....$", flags2, "test\u0085", "test\u0085", true);
1213 check("....$", flags2, "test\u0085", "est\u0085", true);
1214
1215 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1216 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1217 check("....$blah", flags3, "test\nblah", "!!!!", false);
1218 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1219
1220 // Supplementary character test
1221 String t = toSupplementaries("test");
1222 String b = toSupplementaries("blah");
1223 check("....$", flags1, t+"\n", t, true);
1224 check("....$", flags1, t+"\r\n", t, true);
1225 check(".....$", flags1, t+"\n", t+"\n", true);
1226 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1227 check("....$", flags1, t+"\u0085", t, true);
1228
1229 check("....$", flags2, t+"\n", t, true);
1230 check(".....$", flags2, t+"\n", t+"\n", true);
1231 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1232 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1233
1234 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1235 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1236 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1237 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1238
1239 report("Dollar at End");
1240 }
1241
1242 // This test is for 4711773
1243 private static void multilineDollarTest() throws Exception {
1244 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1245 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1246 matcher.find();
1247 if (matcher.start(0) != 9)
1248 failCount++;
1249 matcher.find();
1250 if (matcher.start(0) != 20)
1251 failCount++;
1252
1253 // Supplementary character test
1254 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1255 matcher.find();
1256 if (matcher.start(0) != 9*2)
1257 failCount++;
1258 matcher.find();
1259 if (matcher.start(0) != 20*2)
1260 failCount++;
1261
1262 report("Multiline Dollar");
1263 }
1264
1265 private static void reluctantRepetitionTest() throws Exception {
1266 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1267 check(p, "1 word word word 2", true);
1268 check(p, "1 wor wo w 2", true);
1269 check(p, "1 word word 2", true);
1270 check(p, "1 word 2", true);
1271 check(p, "1 wo w w 2", true);
1272 check(p, "1 wo w 2", true);
1273 check(p, "1 wor w 2", true);
1274
1275 p = Pattern.compile("([a-z])+?c");
1276 Matcher m = p.matcher("ababcdefdec");
1277 check(m, "ababc");
1278
1279 // Supplementary character test
1280 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1281 m = p.matcher(toSupplementaries("ababcdefdec"));
1282 check(m, toSupplementaries("ababc"));
1283
1284 report("Reluctant Repetition");
1285 }
1286
1287 private static void serializeTest() throws Exception {
1288 String patternStr = "(b)";
1289 String matchStr = "b";
1290 Pattern pattern = Pattern.compile(patternStr);
1291 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1292 ObjectOutputStream oos = new ObjectOutputStream(baos);
1293 oos.writeObject(pattern);
1294 oos.close();
1295 ObjectInputStream ois = new ObjectInputStream(
1296 new ByteArrayInputStream(baos.toByteArray()));
1297 Pattern serializedPattern = (Pattern)ois.readObject();
1298 ois.close();
1299 Matcher matcher = serializedPattern.matcher(matchStr);
1300 if (!matcher.matches())
1301 failCount++;
1302 if (matcher.groupCount() != 1)
1303 failCount++;
1304
1305 report("Serialization");
1306 }
1307
1308 private static void gTest() {
1309 Pattern pattern = Pattern.compile("\\G\\w");
1310 Matcher matcher = pattern.matcher("abc#x#x");
1311 matcher.find();
1312 matcher.find();
1313 matcher.find();
1314 if (matcher.find())
1315 failCount++;
1316
1317 pattern = Pattern.compile("\\GA*");
1318 matcher = pattern.matcher("1A2AA3");
1319 matcher.find();
1320 if (matcher.find())
1321 failCount++;
1322
1323 pattern = Pattern.compile("\\GA*");
1324 matcher = pattern.matcher("1A2AA3");
1325 if (!matcher.find(1))
1326 failCount++;
1327 matcher.find();
1328 if (matcher.find())
1329 failCount++;
1330
1331 report("\\G");
1332 }
1333
1334 private static void zTest() {
1335 Pattern pattern = Pattern.compile("foo\\Z");
1336 // Positives
1337 check(pattern, "foo\u0085", true);
1338 check(pattern, "foo\u2028", true);
1339 check(pattern, "foo\u2029", true);
1340 check(pattern, "foo\n", true);
1341 check(pattern, "foo\r", true);
1342 check(pattern, "foo\r\n", true);
1343 // Negatives
1344 check(pattern, "fooo", false);
1345 check(pattern, "foo\n\r", false);
1346
1347 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1348 // Positives
1349 check(pattern, "foo", true);
1350 check(pattern, "foo\n", true);
1351 // Negatives
1352 check(pattern, "foo\r", false);
1353 check(pattern, "foo\u0085", false);
1354 check(pattern, "foo\u2028", false);
1355 check(pattern, "foo\u2029", false);
1356
1357 report("\\Z");
1358 }
1359
1360 private static void replaceFirstTest() {
1361 Pattern pattern = Pattern.compile("(ab)(c*)");
1362 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1363 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1364 failCount++;
1365
1366 matcher.reset("zzzabccczzzabcczzzabccczzz");
1367 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1368 failCount++;
1369
1370 matcher.reset("zzzabccczzzabcczzzabccczzz");
1371 String result = matcher.replaceFirst("$1");
1372 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1373 failCount++;
1374
1375 matcher.reset("zzzabccczzzabcczzzabccczzz");
1376 result = matcher.replaceFirst("$2");
1377 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1378 failCount++;
1379
1380 pattern = Pattern.compile("a*");
1381 matcher = pattern.matcher("aaaaaaaaaa");
1382 if (!matcher.replaceFirst("test").equals("test"))
1383 failCount++;
1384
1385 pattern = Pattern.compile("a+");
1386 matcher = pattern.matcher("zzzaaaaaaaaaa");
1387 if (!matcher.replaceFirst("test").equals("zzztest"))
1388 failCount++;
1389
1390 // Supplementary character test
1391 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1392 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1393 if (!matcher.replaceFirst(toSupplementaries("test"))
1394 .equals(toSupplementaries("testzzzabcczzzabccc")))
1395 failCount++;
1396
1397 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1398 if (!matcher.replaceFirst(toSupplementaries("test")).
1399 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1400 failCount++;
1401
1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1403 result = matcher.replaceFirst("$1");
1404 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1405 failCount++;
1406
1407 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1408 result = matcher.replaceFirst("$2");
1409 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1410 failCount++;
1411
1412 pattern = Pattern.compile(toSupplementaries("a*"));
1413 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1414 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1415 failCount++;
1416
1417 pattern = Pattern.compile(toSupplementaries("a+"));
1418 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1419 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1420 failCount++;
1421
1422 report("Replace First");
1423 }
1424
1425 private static void unixLinesTest() {
1426 Pattern pattern = Pattern.compile(".*");
1427 Matcher matcher = pattern.matcher("aa\u2028blah");
1428 matcher.find();
1429 if (!matcher.group(0).equals("aa"))
1430 failCount++;
1431
1432 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1433 matcher = pattern.matcher("aa\u2028blah");
1434 matcher.find();
1435 if (!matcher.group(0).equals("aa\u2028blah"))
1436 failCount++;
1437
1438 pattern = Pattern.compile("[az]$",
1439 Pattern.MULTILINE | Pattern.UNIX_LINES);
1440 matcher = pattern.matcher("aa\u2028zz");
1441 check(matcher, "a\u2028", false);
1442
1443 // Supplementary character test
1444 pattern = Pattern.compile(".*");
1445 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1446 matcher.find();
1447 if (!matcher.group(0).equals(toSupplementaries("aa")))
1448 failCount++;
1449
1450 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1451 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1452 matcher.find();
1453 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1454 failCount++;
1455
1456 pattern = Pattern.compile(toSupplementaries("[az]$"),
1457 Pattern.MULTILINE | Pattern.UNIX_LINES);
1458 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1459 check(matcher, toSupplementaries("a\u2028"), false);
1460
1461 report("Unix Lines");
1462 }
1463
1464 private static void commentsTest() {
1465 int flags = Pattern.COMMENTS;
1466
1467 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1468 Matcher matcher = pattern.matcher("aa#aa");
1469 if (!matcher.matches())
1470 failCount++;
1471
1472 pattern = Pattern.compile("aa # blah", flags);
1473 matcher = pattern.matcher("aa");
1474 if (!matcher.matches())
1475 failCount++;
1476
1477 pattern = Pattern.compile("aa blah", flags);
1478 matcher = pattern.matcher("aablah");
1479 if (!matcher.matches())
1480 failCount++;
1481
1482 pattern = Pattern.compile("aa # blah blech ", flags);
1483 matcher = pattern.matcher("aa");
1484 if (!matcher.matches())
1485 failCount++;
1486
1487 pattern = Pattern.compile("aa # blah\n ", flags);
1488 matcher = pattern.matcher("aa");
1489 if (!matcher.matches())
1490 failCount++;
1491
1492 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1493 matcher = pattern.matcher("aabc");
1494 if (!matcher.matches())
1495 failCount++;
1496
1497 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1498 matcher = pattern.matcher("aabc");
1499 if (!matcher.matches())
1500 failCount++;
1501
1502 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1503 matcher = pattern.matcher("aabc#blech");
1504 if (!matcher.matches())
1505 failCount++;
1506
1507 // Supplementary character test
1508 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1509 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1510 if (!matcher.matches())
1511 failCount++;
1512
1513 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1514 matcher = pattern.matcher(toSupplementaries("aa"));
1515 if (!matcher.matches())
1516 failCount++;
1517
1518 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1519 matcher = pattern.matcher(toSupplementaries("aablah"));
1520 if (!matcher.matches())
1521 failCount++;
1522
1523 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1524 matcher = pattern.matcher(toSupplementaries("aa"));
1525 if (!matcher.matches())
1526 failCount++;
1527
1528 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1529 matcher = pattern.matcher(toSupplementaries("aa"));
1530 if (!matcher.matches())
1531 failCount++;
1532
1533 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1534 matcher = pattern.matcher(toSupplementaries("aabc"));
1535 if (!matcher.matches())
1536 failCount++;
1537
1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1539 matcher = pattern.matcher(toSupplementaries("aabc"));
1540 if (!matcher.matches())
1541 failCount++;
1542
1543 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1544 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1545 if (!matcher.matches())
1546 failCount++;
1547
1548 report("Comments");
1549 }
1550
1551 private static void caseFoldingTest() { // bug 4504687
1552 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1553 Pattern pattern = Pattern.compile("aa", flags);
1554 Matcher matcher = pattern.matcher("ab");
1555 if (matcher.matches())
1556 failCount++;
1557
1558 pattern = Pattern.compile("aA", flags);
1559 matcher = pattern.matcher("ab");
1560 if (matcher.matches())
1561 failCount++;
1562
1563 pattern = Pattern.compile("aa", flags);
1564 matcher = pattern.matcher("aB");
1565 if (matcher.matches())
1566 failCount++;
1567 matcher = pattern.matcher("Ab");
1568 if (matcher.matches())
1569 failCount++;
1570
1571 // ASCII "a"
1572 // Latin-1 Supplement "a" + grave
1573 // Cyrillic "a"
1574 String[] patterns = new String[] {
1575 //single
1576 "a", "\u00e0", "\u0430",
1577 //slice
1578 "ab", "\u00e0\u00e1", "\u0430\u0431",
1579 //class single
1580 "[a]", "[\u00e0]", "[\u0430]",
1581 //class range
1582 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1583 //back reference
1584 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1585 };
1586
1587 String[] texts = new String[] {
1588 "A", "\u00c0", "\u0410",
1589 "AB", "\u00c0\u00c1", "\u0410\u0411",
1590 "A", "\u00c0", "\u0410",
1591 "B", "\u00c2", "\u0411",
1592 "aA", "\u00e0\u00c0", "\u0430\u0410"
1593 };
1594
1595 boolean[] expected = new boolean[] {
1596 true, false, false,
1597 true, false, false,
1598 true, false, false,
1599 true, false, false,
1600 true, false, false
1601 };
1602
1603 flags = Pattern.CASE_INSENSITIVE;
1604 for (int i = 0; i < patterns.length; i++) {
1605 pattern = Pattern.compile(patterns[i], flags);
1606 matcher = pattern.matcher(texts[i]);
1607 if (matcher.matches() != expected[i]) {
1608 System.out.println("<1> Failed at " + i);
1609 failCount++;
1610 }
1611 }
1612
1613 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1614 for (int i = 0; i < patterns.length; i++) {
1615 pattern = Pattern.compile(patterns[i], flags);
1616 matcher = pattern.matcher(texts[i]);
1617 if (!matcher.matches()) {
1618 System.out.println("<2> Failed at " + i);
1619 failCount++;
1620 }
1621 }
1622 // flag unicode_case alone should do nothing
1623 flags = Pattern.UNICODE_CASE;
1624 for (int i = 0; i < patterns.length; i++) {
1625 pattern = Pattern.compile(patterns[i], flags);
1626 matcher = pattern.matcher(texts[i]);
1627 if (matcher.matches()) {
1628 System.out.println("<3> Failed at " + i);
1629 failCount++;
1630 }
1631 }
1632
1633 // Special cases: i, I, u+0131 and u+0130
1634 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1635 pattern = Pattern.compile("[h-j]+", flags);
1636 if (!pattern.matcher("\u0131\u0130").matches())
1637 failCount++;
1638 report("Case Folding");
1639 }
1640
1641 private static void appendTest() {
1642 Pattern pattern = Pattern.compile("(ab)(cd)");
1643 Matcher matcher = pattern.matcher("abcd");
1644 String result = matcher.replaceAll("$2$1");
1645 if (!result.equals("cdab"))
1646 failCount++;
1647
1648 String s1 = "Swap all: first = 123, second = 456";
1649 String s2 = "Swap one: first = 123, second = 456";
1650 String r = "$3$2$1";
1651 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1652 matcher = pattern.matcher(s1);
1653
1654 result = matcher.replaceAll(r);
1655 if (!result.equals("Swap all: 123 = first, 456 = second"))
1656 failCount++;
1657
1658 matcher = pattern.matcher(s2);
1659
1660 if (matcher.find()) {
1661 StringBuffer sb = new StringBuffer();
1662 matcher.appendReplacement(sb, r);
1663 matcher.appendTail(sb);
1664 result = sb.toString();
1665 if (!result.equals("Swap one: 123 = first, second = 456"))
1666 failCount++;
1667 }
1668
1669 // Supplementary character test
1670 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1671 matcher = pattern.matcher(toSupplementaries("abcd"));
1672 result = matcher.replaceAll("$2$1");
1673 if (!result.equals(toSupplementaries("cdab")))
1674 failCount++;
1675
1676 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1677 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1678 r = toSupplementaries("$3$2$1");
1679 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1680 matcher = pattern.matcher(s1);
1681
1682 result = matcher.replaceAll(r);
1683 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1684 failCount++;
1685
1686 matcher = pattern.matcher(s2);
1687
1688 if (matcher.find()) {
1689 StringBuffer sb = new StringBuffer();
1690 matcher.appendReplacement(sb, r);
1691 matcher.appendTail(sb);
1692 result = sb.toString();
1693 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1694 failCount++;
1695 }
1696 report("Append");
1697 }
1698
1699 private static void splitTest() {
1700 Pattern pattern = Pattern.compile(":");
1701 String[] result = pattern.split("foo:and:boo", 2);
1702 if (!result[0].equals("foo"))
1703 failCount++;
1704 if (!result[1].equals("and:boo"))
1705 failCount++;
1706 // Supplementary character test
1707 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1708 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1709 if (!result[0].equals(toSupplementaries("foo")))
1710 failCount++;
1711 if (!result[1].equals(toSupplementaries("andXboo")))
1712 failCount++;
1713
1714 CharBuffer cb = CharBuffer.allocate(100);
1715 cb.put("foo:and:boo");
1716 cb.flip();
1717 result = pattern.split(cb);
1718 if (!result[0].equals("foo"))
1719 failCount++;
1720 if (!result[1].equals("and"))
1721 failCount++;
1722 if (!result[2].equals("boo"))
1723 failCount++;
1724
1725 // Supplementary character test
1726 CharBuffer cbs = CharBuffer.allocate(100);
1727 cbs.put(toSupplementaries("fooXandXboo"));
1728 cbs.flip();
1729 result = patternX.split(cbs);
1730 if (!result[0].equals(toSupplementaries("foo")))
1731 failCount++;
1732 if (!result[1].equals(toSupplementaries("and")))
1733 failCount++;
1734 if (!result[2].equals(toSupplementaries("boo")))
1735 failCount++;
1736
1737 String source = "0123456789";
1738 for (int limit=-2; limit<3; limit++) {
1739 for (int x=0; x<10; x++) {
1740 result = source.split(Integer.toString(x), limit);
1741 int expectedLength = limit < 1 ? 2 : limit;
1742
1743 if ((limit == 0) && (x == 9)) {
1744 // expected dropping of ""
1745 if (result.length != 1)
1746 failCount++;
1747 if (!result[0].equals("012345678")) {
1748 failCount++;
1749 }
1750 } else {
1751 if (result.length != expectedLength) {
1752 failCount++;
1753 }
1754 if (!result[0].equals(source.substring(0,x))) {
1755 if (limit != 1) {
1756 failCount++;
1757 } else {
1758 if (!result[0].equals(source.substring(0,10))) {
1759 failCount++;
1760 }
1761 }
1762 }
1763 if (expectedLength > 1) { // Check segment 2
1764 if (!result[1].equals(source.substring(x+1,10)))
1765 failCount++;
1766 }
1767 }
1768 }
1769 }
1770 // Check the case for no match found
1771 for (int limit=-2; limit<3; limit++) {
1772 result = source.split("e", limit);
1773 if (result.length != 1)
1774 failCount++;
1775 if (!result[0].equals(source))
1776 failCount++;
1777 }
1778 // Check the case for limit == 0, source = "";
sherman1242a6d2013-11-13 11:26:01 -08001779 // split() now returns 0-length for empty source "" see #6559590
sherman0b4d42d2009-02-23 21:06:15 -08001780 source = "";
1781 result = source.split("e", 0);
1782 if (result.length != 1)
1783 failCount++;
1784 if (!result[0].equals(source))
1785 failCount++;
1786
sherman1242a6d2013-11-13 11:26:01 -08001787 // Check both split() and splitAsStraem(), especially for zero-lenth
1788 // input and zero-lenth match cases
1789 String[][] input = new String[][] {
1790 { " ", "Abc Efg Hij" }, // normal non-zero-match
1791 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1792 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1793 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1794 { "(?=\\p{Lu})", "AbcEfg" },
1795 { "(?=\\p{Lu})", "Abc" },
1796 { " ", "" }, // zero-length input
1797 { ".*", "" },
1798
1799 // some tests from PatternStreamTest.java
1800 { "4", "awgqwefg1fefw4vssv1vvv1" },
1801 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1802 { "1", "awgqwefg1fefw4vssv1vvv1" },
1803 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1804 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1805 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1806 { "\u56da", "" },
1807 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1808 { "o", "boo:and:foo" },
1809 { "o", "booooo:and:fooooo" },
1810 { "o", "fooooo:" },
1811 };
1812
1813 String[][] expected = new String[][] {
1814 { "Abc", "Efg", "Hij" },
1815 { "", "Abc", "Efg", "Hij" },
1816 { "Abc", "", "Efg", "Hij" },
1817 { "Abc", "Efg", "Hij" },
1818 { "Abc", "Efg" },
1819 { "Abc" },
sherman12888112013-11-13 22:22:28 -08001820 { "" },
1821 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001822
1823 { "awgqwefg1fefw", "vssv1vvv1" },
1824 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1825 { "awgqwefg", "fefw4vssv", "vvv" },
1826 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1827 { "1", "23", "456", "7890" },
1828 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
sherman12888112013-11-13 22:22:28 -08001829 { "" },
sherman1242a6d2013-11-13 11:26:01 -08001830 { "This", "is", "testing", "", "with", "different", "separators" },
1831 { "b", "", ":and:f" },
1832 { "b", "", "", "", "", ":and:f" },
1833 { "f", "", "", "", "", ":" },
1834 };
1835 for (int i = 0; i < input.length; i++) {
1836 pattern = Pattern.compile(input[i][0]);
sherman12888112013-11-13 22:22:28 -08001837 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001838 failCount++;
sherman12888112013-11-13 22:22:28 -08001839 }
1840 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting
1841 // array for zero-length input for now
1842 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1843 expected[i])) {
sherman1242a6d2013-11-13 11:26:01 -08001844 failCount++;
sherman12888112013-11-13 22:22:28 -08001845 }
sherman1242a6d2013-11-13 11:26:01 -08001846 }
sherman0b4d42d2009-02-23 21:06:15 -08001847 report("Split");
1848 }
1849
1850 private static void negationTest() {
1851 Pattern pattern = Pattern.compile("[\\[@^]+");
1852 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1853 if (!matcher.find())
1854 failCount++;
1855 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1856 failCount++;
1857 pattern = Pattern.compile("[@\\[^]+");
1858 matcher = pattern.matcher("@@@@[[[[^^^^");
1859 if (!matcher.find())
1860 failCount++;
1861 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1862 failCount++;
1863 pattern = Pattern.compile("[@\\[^@]+");
1864 matcher = pattern.matcher("@@@@[[[[^^^^");
1865 if (!matcher.find())
1866 failCount++;
1867 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1868 failCount++;
1869
1870 pattern = Pattern.compile("\\)");
1871 matcher = pattern.matcher("xxx)xxx");
1872 if (!matcher.find())
1873 failCount++;
1874
1875 report("Negation");
1876 }
1877
1878 private static void ampersandTest() {
1879 Pattern pattern = Pattern.compile("[&@]+");
1880 check(pattern, "@@@@&&&&", true);
1881
1882 pattern = Pattern.compile("[@&]+");
1883 check(pattern, "@@@@&&&&", true);
1884
1885 pattern = Pattern.compile("[@\\&]+");
1886 check(pattern, "@@@@&&&&", true);
1887
1888 report("Ampersand");
1889 }
1890
1891 private static void octalTest() throws Exception {
1892 Pattern pattern = Pattern.compile("\\u0007");
1893 Matcher matcher = pattern.matcher("\u0007");
1894 if (!matcher.matches())
1895 failCount++;
1896 pattern = Pattern.compile("\\07");
1897 matcher = pattern.matcher("\u0007");
1898 if (!matcher.matches())
1899 failCount++;
1900 pattern = Pattern.compile("\\007");
1901 matcher = pattern.matcher("\u0007");
1902 if (!matcher.matches())
1903 failCount++;
1904 pattern = Pattern.compile("\\0007");
1905 matcher = pattern.matcher("\u0007");
1906 if (!matcher.matches())
1907 failCount++;
1908 pattern = Pattern.compile("\\040");
1909 matcher = pattern.matcher("\u0020");
1910 if (!matcher.matches())
1911 failCount++;
1912 pattern = Pattern.compile("\\0403");
1913 matcher = pattern.matcher("\u00203");
1914 if (!matcher.matches())
1915 failCount++;
1916 pattern = Pattern.compile("\\0103");
1917 matcher = pattern.matcher("\u0043");
1918 if (!matcher.matches())
1919 failCount++;
1920
1921 report("Octal");
1922 }
1923
1924 private static void longPatternTest() throws Exception {
1925 try {
1926 Pattern pattern = Pattern.compile(
1927 "a 32-character-long pattern xxxx");
1928 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1929 pattern = Pattern.compile("a thirty four character long regex");
1930 StringBuffer patternToBe = new StringBuffer(101);
1931 for (int i=0; i<100; i++)
1932 patternToBe.append((char)(97 + i%26));
1933 pattern = Pattern.compile(patternToBe.toString());
1934 } catch (PatternSyntaxException e) {
1935 failCount++;
1936 }
1937
1938 // Supplementary character test
1939 try {
1940 Pattern pattern = Pattern.compile(
1941 toSupplementaries("a 32-character-long pattern xxxx"));
1942 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1943 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1944 StringBuffer patternToBe = new StringBuffer(101*2);
1945 for (int i=0; i<100; i++)
1946 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1947 + 97 + i%26));
1948 pattern = Pattern.compile(patternToBe.toString());
1949 } catch (PatternSyntaxException e) {
1950 failCount++;
1951 }
1952 report("LongPattern");
1953 }
1954
1955 private static void group0Test() throws Exception {
1956 Pattern pattern = Pattern.compile("(tes)ting");
1957 Matcher matcher = pattern.matcher("testing");
1958 check(matcher, "testing");
1959
1960 matcher.reset("testing");
1961 if (matcher.lookingAt()) {
1962 if (!matcher.group(0).equals("testing"))
1963 failCount++;
1964 } else {
1965 failCount++;
1966 }
1967
1968 matcher.reset("testing");
1969 if (matcher.matches()) {
1970 if (!matcher.group(0).equals("testing"))
1971 failCount++;
1972 } else {
1973 failCount++;
1974 }
1975
1976 pattern = Pattern.compile("(tes)ting");
1977 matcher = pattern.matcher("testing");
1978 if (matcher.lookingAt()) {
1979 if (!matcher.group(0).equals("testing"))
1980 failCount++;
1981 } else {
1982 failCount++;
1983 }
1984
1985 pattern = Pattern.compile("^(tes)ting");
1986 matcher = pattern.matcher("testing");
1987 if (matcher.matches()) {
1988 if (!matcher.group(0).equals("testing"))
1989 failCount++;
1990 } else {
1991 failCount++;
1992 }
1993
1994 // Supplementary character test
1995 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1996 matcher = pattern.matcher(toSupplementaries("testing"));
1997 check(matcher, toSupplementaries("testing"));
1998
1999 matcher.reset(toSupplementaries("testing"));
2000 if (matcher.lookingAt()) {
2001 if (!matcher.group(0).equals(toSupplementaries("testing")))
2002 failCount++;
2003 } else {
2004 failCount++;
2005 }
2006
2007 matcher.reset(toSupplementaries("testing"));
2008 if (matcher.matches()) {
2009 if (!matcher.group(0).equals(toSupplementaries("testing")))
2010 failCount++;
2011 } else {
2012 failCount++;
2013 }
2014
2015 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2016 matcher = pattern.matcher(toSupplementaries("testing"));
2017 if (matcher.lookingAt()) {
2018 if (!matcher.group(0).equals(toSupplementaries("testing")))
2019 failCount++;
2020 } else {
2021 failCount++;
2022 }
2023
2024 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2025 matcher = pattern.matcher(toSupplementaries("testing"));
2026 if (matcher.matches()) {
2027 if (!matcher.group(0).equals(toSupplementaries("testing")))
2028 failCount++;
2029 } else {
2030 failCount++;
2031 }
2032
2033 report("Group0");
2034 }
2035
2036 private static void findIntTest() throws Exception {
2037 Pattern p = Pattern.compile("blah");
2038 Matcher m = p.matcher("zzzzblahzzzzzblah");
2039 boolean result = m.find(2);
2040 if (!result)
2041 failCount++;
2042
2043 p = Pattern.compile("$");
2044 m = p.matcher("1234567890");
2045 result = m.find(10);
2046 if (!result)
2047 failCount++;
2048 try {
2049 result = m.find(11);
2050 failCount++;
2051 } catch (IndexOutOfBoundsException e) {
2052 // correct result
2053 }
2054
2055 // Supplementary character test
2056 p = Pattern.compile(toSupplementaries("blah"));
2057 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2058 result = m.find(2);
2059 if (!result)
2060 failCount++;
2061
2062 report("FindInt");
2063 }
2064
2065 private static void emptyPatternTest() throws Exception {
2066 Pattern p = Pattern.compile("");
2067 Matcher m = p.matcher("foo");
2068
2069 // Should find empty pattern at beginning of input
2070 boolean result = m.find();
2071 if (result != true)
2072 failCount++;
2073 if (m.start() != 0)
2074 failCount++;
2075
2076 // Should not match entire input if input is not empty
2077 m.reset();
2078 result = m.matches();
2079 if (result == true)
2080 failCount++;
2081
2082 try {
2083 m.start(0);
2084 failCount++;
2085 } catch (IllegalStateException e) {
2086 // Correct result
2087 }
2088
2089 // Should match entire input if input is empty
2090 m.reset("");
2091 result = m.matches();
2092 if (result != true)
2093 failCount++;
2094
2095 result = Pattern.matches("", "");
2096 if (result != true)
2097 failCount++;
2098
2099 result = Pattern.matches("", "foo");
2100 if (result == true)
2101 failCount++;
2102 report("EmptyPattern");
2103 }
2104
2105 private static void charClassTest() throws Exception {
2106 Pattern pattern = Pattern.compile("blah[ab]]blech");
2107 check(pattern, "blahb]blech", true);
2108
2109 pattern = Pattern.compile("[abc[def]]");
2110 check(pattern, "b", true);
2111
2112 // Supplementary character tests
2113 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2114 check(pattern, toSupplementaries("blahb]blech"), true);
2115
2116 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2117 check(pattern, toSupplementaries("b"), true);
2118
2119 try {
2120 // u00ff when UNICODE_CASE
2121 pattern = Pattern.compile("[ab\u00ffcd]",
2122 Pattern.CASE_INSENSITIVE|
2123 Pattern.UNICODE_CASE);
2124 check(pattern, "ab\u00ffcd", true);
2125 check(pattern, "Ab\u0178Cd", true);
2126
2127 // u00b5 when UNICODE_CASE
2128 pattern = Pattern.compile("[ab\u00b5cd]",
2129 Pattern.CASE_INSENSITIVE|
2130 Pattern.UNICODE_CASE);
2131 check(pattern, "ab\u00b5cd", true);
2132 check(pattern, "Ab\u039cCd", true);
2133 } catch (Exception e) { failCount++; }
2134
2135 /* Special cases
2136 (1)LatinSmallLetterLongS u+017f
2137 (2)LatinSmallLetterDotlessI u+0131
2138 (3)LatineCapitalLetterIWithDotAbove u+0130
2139 (4)KelvinSign u+212a
2140 (5)AngstromSign u+212b
2141 */
2142 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2143 pattern = Pattern.compile("[sik\u00c5]+", flags);
2144 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2145 failCount++;
2146
2147 report("CharClass");
2148 }
2149
2150 private static void caretTest() throws Exception {
2151 Pattern pattern = Pattern.compile("\\w*");
2152 Matcher matcher = pattern.matcher("a#bc#def##g");
2153 check(matcher, "a");
2154 check(matcher, "");
2155 check(matcher, "bc");
2156 check(matcher, "");
2157 check(matcher, "def");
2158 check(matcher, "");
2159 check(matcher, "");
2160 check(matcher, "g");
2161 check(matcher, "");
2162 if (matcher.find())
2163 failCount++;
2164
2165 pattern = Pattern.compile("^\\w*");
2166 matcher = pattern.matcher("a#bc#def##g");
2167 check(matcher, "a");
2168 if (matcher.find())
2169 failCount++;
2170
2171 pattern = Pattern.compile("\\w");
2172 matcher = pattern.matcher("abc##x");
2173 check(matcher, "a");
2174 check(matcher, "b");
2175 check(matcher, "c");
2176 check(matcher, "x");
2177 if (matcher.find())
2178 failCount++;
2179
2180 pattern = Pattern.compile("^\\w");
2181 matcher = pattern.matcher("abc##x");
2182 check(matcher, "a");
2183 if (matcher.find())
2184 failCount++;
2185
2186 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2187 matcher = pattern.matcher("abcdef-ghi\njklmno");
2188 check(matcher, "abc");
2189 if (matcher.find())
2190 failCount++;
2191
2192 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2193 matcher = pattern.matcher("abcdef-ghi\njklmno");
2194 check(matcher, "abc");
2195 check(matcher, "jkl");
2196 if (matcher.find())
2197 failCount++;
2198
2199 pattern = Pattern.compile("^", Pattern.MULTILINE);
2200 matcher = pattern.matcher("this is some text");
2201 String result = matcher.replaceAll("X");
2202 if (!result.equals("Xthis is some text"))
2203 failCount++;
2204
2205 pattern = Pattern.compile("^");
2206 matcher = pattern.matcher("this is some text");
2207 result = matcher.replaceAll("X");
2208 if (!result.equals("Xthis is some text"))
2209 failCount++;
2210
2211 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2212 matcher = pattern.matcher("this is some text\n");
2213 result = matcher.replaceAll("X");
2214 if (!result.equals("Xthis is some text\n"))
2215 failCount++;
2216
2217 report("Caret");
2218 }
2219
2220 private static void groupCaptureTest() throws Exception {
2221 // Independent group
2222 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2223 Matcher matcher = pattern.matcher("xxxyyyzzz");
2224 matcher.find();
2225 try {
2226 String blah = matcher.group(1);
2227 failCount++;
2228 } catch (IndexOutOfBoundsException ioobe) {
2229 // Good result
2230 }
2231 // Pure group
2232 pattern = Pattern.compile("x+(?:y+)z+");
2233 matcher = pattern.matcher("xxxyyyzzz");
2234 matcher.find();
2235 try {
2236 String blah = matcher.group(1);
2237 failCount++;
2238 } catch (IndexOutOfBoundsException ioobe) {
2239 // Good result
2240 }
2241
2242 // Supplementary character tests
2243 // Independent group
2244 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2245 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2246 matcher.find();
2247 try {
2248 String blah = matcher.group(1);
2249 failCount++;
2250 } catch (IndexOutOfBoundsException ioobe) {
2251 // Good result
2252 }
2253 // Pure group
2254 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2255 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2256 matcher.find();
2257 try {
2258 String blah = matcher.group(1);
2259 failCount++;
2260 } catch (IndexOutOfBoundsException ioobe) {
2261 // Good result
2262 }
2263
2264 report("GroupCapture");
2265 }
2266
2267 private static void backRefTest() throws Exception {
2268 Pattern pattern = Pattern.compile("(a*)bc\\1");
2269 check(pattern, "zzzaabcazzz", true);
2270
2271 pattern = Pattern.compile("(a*)bc\\1");
2272 check(pattern, "zzzaabcaazzz", true);
2273
2274 pattern = Pattern.compile("(abc)(def)\\1");
2275 check(pattern, "abcdefabc", true);
2276
2277 pattern = Pattern.compile("(abc)(def)\\3");
2278 check(pattern, "abcdefabc", false);
2279
2280 try {
2281 for (int i = 1; i < 10; i++) {
2282 // Make sure backref 1-9 are always accepted
2283 pattern = Pattern.compile("abcdef\\" + i);
2284 // and fail to match if the target group does not exit
2285 check(pattern, "abcdef", false);
2286 }
2287 } catch(PatternSyntaxException e) {
2288 failCount++;
2289 }
2290
2291 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2292 check(pattern, "abcdefghija", false);
2293 check(pattern, "abcdefghija1", true);
2294
2295 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2296 check(pattern, "abcdefghijkk", true);
2297
2298 pattern = Pattern.compile("(a)bcdefghij\\11");
2299 check(pattern, "abcdefghija1", true);
2300
2301 // Supplementary character tests
2302 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2303 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2304
2305 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2306 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2307
2308 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2309 check(pattern, toSupplementaries("abcdefabc"), true);
2310
2311 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2312 check(pattern, toSupplementaries("abcdefabc"), false);
2313
2314 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2315 check(pattern, toSupplementaries("abcdefghija"), false);
2316 check(pattern, toSupplementaries("abcdefghija1"), true);
2317
2318 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2319 check(pattern, toSupplementaries("abcdefghijkk"), true);
2320
2321 report("BackRef");
2322 }
2323
2324 /**
2325 * Unicode Technical Report #18, section 2.6 End of Line
2326 * There is no empty line to be matched in the sequence \u000D\u000A
2327 * but there is an empty line in the sequence \u000A\u000D.
2328 */
2329 private static void anchorTest() throws Exception {
2330 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2331 Matcher m = p.matcher("blah1\r\nblah2");
2332 m.find();
2333 m.find();
2334 if (!m.group().equals("blah2"))
2335 failCount++;
2336
2337 m.reset("blah1\n\rblah2");
2338 m.find();
2339 m.find();
2340 m.find();
2341 if (!m.group().equals("blah2"))
2342 failCount++;
2343
2344 // Test behavior of $ with \r\n at end of input
2345 p = Pattern.compile(".+$");
2346 m = p.matcher("blah1\r\n");
2347 if (!m.find())
2348 failCount++;
2349 if (!m.group().equals("blah1"))
2350 failCount++;
2351 if (m.find())
2352 failCount++;
2353
2354 // Test behavior of $ with \r\n at end of input in multiline
2355 p = Pattern.compile(".+$", Pattern.MULTILINE);
2356 m = p.matcher("blah1\r\n");
2357 if (!m.find())
2358 failCount++;
2359 if (m.find())
2360 failCount++;
2361
2362 // Test for $ recognition of \u0085 for bug 4527731
2363 p = Pattern.compile(".+$", Pattern.MULTILINE);
2364 m = p.matcher("blah1\u0085");
2365 if (!m.find())
2366 failCount++;
2367
2368 // Supplementary character test
2369 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2370 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2371 m.find();
2372 m.find();
2373 if (!m.group().equals(toSupplementaries("blah2")))
2374 failCount++;
2375
2376 m.reset(toSupplementaries("blah1\n\rblah2"));
2377 m.find();
2378 m.find();
2379 m.find();
2380 if (!m.group().equals(toSupplementaries("blah2")))
2381 failCount++;
2382
2383 // Test behavior of $ with \r\n at end of input
2384 p = Pattern.compile(".+$");
2385 m = p.matcher(toSupplementaries("blah1\r\n"));
2386 if (!m.find())
2387 failCount++;
2388 if (!m.group().equals(toSupplementaries("blah1")))
2389 failCount++;
2390 if (m.find())
2391 failCount++;
2392
2393 // Test behavior of $ with \r\n at end of input in multiline
2394 p = Pattern.compile(".+$", Pattern.MULTILINE);
2395 m = p.matcher(toSupplementaries("blah1\r\n"));
2396 if (!m.find())
2397 failCount++;
2398 if (m.find())
2399 failCount++;
2400
2401 // Test for $ recognition of \u0085 for bug 4527731
2402 p = Pattern.compile(".+$", Pattern.MULTILINE);
2403 m = p.matcher(toSupplementaries("blah1\u0085"));
2404 if (!m.find())
2405 failCount++;
2406
2407 report("Anchors");
2408 }
2409
2410 /**
2411 * A basic sanity test of Matcher.lookingAt().
2412 */
2413 private static void lookingAtTest() throws Exception {
2414 Pattern p = Pattern.compile("(ab)(c*)");
2415 Matcher m = p.matcher("abccczzzabcczzzabccc");
2416
2417 if (!m.lookingAt())
2418 failCount++;
2419
2420 if (!m.group().equals(m.group(0)))
2421 failCount++;
2422
2423 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2424 if (m.lookingAt())
2425 failCount++;
2426
2427 // Supplementary character test
2428 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2429 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2430
2431 if (!m.lookingAt())
2432 failCount++;
2433
2434 if (!m.group().equals(m.group(0)))
2435 failCount++;
2436
2437 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2438 if (m.lookingAt())
2439 failCount++;
2440
2441 report("Looking At");
2442 }
2443
2444 /**
2445 * A basic sanity test of Matcher.matches().
2446 */
2447 private static void matchesTest() throws Exception {
2448 // matches()
2449 Pattern p = Pattern.compile("ulb(c*)");
2450 Matcher m = p.matcher("ulbcccccc");
2451 if (!m.matches())
2452 failCount++;
2453
2454 // find() but not matches()
2455 m.reset("zzzulbcccccc");
2456 if (m.matches())
2457 failCount++;
2458
2459 // lookingAt() but not matches()
2460 m.reset("ulbccccccdef");
2461 if (m.matches())
2462 failCount++;
2463
2464 // matches()
2465 p = Pattern.compile("a|ad");
2466 m = p.matcher("ad");
2467 if (!m.matches())
2468 failCount++;
2469
2470 // Supplementary character test
2471 // matches()
2472 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2473 m = p.matcher(toSupplementaries("ulbcccccc"));
2474 if (!m.matches())
2475 failCount++;
2476
2477 // find() but not matches()
2478 m.reset(toSupplementaries("zzzulbcccccc"));
2479 if (m.matches())
2480 failCount++;
2481
2482 // lookingAt() but not matches()
2483 m.reset(toSupplementaries("ulbccccccdef"));
2484 if (m.matches())
2485 failCount++;
2486
2487 // matches()
2488 p = Pattern.compile(toSupplementaries("a|ad"));
2489 m = p.matcher(toSupplementaries("ad"));
2490 if (!m.matches())
2491 failCount++;
2492
2493 report("Matches");
2494 }
2495
2496 /**
2497 * A basic sanity test of Pattern.matches().
2498 */
2499 private static void patternMatchesTest() throws Exception {
2500 // matches()
2501 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2502 toSupplementaries("ulbcccccc")))
2503 failCount++;
2504
2505 // find() but not matches()
2506 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2507 toSupplementaries("zzzulbcccccc")))
2508 failCount++;
2509
2510 // lookingAt() but not matches()
2511 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2512 toSupplementaries("ulbccccccdef")))
2513 failCount++;
2514
2515 // Supplementary character test
2516 // matches()
2517 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2518 toSupplementaries("ulbcccccc")))
2519 failCount++;
2520
2521 // find() but not matches()
2522 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2523 toSupplementaries("zzzulbcccccc")))
2524 failCount++;
2525
2526 // lookingAt() but not matches()
2527 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2528 toSupplementaries("ulbccccccdef")))
2529 failCount++;
2530
2531 report("Pattern Matches");
2532 }
2533
2534 /**
2535 * Canonical equivalence testing. Tests the ability of the engine
2536 * to match sequences that are not explicitly specified in the
2537 * pattern when they are considered equivalent by the Unicode Standard.
2538 */
2539 private static void ceTest() throws Exception {
2540 // Decomposed char outside char classes
2541 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2542 Matcher m = p.matcher("test\u00e5");
2543 if (!m.matches())
2544 failCount++;
2545
2546 m.reset("testa\u030a");
2547 if (!m.matches())
2548 failCount++;
2549
2550 // Composed char outside char classes
2551 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2552 m = p.matcher("test\u00e5");
2553 if (!m.matches())
2554 failCount++;
2555
2556 m.reset("testa\u030a");
2557 if (!m.find())
2558 failCount++;
2559
2560 // Decomposed char inside a char class
2561 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2562 m = p.matcher("test\u00e5");
2563 if (!m.find())
2564 failCount++;
2565
2566 m.reset("testa\u030a");
2567 if (!m.find())
2568 failCount++;
2569
2570 // Composed char inside a char class
2571 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2572 m = p.matcher("test\u00e5");
2573 if (!m.find())
2574 failCount++;
2575
2576 m.reset("testa\u0300");
2577 if (!m.find())
2578 failCount++;
2579
2580 m.reset("testa\u030a");
2581 if (!m.find())
2582 failCount++;
2583
2584 // Marks that cannot legally change order and be equivalent
2585 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2586 check(p, "testa\u0308\u0300", true);
2587 check(p, "testa\u0300\u0308", false);
2588
2589 // Marks that can legally change order and be equivalent
2590 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2591 check(p, "testa\u0308\u0323", true);
2592 check(p, "testa\u0323\u0308", true);
2593
2594 // Test all equivalences of the sequence a\u0308\u0323\u0300
2595 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2596 check(p, "testa\u0308\u0323\u0300", true);
2597 check(p, "testa\u0323\u0308\u0300", true);
2598 check(p, "testa\u0308\u0300\u0323", true);
2599 check(p, "test\u00e4\u0323\u0300", true);
2600 check(p, "test\u00e4\u0300\u0323", true);
2601
2602 /*
2603 * The following canonical equivalence tests don't work. Bug id: 4916384.
2604 *
2605 // Decomposed hangul (jamos)
2606 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2607 m = p.matcher("\u1100\u1161");
2608 if (!m.matches())
2609 failCount++;
2610
2611 m.reset("\uac00");
2612 if (!m.matches())
2613 failCount++;
2614
2615 // Composed hangul
2616 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2617 m = p.matcher("\u1100\u1161");
2618 if (!m.matches())
2619 failCount++;
2620
2621 m.reset("\uac00");
2622 if (!m.matches())
2623 failCount++;
2624
2625 // Decomposed supplementary outside char classes
2626 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2627 m = p.matcher("test\ud834\uddc0");
2628 if (!m.matches())
2629 failCount++;
2630
2631 m.reset("test\ud834\uddbc\ud834\udd6f");
2632 if (!m.matches())
2633 failCount++;
2634
2635 // Composed supplementary outside char classes
2636 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2637 m.reset("test\ud834\uddbc\ud834\udd6f");
2638 if (!m.matches())
2639 failCount++;
2640
2641 m = p.matcher("test\ud834\uddc0");
2642 if (!m.matches())
2643 failCount++;
2644
2645 */
2646
2647 report("Canonical Equivalence");
2648 }
2649
2650 /**
2651 * A basic sanity test of Matcher.replaceAll().
2652 */
2653 private static void globalSubstitute() throws Exception {
2654 // Global substitution with a literal
2655 Pattern p = Pattern.compile("(ab)(c*)");
2656 Matcher m = p.matcher("abccczzzabcczzzabccc");
2657 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2658 failCount++;
2659
2660 m.reset("zzzabccczzzabcczzzabccczzz");
2661 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2662 failCount++;
2663
2664 // Global substitution with groups
2665 m.reset("zzzabccczzzabcczzzabccczzz");
2666 String result = m.replaceAll("$1");
2667 if (!result.equals("zzzabzzzabzzzabzzz"))
2668 failCount++;
2669
2670 // Supplementary character test
2671 // Global substitution with a literal
2672 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2673 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2674 if (!m.replaceAll(toSupplementaries("test")).
2675 equals(toSupplementaries("testzzztestzzztest")))
2676 failCount++;
2677
2678 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2679 if (!m.replaceAll(toSupplementaries("test")).
2680 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2681 failCount++;
2682
2683 // Global substitution with groups
2684 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2685 result = m.replaceAll("$1");
2686 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2687 failCount++;
2688
2689 report("Global Substitution");
2690 }
2691
2692 /**
2693 * Tests the usage of Matcher.appendReplacement() with literal
2694 * and group substitutions.
2695 */
2696 private static void stringbufferSubstitute() throws Exception {
2697 // SB substitution with literal
2698 String blah = "zzzblahzzz";
2699 Pattern p = Pattern.compile("blah");
2700 Matcher m = p.matcher(blah);
2701 StringBuffer result = new StringBuffer();
2702 try {
2703 m.appendReplacement(result, "blech");
2704 failCount++;
2705 } catch (IllegalStateException e) {
2706 }
2707 m.find();
2708 m.appendReplacement(result, "blech");
2709 if (!result.toString().equals("zzzblech"))
2710 failCount++;
2711
2712 m.appendTail(result);
2713 if (!result.toString().equals("zzzblechzzz"))
2714 failCount++;
2715
2716 // SB substitution with groups
2717 blah = "zzzabcdzzz";
2718 p = Pattern.compile("(ab)(cd)*");
2719 m = p.matcher(blah);
2720 result = new StringBuffer();
2721 try {
2722 m.appendReplacement(result, "$1");
2723 failCount++;
2724 } catch (IllegalStateException e) {
2725 }
2726 m.find();
2727 m.appendReplacement(result, "$1");
2728 if (!result.toString().equals("zzzab"))
2729 failCount++;
2730
2731 m.appendTail(result);
2732 if (!result.toString().equals("zzzabzzz"))
2733 failCount++;
2734
2735 // SB substitution with 3 groups
2736 blah = "zzzabcdcdefzzz";
2737 p = Pattern.compile("(ab)(cd)*(ef)");
2738 m = p.matcher(blah);
2739 result = new StringBuffer();
2740 try {
2741 m.appendReplacement(result, "$1w$2w$3");
2742 failCount++;
2743 } catch (IllegalStateException e) {
2744 }
2745 m.find();
2746 m.appendReplacement(result, "$1w$2w$3");
2747 if (!result.toString().equals("zzzabwcdwef"))
2748 failCount++;
2749
2750 m.appendTail(result);
2751 if (!result.toString().equals("zzzabwcdwefzzz"))
2752 failCount++;
2753
2754 // SB substitution with groups and three matches
2755 // skipping middle match
2756 blah = "zzzabcdzzzabcddzzzabcdzzz";
2757 p = Pattern.compile("(ab)(cd*)");
2758 m = p.matcher(blah);
2759 result = new StringBuffer();
2760 try {
2761 m.appendReplacement(result, "$1");
2762 failCount++;
2763 } catch (IllegalStateException e) {
2764 }
2765 m.find();
2766 m.appendReplacement(result, "$1");
2767 if (!result.toString().equals("zzzab"))
2768 failCount++;
2769
2770 m.find();
2771 m.find();
2772 m.appendReplacement(result, "$2");
2773 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2774 failCount++;
2775
2776 m.appendTail(result);
2777 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2778 failCount++;
2779
2780 // Check to make sure escaped $ is ignored
2781 blah = "zzzabcdcdefzzz";
2782 p = Pattern.compile("(ab)(cd)*(ef)");
2783 m = p.matcher(blah);
2784 result = new StringBuffer();
2785 m.find();
2786 m.appendReplacement(result, "$1w\\$2w$3");
2787 if (!result.toString().equals("zzzabw$2wef"))
2788 failCount++;
2789
2790 m.appendTail(result);
2791 if (!result.toString().equals("zzzabw$2wefzzz"))
2792 failCount++;
2793
2794 // Check to make sure a reference to nonexistent group causes error
2795 blah = "zzzabcdcdefzzz";
2796 p = Pattern.compile("(ab)(cd)*(ef)");
2797 m = p.matcher(blah);
2798 result = new StringBuffer();
2799 m.find();
2800 try {
2801 m.appendReplacement(result, "$1w$5w$3");
2802 failCount++;
2803 } catch (IndexOutOfBoundsException ioobe) {
2804 // Correct result
2805 }
2806
2807 // Check double digit group references
2808 blah = "zzz123456789101112zzz";
2809 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2810 m = p.matcher(blah);
2811 result = new StringBuffer();
2812 m.find();
2813 m.appendReplacement(result, "$1w$11w$3");
2814 if (!result.toString().equals("zzz1w11w3"))
2815 failCount++;
2816
2817 // Check to make sure it backs off $15 to $1 if only three groups
2818 blah = "zzzabcdcdefzzz";
2819 p = Pattern.compile("(ab)(cd)*(ef)");
2820 m = p.matcher(blah);
2821 result = new StringBuffer();
2822 m.find();
2823 m.appendReplacement(result, "$1w$15w$3");
2824 if (!result.toString().equals("zzzabwab5wef"))
2825 failCount++;
2826
2827
2828 // Supplementary character test
2829 // SB substitution with literal
2830 blah = toSupplementaries("zzzblahzzz");
2831 p = Pattern.compile(toSupplementaries("blah"));
2832 m = p.matcher(blah);
2833 result = new StringBuffer();
2834 try {
2835 m.appendReplacement(result, toSupplementaries("blech"));
2836 failCount++;
2837 } catch (IllegalStateException e) {
2838 }
2839 m.find();
2840 m.appendReplacement(result, toSupplementaries("blech"));
2841 if (!result.toString().equals(toSupplementaries("zzzblech")))
2842 failCount++;
2843
2844 m.appendTail(result);
2845 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2846 failCount++;
2847
2848 // SB substitution with groups
2849 blah = toSupplementaries("zzzabcdzzz");
2850 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2851 m = p.matcher(blah);
2852 result = new StringBuffer();
2853 try {
2854 m.appendReplacement(result, "$1");
2855 failCount++;
2856 } catch (IllegalStateException e) {
2857 }
2858 m.find();
2859 m.appendReplacement(result, "$1");
2860 if (!result.toString().equals(toSupplementaries("zzzab")))
2861 failCount++;
2862
2863 m.appendTail(result);
2864 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2865 failCount++;
2866
2867 // SB substitution with 3 groups
2868 blah = toSupplementaries("zzzabcdcdefzzz");
2869 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2870 m = p.matcher(blah);
2871 result = new StringBuffer();
2872 try {
2873 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2874 failCount++;
2875 } catch (IllegalStateException e) {
2876 }
2877 m.find();
2878 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2879 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2880 failCount++;
2881
2882 m.appendTail(result);
2883 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2884 failCount++;
2885
2886 // SB substitution with groups and three matches
2887 // skipping middle match
2888 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2889 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2890 m = p.matcher(blah);
2891 result = new StringBuffer();
2892 try {
2893 m.appendReplacement(result, "$1");
2894 failCount++;
2895 } catch (IllegalStateException e) {
2896 }
2897 m.find();
2898 m.appendReplacement(result, "$1");
2899 if (!result.toString().equals(toSupplementaries("zzzab")))
2900 failCount++;
2901
2902 m.find();
2903 m.find();
2904 m.appendReplacement(result, "$2");
2905 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2906 failCount++;
2907
2908 m.appendTail(result);
2909 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2910 failCount++;
2911
2912 // Check to make sure escaped $ is ignored
2913 blah = toSupplementaries("zzzabcdcdefzzz");
2914 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2915 m = p.matcher(blah);
2916 result = new StringBuffer();
2917 m.find();
2918 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2919 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2920 failCount++;
2921
2922 m.appendTail(result);
2923 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2924 failCount++;
2925
2926 // Check to make sure a reference to nonexistent group causes error
2927 blah = toSupplementaries("zzzabcdcdefzzz");
2928 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2929 m = p.matcher(blah);
2930 result = new StringBuffer();
2931 m.find();
2932 try {
2933 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2934 failCount++;
2935 } catch (IndexOutOfBoundsException ioobe) {
2936 // Correct result
2937 }
2938
2939 // Check double digit group references
2940 blah = toSupplementaries("zzz123456789101112zzz");
2941 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2942 m = p.matcher(blah);
2943 result = new StringBuffer();
2944 m.find();
2945 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2946 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2947 failCount++;
2948
2949 // Check to make sure it backs off $15 to $1 if only three groups
2950 blah = toSupplementaries("zzzabcdcdefzzz");
2951 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2952 m = p.matcher(blah);
2953 result = new StringBuffer();
2954 m.find();
2955 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2956 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2957 failCount++;
2958
2959 // Check nothing has been appended into the output buffer if
2960 // the replacement string triggers IllegalArgumentException.
2961 p = Pattern.compile("(abc)");
2962 m = p.matcher("abcd");
2963 result = new StringBuffer();
2964 m.find();
2965 try {
2966 m.appendReplacement(result, ("xyz$g"));
2967 failCount++;
2968 } catch (IllegalArgumentException iae) {
2969 if (result.length() != 0)
2970 failCount++;
2971 }
2972
2973 report("SB Substitution");
2974 }
2975
2976 /*
2977 * 5 groups of characters are created to make a substitution string.
2978 * A base string will be created including random lead chars, the
2979 * substitution string, and random trailing chars.
2980 * A pattern containing the 5 groups is searched for and replaced with:
2981 * random group + random string + random group.
2982 * The results are checked for correctness.
2983 */
2984 private static void substitutionBasher() {
2985 for (int runs = 0; runs<1000; runs++) {
2986 // Create a base string to work in
2987 int leadingChars = generator.nextInt(10);
2988 StringBuffer baseBuffer = new StringBuffer(100);
2989 String leadingString = getRandomAlphaString(leadingChars);
2990 baseBuffer.append(leadingString);
2991
2992 // Create 5 groups of random number of random chars
2993 // Create the string to substitute
2994 // Create the pattern string to search for
2995 StringBuffer bufferToSub = new StringBuffer(25);
2996 StringBuffer bufferToPat = new StringBuffer(50);
2997 String[] groups = new String[5];
2998 for(int i=0; i<5; i++) {
2999 int aGroupSize = generator.nextInt(5)+1;
3000 groups[i] = getRandomAlphaString(aGroupSize);
3001 bufferToSub.append(groups[i]);
3002 bufferToPat.append('(');
3003 bufferToPat.append(groups[i]);
3004 bufferToPat.append(')');
3005 }
3006 String stringToSub = bufferToSub.toString();
3007 String pattern = bufferToPat.toString();
3008
3009 // Place sub string into working string at random index
3010 baseBuffer.append(stringToSub);
3011
3012 // Append random chars to end
3013 int trailingChars = generator.nextInt(10);
3014 String trailingString = getRandomAlphaString(trailingChars);
3015 baseBuffer.append(trailingString);
3016 String baseString = baseBuffer.toString();
3017
3018 // Create test pattern and matcher
3019 Pattern p = Pattern.compile(pattern);
3020 Matcher m = p.matcher(baseString);
3021
3022 // Reject candidate if pattern happens to start early
3023 m.find();
3024 if (m.start() < leadingChars)
3025 continue;
3026
3027 // Reject candidate if more than one match
3028 if (m.find())
3029 continue;
3030
3031 // Construct a replacement string with :
3032 // random group + random string + random group
3033 StringBuffer bufferToRep = new StringBuffer();
3034 int groupIndex1 = generator.nextInt(5);
3035 bufferToRep.append("$" + (groupIndex1 + 1));
3036 String randomMidString = getRandomAlphaString(5);
3037 bufferToRep.append(randomMidString);
3038 int groupIndex2 = generator.nextInt(5);
3039 bufferToRep.append("$" + (groupIndex2 + 1));
3040 String replacement = bufferToRep.toString();
3041
3042 // Do the replacement
3043 String result = m.replaceAll(replacement);
3044
3045 // Construct expected result
3046 StringBuffer bufferToRes = new StringBuffer();
3047 bufferToRes.append(leadingString);
3048 bufferToRes.append(groups[groupIndex1]);
3049 bufferToRes.append(randomMidString);
3050 bufferToRes.append(groups[groupIndex2]);
3051 bufferToRes.append(trailingString);
3052 String expectedResult = bufferToRes.toString();
3053
3054 // Check results
3055 if (!result.equals(expectedResult))
3056 failCount++;
3057 }
3058
3059 report("Substitution Basher");
3060 }
3061
3062 /**
3063 * Checks the handling of some escape sequences that the Pattern
3064 * class should process instead of the java compiler. These are
3065 * not in the file because the escapes should be be processed
3066 * by the Pattern class when the regex is compiled.
3067 */
3068 private static void escapes() throws Exception {
3069 Pattern p = Pattern.compile("\\043");
3070 Matcher m = p.matcher("#");
3071 if (!m.find())
3072 failCount++;
3073
3074 p = Pattern.compile("\\x23");
3075 m = p.matcher("#");
3076 if (!m.find())
3077 failCount++;
3078
3079 p = Pattern.compile("\\u0023");
3080 m = p.matcher("#");
3081 if (!m.find())
3082 failCount++;
3083
3084 report("Escape sequences");
3085 }
3086
3087 /**
3088 * Checks the handling of blank input situations. These
3089 * tests are incompatible with my test file format.
3090 */
3091 private static void blankInput() throws Exception {
3092 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3093 Matcher m = p.matcher("");
3094 if (m.find())
3095 failCount++;
3096
3097 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3098 m = p.matcher("");
3099 if (!m.find())
3100 failCount++;
3101
3102 p = Pattern.compile("abc");
3103 m = p.matcher("");
3104 if (m.find())
3105 failCount++;
3106
3107 p = Pattern.compile("a*");
3108 m = p.matcher("");
3109 if (!m.find())
3110 failCount++;
3111
3112 report("Blank input");
3113 }
3114
3115 /**
3116 * Tests the Boyer-Moore pattern matching of a character sequence
3117 * on randomly generated patterns.
3118 */
3119 private static void bm() throws Exception {
3120 doBnM('a');
3121 report("Boyer Moore (ASCII)");
3122
3123 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3124 report("Boyer Moore (Supplementary)");
3125 }
3126
3127 private static void doBnM(int baseCharacter) throws Exception {
3128 int achar=0;
3129
3130 for (int i=0; i<100; i++) {
3131 // Create a short pattern to search for
3132 int patternLength = generator.nextInt(7) + 4;
3133 StringBuffer patternBuffer = new StringBuffer(patternLength);
igerasime69462b2015-08-03 22:36:28 +03003134 String pattern;
3135 retry: for (;;) {
3136 for (int x=0; x<patternLength; x++) {
3137 int ch = baseCharacter + generator.nextInt(26);
3138 if (Character.isSupplementaryCodePoint(ch)) {
3139 patternBuffer.append(Character.toChars(ch));
3140 } else {
3141 patternBuffer.append((char)ch);
3142 }
sherman0b4d42d2009-02-23 21:06:15 -08003143 }
igerasime69462b2015-08-03 22:36:28 +03003144 pattern = patternBuffer.toString();
3145
3146 // Avoid patterns that start and end with the same substring
3147 // See JDK-6854417
3148 for (int x=1; x <patternLength; x++) {
3149 if (pattern.startsWith(pattern.substring(x)))
3150 continue retry;
3151 }
3152 break;
sherman0b4d42d2009-02-23 21:06:15 -08003153 }
sherman0b4d42d2009-02-23 21:06:15 -08003154 Pattern p = Pattern.compile(pattern);
3155
3156 // Create a buffer with random ASCII chars that does
3157 // not match the sample
3158 String toSearch = null;
3159 StringBuffer s = null;
3160 Matcher m = p.matcher("");
3161 do {
3162 s = new StringBuffer(100);
3163 for (int x=0; x<100; x++) {
3164 int ch = baseCharacter + generator.nextInt(26);
3165 if (Character.isSupplementaryCodePoint(ch)) {
3166 s.append(Character.toChars(ch));
3167 } else {
3168 s.append((char)ch);
3169 }
3170 }
3171 toSearch = s.toString();
3172 m.reset(toSearch);
3173 } while (m.find());
3174
3175 // Insert the pattern at a random spot
3176 int insertIndex = generator.nextInt(99);
3177 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3178 insertIndex++;
3179 s = s.insert(insertIndex, pattern);
3180 toSearch = s.toString();
3181
3182 // Make sure that the pattern is found
3183 m.reset(toSearch);
3184 if (!m.find())
3185 failCount++;
3186
3187 // Make sure that the match text is the pattern
3188 if (!m.group().equals(pattern))
3189 failCount++;
3190
3191 // Make sure match occured at insertion point
3192 if (m.start() != insertIndex)
3193 failCount++;
3194 }
3195 }
3196
3197 /**
3198 * Tests the matching of slices on randomly generated patterns.
3199 * The Boyer-Moore optimization is not done on these patterns
3200 * because it uses unicode case folding.
3201 */
3202 private static void slice() throws Exception {
3203 doSlice(Character.MAX_VALUE);
3204 report("Slice");
3205
3206 doSlice(Character.MAX_CODE_POINT);
3207 report("Slice (Supplementary)");
3208 }
3209
3210 private static void doSlice(int maxCharacter) throws Exception {
3211 Random generator = new Random();
3212 int achar=0;
3213
3214 for (int i=0; i<100; i++) {
3215 // Create a short pattern to search for
3216 int patternLength = generator.nextInt(7) + 4;
3217 StringBuffer patternBuffer = new StringBuffer(patternLength);
3218 for (int x=0; x<patternLength; x++) {
3219 int randomChar = 0;
3220 while (!Character.isLetterOrDigit(randomChar))
3221 randomChar = generator.nextInt(maxCharacter);
3222 if (Character.isSupplementaryCodePoint(randomChar)) {
3223 patternBuffer.append(Character.toChars(randomChar));
3224 } else {
3225 patternBuffer.append((char) randomChar);
3226 }
3227 }
3228 String pattern = patternBuffer.toString();
3229 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3230
3231 // Create a buffer with random chars that does not match the sample
3232 String toSearch = null;
3233 StringBuffer s = null;
3234 Matcher m = p.matcher("");
3235 do {
3236 s = new StringBuffer(100);
3237 for (int x=0; x<100; x++) {
3238 int randomChar = 0;
3239 while (!Character.isLetterOrDigit(randomChar))
3240 randomChar = generator.nextInt(maxCharacter);
3241 if (Character.isSupplementaryCodePoint(randomChar)) {
3242 s.append(Character.toChars(randomChar));
3243 } else {
3244 s.append((char) randomChar);
3245 }
3246 }
3247 toSearch = s.toString();
3248 m.reset(toSearch);
3249 } while (m.find());
3250
3251 // Insert the pattern at a random spot
3252 int insertIndex = generator.nextInt(99);
3253 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3254 insertIndex++;
3255 s = s.insert(insertIndex, pattern);
3256 toSearch = s.toString();
3257
3258 // Make sure that the pattern is found
3259 m.reset(toSearch);
3260 if (!m.find())
3261 failCount++;
3262
3263 // Make sure that the match text is the pattern
3264 if (!m.group().equals(pattern))
3265 failCount++;
3266
3267 // Make sure match occured at insertion point
3268 if (m.start() != insertIndex)
3269 failCount++;
3270 }
3271 }
3272
3273 private static void explainFailure(String pattern, String data,
3274 String expected, String actual) {
3275 System.err.println("----------------------------------------");
3276 System.err.println("Pattern = "+pattern);
3277 System.err.println("Data = "+data);
3278 System.err.println("Expected = " + expected);
3279 System.err.println("Actual = " + actual);
3280 }
3281
3282 private static void explainFailure(String pattern, String data,
3283 Throwable t) {
3284 System.err.println("----------------------------------------");
3285 System.err.println("Pattern = "+pattern);
3286 System.err.println("Data = "+data);
3287 t.printStackTrace(System.err);
3288 }
3289
3290 // Testing examples from a file
3291
3292 /**
3293 * Goes through the file "TestCases.txt" and creates many patterns
3294 * described in the file, matching the patterns against input lines in
3295 * the file, and comparing the results against the correct results
3296 * also found in the file. The file format is described in comments
3297 * at the head of the file.
3298 */
3299 private static void processFile(String fileName) throws Exception {
3300 File testCases = new File(System.getProperty("test.src", "."),
3301 fileName);
3302 FileInputStream in = new FileInputStream(testCases);
3303 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3304
3305 // Process next test case.
3306 String aLine;
3307 while((aLine = r.readLine()) != null) {
3308 // Read a line for pattern
3309 String patternString = grabLine(r);
3310 Pattern p = null;
3311 try {
3312 p = compileTestPattern(patternString);
3313 } catch (PatternSyntaxException e) {
3314 String dataString = grabLine(r);
3315 String expectedResult = grabLine(r);
3316 if (expectedResult.startsWith("error"))
3317 continue;
3318 explainFailure(patternString, dataString, e);
3319 failCount++;
3320 continue;
3321 }
3322
3323 // Read a line for input string
3324 String dataString = grabLine(r);
3325 Matcher m = p.matcher(dataString);
3326 StringBuffer result = new StringBuffer();
3327
3328 // Check for IllegalStateExceptions before a match
3329 failCount += preMatchInvariants(m);
3330
3331 boolean found = m.find();
3332
3333 if (found)
3334 failCount += postTrueMatchInvariants(m);
3335 else
3336 failCount += postFalseMatchInvariants(m);
3337
3338 if (found) {
3339 result.append("true ");
3340 result.append(m.group(0) + " ");
3341 } else {
3342 result.append("false ");
3343 }
3344
3345 result.append(m.groupCount());
3346
3347 if (found) {
3348 for (int i=1; i<m.groupCount()+1; i++)
3349 if (m.group(i) != null)
3350 result.append(" " +m.group(i));
3351 }
3352
3353 // Read a line for the expected result
3354 String expectedResult = grabLine(r);
3355
3356 if (!result.toString().equals(expectedResult)) {
3357 explainFailure(patternString, dataString, expectedResult, result.toString());
3358 failCount++;
3359 }
3360 }
3361
3362 report(fileName);
3363 }
3364
3365 private static int preMatchInvariants(Matcher m) {
3366 int failCount = 0;
3367 try {
3368 m.start();
3369 failCount++;
3370 } catch (IllegalStateException ise) {}
3371 try {
3372 m.end();
3373 failCount++;
3374 } catch (IllegalStateException ise) {}
3375 try {
3376 m.group();
3377 failCount++;
3378 } catch (IllegalStateException ise) {}
3379 return failCount;
3380 }
3381
3382 private static int postFalseMatchInvariants(Matcher m) {
3383 int failCount = 0;
3384 try {
3385 m.group();
3386 failCount++;
3387 } catch (IllegalStateException ise) {}
3388 try {
3389 m.start();
3390 failCount++;
3391 } catch (IllegalStateException ise) {}
3392 try {
3393 m.end();
3394 failCount++;
3395 } catch (IllegalStateException ise) {}
3396 return failCount;
3397 }
3398
3399 private static int postTrueMatchInvariants(Matcher m) {
3400 int failCount = 0;
3401 //assert(m.start() = m.start(0);
3402 if (m.start() != m.start(0))
3403 failCount++;
3404 //assert(m.end() = m.end(0);
3405 if (m.start() != m.start(0))
3406 failCount++;
3407 //assert(m.group() = m.group(0);
3408 if (!m.group().equals(m.group(0)))
3409 failCount++;
3410 try {
3411 m.group(50);
3412 failCount++;
3413 } catch (IndexOutOfBoundsException ise) {}
3414
3415 return failCount;
3416 }
3417
3418 private static Pattern compileTestPattern(String patternString) {
3419 if (!patternString.startsWith("'")) {
3420 return Pattern.compile(patternString);
3421 }
3422
3423 int break1 = patternString.lastIndexOf("'");
3424 String flagString = patternString.substring(
3425 break1+1, patternString.length());
3426 patternString = patternString.substring(1, break1);
3427
3428 if (flagString.equals("i"))
3429 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3430
3431 if (flagString.equals("m"))
3432 return Pattern.compile(patternString, Pattern.MULTILINE);
3433
3434 return Pattern.compile(patternString);
3435 }
3436
3437 /**
3438 * Reads a line from the input file. Keeps reading lines until a non
3439 * empty non comment line is read. If the line contains a \n then
3440 * these two characters are replaced by a newline char. If a \\uxxxx
3441 * sequence is read then the sequence is replaced by the unicode char.
3442 */
3443 private static String grabLine(BufferedReader r) throws Exception {
3444 int index = 0;
3445 String line = r.readLine();
3446 while (line.startsWith("//") || line.length() < 1)
3447 line = r.readLine();
3448 while ((index = line.indexOf("\\n")) != -1) {
3449 StringBuffer temp = new StringBuffer(line);
3450 temp.replace(index, index+2, "\n");
3451 line = temp.toString();
3452 }
3453 while ((index = line.indexOf("\\u")) != -1) {
3454 StringBuffer temp = new StringBuffer(line);
3455 String value = temp.substring(index+2, index+6);
3456 char aChar = (char)Integer.parseInt(value, 16);
3457 String unicodeChar = "" + aChar;
3458 temp.replace(index, index+6, unicodeChar);
3459 line = temp.toString();
3460 }
3461
3462 return line;
3463 }
3464
3465 private static void check(Pattern p, String s, String g, String expected) {
3466 Matcher m = p.matcher(s);
3467 m.find();
shermana244eb52013-05-06 21:24:37 -07003468 if (!m.group(g).equals(expected) ||
3469 s.charAt(m.start(g)) != expected.charAt(0) ||
3470 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
sherman0b4d42d2009-02-23 21:06:15 -08003471 failCount++;
3472 }
3473
3474 private static void checkReplaceFirst(String p, String s, String r, String expected)
3475 {
3476 if (!expected.equals(Pattern.compile(p)
3477 .matcher(s)
3478 .replaceFirst(r)))
3479 failCount++;
3480 }
3481
3482 private static void checkReplaceAll(String p, String s, String r, String expected)
3483 {
3484 if (!expected.equals(Pattern.compile(p)
3485 .matcher(s)
3486 .replaceAll(r)))
3487 failCount++;
3488 }
3489
3490 private static void checkExpectedFail(String p) {
3491 try {
3492 Pattern.compile(p);
3493 } catch (PatternSyntaxException pse) {
3494 //pse.printStackTrace();
3495 return;
3496 }
3497 failCount++;
3498 }
3499
shermana244eb52013-05-06 21:24:37 -07003500 private static void checkExpectedIAE(Matcher m, String g) {
sherman0b4d42d2009-02-23 21:06:15 -08003501 m.find();
3502 try {
3503 m.group(g);
shermana244eb52013-05-06 21:24:37 -07003504 } catch (IllegalArgumentException x) {
sherman0b4d42d2009-02-23 21:06:15 -08003505 //iae.printStackTrace();
shermana244eb52013-05-06 21:24:37 -07003506 try {
3507 m.start(g);
3508 } catch (IllegalArgumentException xx) {
3509 try {
3510 m.start(g);
3511 } catch (IllegalArgumentException xxx) {
3512 return;
3513 }
3514 }
sherman0b4d42d2009-02-23 21:06:15 -08003515 }
3516 failCount++;
3517 }
3518
shermana244eb52013-05-06 21:24:37 -07003519 private static void checkExpectedNPE(Matcher m) {
3520 m.find();
3521 try {
3522 m.group(null);
3523 } catch (NullPointerException x) {
3524 try {
3525 m.start(null);
3526 } catch (NullPointerException xx) {
3527 try {
3528 m.end(null);
3529 } catch (NullPointerException xxx) {
3530 return;
3531 }
3532 }
3533 }
3534 failCount++;
3535 }
sherman0b4d42d2009-02-23 21:06:15 -08003536
3537 private static void namedGroupCaptureTest() throws Exception {
3538 check(Pattern.compile("x+(?<gname>y+)z+"),
3539 "xxxyyyzzz",
3540 "gname",
3541 "yyy");
3542
shermand9337e02009-10-21 11:40:40 -07003543 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003544 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003545 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003546 "yyy");
3547
sherman0b4d42d2009-02-23 21:06:15 -08003548 //backref
3549 Pattern pattern = Pattern.compile("(a*)bc\\1");
3550 check(pattern, "zzzaabcazzz", true); // found "abca"
3551
3552 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3553 "zzzaabcaazzz", true);
3554
3555 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3556 "abcdefabc", true);
3557
3558 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3559 "abcdefghijkk", true);
3560
3561 // Supplementary character tests
3562 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3563 toSupplementaries("zzzaabcazzz"), true);
3564
3565 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3566 toSupplementaries("zzzaabcaazzz"), true);
3567
3568 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3569 toSupplementaries("abcdefabc"), true);
3570
3571 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3572 "(?<gname>" +
3573 toSupplementaries("k)") + "\\k<gname>"),
3574 toSupplementaries("abcdefghijkk"), true);
3575
3576 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3577 "xxxyyyzzzyyy",
3578 "gname",
3579 "yyy");
3580
3581 //replaceFirst/All
3582 checkReplaceFirst("(?<gn>ab)(c*)",
3583 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003584 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003585 "abzzzabcczzzabccc");
3586
3587 checkReplaceAll("(?<gn>ab)(c*)",
3588 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003589 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003590 "abzzzabzzzab");
3591
3592
3593 checkReplaceFirst("(?<gn>ab)(c*)",
3594 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003595 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003596 "zzzabzzzabcczzzabccczzz");
3597
3598 checkReplaceAll("(?<gn>ab)(c*)",
3599 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003600 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003601 "zzzabzzzabzzzabzzz");
3602
3603 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3604 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003605 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003606 "zzzccczzzabcczzzabccczzz");
3607
3608 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3609 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003610 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003611 "zzzccczzzcczzzccczzz");
3612
3613 //toSupplementaries("(ab)(c*)"));
3614 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3615 ")(?<gn2>" + toSupplementaries("c") + "*)",
3616 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003617 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003618 toSupplementaries("abzzzabcczzzabccc"));
3619
3620
3621 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3622 ")(?<gn2>" + toSupplementaries("c") + "*)",
3623 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003624 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003625 toSupplementaries("abzzzabzzzab"));
3626
3627 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3628 ")(?<gn2>" + toSupplementaries("c") + "*)",
3629 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003630 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003631 toSupplementaries("ccczzzabcczzzabccc"));
3632
3633
3634 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3635 ")(?<gn2>" + toSupplementaries("c") + "*)",
3636 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003637 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003638 toSupplementaries("ccczzzcczzzccc"));
3639
3640 checkReplaceFirst("(?<dog>Dog)AndCat",
3641 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003642 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003643 "zzzDogzzzDogAndCatzzz");
3644
3645
3646 checkReplaceAll("(?<dog>Dog)AndCat",
3647 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003648 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003649 "zzzDogzzzDogzzz");
3650
3651 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003652 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3653 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003654 failCount++;
3655
3656 // negative
3657 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3658 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003659 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003660 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3661 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
shermana244eb52013-05-06 21:24:37 -07003662 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3663 "gnameX");
3664 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
sherman0b4d42d2009-02-23 21:06:15 -08003665 report("NamedGroupCapture");
3666 }
sherman6782c962010-02-05 00:10:42 -08003667
shermancc01ef52010-05-18 15:36:47 -07003668 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003669 private static void nonBmpClassComplementTest() throws Exception {
3670 Pattern p = Pattern.compile("\\P{Lu}");
3671 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3672 if (m.find() && m.start() == 1)
3673 failCount++;
3674
3675 // from a unicode category
3676 p = Pattern.compile("\\P{Lu}");
3677 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3678 if (m.find())
3679 failCount++;
3680 if (!m.hitEnd())
3681 failCount++;
3682
3683 // block
3684 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3685 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3686 if (m.find() && m.start() == 1)
3687 failCount++;
3688
3689 report("NonBmpClassComplement");
3690 }
3691
shermancc01ef52010-05-18 15:36:47 -07003692 private static void unicodePropertiesTest() throws Exception {
3693 // different forms
3694 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3695 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3696 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3697 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3698 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3699 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3700 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3701 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3702 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3703 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3704 failCount++;
3705
3706 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3707 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3708 Matcher lastSM = common;
3709 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3710
3711 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3712 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3713 Matcher lastBM = latin;
3714 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3715
3716 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3717 if (cp >= 0x30000 && (cp & 0x70) == 0){
3718 continue; // only pick couple code points, they are the same
3719 }
3720
3721 // Unicode Script
3722 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3723 Matcher m;
3724 String str = new String(Character.toChars(cp));
3725 if (script == lastScript) {
3726 m = lastSM;
3727 m.reset(str);
3728 } else {
3729 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3730 }
3731 if (!m.matches()) {
3732 failCount++;
3733 }
3734 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3735 other.reset(str);
3736 if (other.matches()) {
3737 failCount++;
3738 }
3739 lastSM = m;
3740 lastScript = script;
3741
3742 // Unicode Block
3743 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3744 if (block == null) {
3745 //System.out.printf("Not a Block: cp=%x%n", cp);
3746 continue;
3747 }
3748 if (block == lastBlock) {
3749 m = lastBM;
3750 m.reset(str);
3751 } else {
3752 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3753 }
3754 if (!m.matches()) {
3755 failCount++;
3756 }
3757 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3758 other.reset(str);
3759 if (other.matches()) {
3760 failCount++;
3761 }
3762 lastBM = m;
3763 lastBlock = block;
3764 }
3765 report("unicodeProperties");
3766 }
shermanf03c78b2011-02-03 13:49:25 -08003767
3768 private static void unicodeHexNotationTest() throws Exception {
3769
3770 // negative
3771 checkExpectedFail("\\x{-23}");
3772 checkExpectedFail("\\x{110000}");
3773 checkExpectedFail("\\x{}");
3774 checkExpectedFail("\\x{AB[ef]");
3775
3776 // codepoint
3777 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3778 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3779 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3780 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3781
3782 // in class
3783 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3784 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3785 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3786 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3787 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3788 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3789
3790 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3791 String s = "A" + new String(Character.toChars(cp)) + "B";
3792 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3793 : String.format("\\u%04x\\u%04x",
3794 (int) Character.toChars(cp)[0],
3795 (int) Character.toChars(cp)[1]);
3796 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3797 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3798 failCount++;
3799 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3800 failCount++;
3801 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3802 failCount++;
3803 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3804 failCount++;
3805 }
3806 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003807 }
3808
3809 private static void unicodeClassesTest() throws Exception {
3810
3811 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3812 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3813 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3814 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3815 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3816 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3817 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3818 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3819 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3820 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3821 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3822 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3823 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3824 Matcher bound = Pattern.compile("\\b").matcher("");
3825 Matcher word = Pattern.compile("\\w++").matcher("");
3826 // UNICODE_CHARACTER_CLASS
3827 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3828 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3829 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3830 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3831 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3832 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3833 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3834 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3835 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3836 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3837 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3838 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3839 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3840 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3841 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3842 // embedded flag (?U)
3843 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3844 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3845 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3846
3847 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3848 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3849 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3850 // properties
3851 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3852 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3853 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3854 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3855 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3856 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3857 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3858 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3859 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3860 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
shermana244eb52013-05-06 21:24:37 -07003861 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
sherman85bbd8b2011-04-28 20:48:36 -07003862
3863 // javaMethod
3864 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3865 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3866 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3867 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3868
3869 for (int cp = 1; cp < 0x30000; cp++) {
3870 String str = new String(Character.toChars(cp));
3871 int type = Character.getType(cp);
3872 if (// lower
3873 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3874 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3875 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3876 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3877 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3878 // upper
3879 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3880 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3881 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3882 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3883 // alpha
3884 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3885 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3886 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3887 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3888 // digit
3889 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3890 Character.isDigit(cp) != digitU.reset(str).matches() ||
3891 // alnum
3892 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3893 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3894 // punct
3895 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3896 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3897 // graph
3898 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3899 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3900 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3901 // blank
3902 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3903 != blank.reset(str).matches() ||
3904 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3905 // print
3906 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3907 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3908 // cntrl
3909 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3910 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3911 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3912 // hexdigit
3913 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3914 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3915 // space
3916 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3917 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3918 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3919 // word
3920 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3921 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3922 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3923 // bwordb
3924 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3925 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3926 // properties
3927 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3928 Character.isLetter(cp) != letterP.reset(str).matches()||
3929 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3930 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3931 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
shermana244eb52013-05-06 21:24:37 -07003932 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3933 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
sherman85bbd8b2011-04-28 20:48:36 -07003934 failCount++;
3935 }
3936
3937 // bounds/word align
3938 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3939 if (!bwbU.reset("\u0180sherman\u0400").matches())
3940 failCount++;
3941 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3942 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3943 failCount++;
3944 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3945 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3946 failCount++;
3947 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3948 failCount++;
3949 report("unicodePredefinedClasses");
3950 }
shermanecb65472012-05-08 10:57:13 -07003951
3952 private static void horizontalAndVerticalWSTest() throws Exception {
3953 String hws = new String (new char[] {
3954 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3955 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3956 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3957 0x202f, 0x205f, 0x3000 });
3958 String vws = new String (new char[] {
3959 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3960 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3961 !Pattern.compile("[\\h]+").matcher(hws).matches())
3962 failCount++;
3963 if (Pattern.compile("\\H").matcher(hws).find() ||
3964 Pattern.compile("[\\H]").matcher(hws).find())
3965 failCount++;
3966 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3967 !Pattern.compile("[\\v]+").matcher(vws).matches())
3968 failCount++;
3969 if (Pattern.compile("\\V").matcher(vws).find() ||
3970 Pattern.compile("[\\V]").matcher(vws).find())
3971 failCount++;
3972 String prefix = "abcd";
3973 String suffix = "efgh";
3974 String ng = "A";
3975 for (int i = 0; i < hws.length(); i++) {
3976 String c = String.valueOf(hws.charAt(i));
3977 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3978 if (!m.find() || !c.equals(m.group()))
3979 failCount++;
3980 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3981 if (!m.find() || !c.equals(m.group()))
3982 failCount++;
3983
3984 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3985 if (!m.find() || !ng.equals(m.group()))
3986 failCount++;
3987 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3988 if (!m.find() || !ng.equals(m.group()))
3989 failCount++;
3990 }
3991 for (int i = 0; i < vws.length(); i++) {
3992 String c = String.valueOf(vws.charAt(i));
3993 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3994 if (!m.find() || !c.equals(m.group()))
3995 failCount++;
3996 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3997 if (!m.find() || !c.equals(m.group()))
3998 failCount++;
3999
4000 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4001 if (!m.find() || !ng.equals(m.group()))
4002 failCount++;
4003 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4004 if (!m.find() || !ng.equals(m.group()))
4005 failCount++;
4006 }
4007 // \v in range is interpreted as 0x0B. This is the undocumented behavior
4008 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4009 failCount++;
4010 report("horizontalAndVerticalWSTest");
4011 }
4012
4013 private static void linebreakTest() throws Exception {
4014 String linebreaks = new String (new char[] {
4015 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4016 String crnl = "\r\n";
4017 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4018 !Pattern.compile("\\R").matcher(crnl).matches() ||
4019 Pattern.compile("\\R\\R").matcher(crnl).matches())
4020 failCount++;
4021 report("linebreakTest");
4022 }
4023
sherman36e2c8f2012-08-09 10:15:26 -07004024 // #7189363
4025 private static void branchTest() throws Exception {
4026 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4027 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4028 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4029 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4030 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4031 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4032 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4033 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4034 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4035 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4036 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4037 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4038 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4039 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4040 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4041 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4042 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4043 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4044 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4045 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4046 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4047 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4048 failCount++;
4049 report("branchTest");
4050 }
4051
shermanf6f35a12013-04-26 13:59:10 -07004052 // This test is for 8007395
4053 private static void groupCurlyNotFoundSuppTest() throws Exception {
4054 String input = "test this as \ud83d\ude0d";
4055 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4056 "test(.)*(@[a-zA-Z.]+)",
4057 "test([^B])+(@[a-zA-Z.]+)",
4058 "test([^B])*(@[a-zA-Z.]+)",
4059 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4060 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4061 }) {
4062 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4063 .matcher(input);
4064 try {
4065 if (m.find()) {
4066 failCount++;
4067 }
4068 } catch (Exception x) {
4069 failCount++;
4070 }
4071 }
4072 report("GroupCurly NotFoundSupp");
4073 }
4074
sherman95a939c2013-08-27 12:54:44 -07004075 // This test is for 8023647
4076 private static void groupCurlyBackoffTest() throws Exception {
4077 if (!"abc1c".matches("(\\w)+1\\1") ||
4078 "abc11".matches("(\\w)+1\\1")) {
4079 failCount++;
4080 }
4081 report("GroupCurly backoff");
4082 }
4083
psandoze9d4ac92013-05-01 18:40:31 +02004084 // This test is for 8012646
4085 private static void patternAsPredicate() throws Exception {
4086 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4087
4088 if (p.test("")) {
4089 failCount++;
4090 }
4091 if (!p.test("word")) {
4092 failCount++;
4093 }
4094 if (p.test("1234")) {
4095 failCount++;
4096 }
4097 report("Pattern.asPredicate");
4098 }
sherman0b4d42d2009-02-23 21:06:15 -08004099}