blob: f5282b27c03ffc6f11d382ebf6ba8bd5e38954b0 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
psandoze9d4ac92013-05-01 18:40:31 +02002 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman1242a6d2013-11-13 11:26:01 -080036 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
37 * 8027645
sherman0b4d42d2009-02-23 21:06:15 -080038 */
39
40import java.util.regex.*;
41import java.util.Random;
42import java.io.*;
43import java.util.*;
44import java.nio.CharBuffer;
psandoze9d4ac92013-05-01 18:40:31 +020045import java.util.function.Predicate;
sherman0b4d42d2009-02-23 21:06:15 -080046
47/**
48 * This is a test class created to check the operation of
49 * the Pattern and Matcher classes.
50 */
51public class RegExTest {
52
53 private static Random generator = new Random();
54 private static boolean failure = false;
55 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080056 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080057
58 /**
59 * Main to interpret arguments and run several tests.
60 *
61 */
62 public static void main(String[] args) throws Exception {
63 // Most of the tests are in a file
64 processFile("TestCases.txt");
65 //processFile("PerlCases.txt");
66 processFile("BMPTestCases.txt");
67 processFile("SupplementaryTestCases.txt");
68
69 // These test many randomly generated char patterns
70 bm();
71 slice();
72
73 // These are hard to put into the file
74 escapes();
75 blankInput();
76
77 // Substitition tests on randomly generated sequences
78 globalSubstitute();
79 stringbufferSubstitute();
80 substitutionBasher();
81
82 // Canonical Equivalence
83 ceTest();
84
85 // Anchors
86 anchorTest();
87
88 // boolean match calls
89 matchesTest();
90 lookingAtTest();
91
92 // Pattern API
93 patternMatchesTest();
94
95 // Misc
96 lookbehindTest();
97 nullArgumentTest();
98 backRefTest();
99 groupCaptureTest();
100 caretTest();
101 charClassTest();
102 emptyPatternTest();
103 findIntTest();
104 group0Test();
105 longPatternTest();
106 octalTest();
107 ampersandTest();
108 negationTest();
109 splitTest();
110 appendTest();
111 caseFoldingTest();
112 commentsTest();
113 unixLinesTest();
114 replaceFirstTest();
115 gTest();
116 zTest();
117 serializeTest();
118 reluctantRepetitionTest();
119 multilineDollarTest();
120 dollarAtEndTest();
121 caretBetweenTerminatorsTest();
122 // This RFE rejected in Tiger numOccurrencesTest();
123 javaCharClassTest();
124 nonCaptureRepetitionTest();
125 notCapturedGroupCurlyMatchTest();
126 escapedSegmentTest();
127 literalPatternTest();
128 literalReplacementTest();
129 regionTest();
130 toStringTest();
131 negatedCharClassTest();
132 findFromTest();
133 boundsTest();
134 unicodeWordBoundsTest();
135 caretAtEndTest();
136 wordSearchTest();
137 hitEndTest();
138 toMatchResultTest();
139 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800140 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800141 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800142 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700143 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800144 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700145 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700146 horizontalAndVerticalWSTest();
147 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700148 branchTest();
shermanf6f35a12013-04-26 13:59:10 -0700149 groupCurlyNotFoundSuppTest();
sherman95a939c2013-08-27 12:54:44 -0700150 groupCurlyBackoffTest();
psandoze9d4ac92013-05-01 18:40:31 +0200151 patternAsPredicate();
sherman1242a6d2013-11-13 11:26:01 -0800152
shermanb16229d2011-12-19 14:14:14 -0800153 if (failure) {
154 throw new
155 RuntimeException("RegExTest failed, 1st failure: " +
156 firstFailure);
157 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800158 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800159 }
sherman0b4d42d2009-02-23 21:06:15 -0800160 }
161
162 // Utility functions
163
164 private static String getRandomAlphaString(int length) {
165 StringBuffer buf = new StringBuffer(length);
166 for (int i=0; i<length; i++) {
167 char randChar = (char)(97 + generator.nextInt(26));
168 buf.append(randChar);
169 }
170 return buf.toString();
171 }
172
173 private static void check(Matcher m, String expected) {
174 m.find();
175 if (!m.group().equals(expected))
176 failCount++;
177 }
178
179 private static void check(Matcher m, String result, boolean expected) {
180 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800181 if (m.group().equals(result) != expected)
182 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800183 }
184
185 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800186 if (p.matcher(s).find() != expected)
187 failCount++;
188 }
189
190 private static void check(String p, String s, boolean expected) {
191 Matcher matcher = Pattern.compile(p).matcher(s);
192 if (matcher.find() != expected)
193 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800194 }
195
196 private static void check(String p, char c, boolean expected) {
197 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
198 Pattern pattern = Pattern.compile(propertyPattern);
199 char[] ca = new char[1]; ca[0] = c;
200 Matcher matcher = pattern.matcher(new String(ca));
201 if (!matcher.find())
202 failCount++;
203 }
204
205 private static void check(String p, int codePoint, boolean expected) {
206 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
207 Pattern pattern = Pattern.compile(propertyPattern);
208 char[] ca = Character.toChars(codePoint);
209 Matcher matcher = pattern.matcher(new String(ca));
210 if (!matcher.find())
211 failCount++;
212 }
213
214 private static void check(String p, int flag, String input, String s,
215 boolean expected)
216 {
217 Pattern pattern = Pattern.compile(p, flag);
218 Matcher matcher = pattern.matcher(input);
219 if (expected)
220 check(matcher, s, expected);
221 else
222 check(pattern, input, false);
223 }
224
225 private static void report(String testName) {
226 int spacesToAdd = 30 - testName.length();
227 StringBuffer paddedNameBuffer = new StringBuffer(testName);
228 for (int i=0; i<spacesToAdd; i++)
229 paddedNameBuffer.append(" ");
230 String paddedName = paddedNameBuffer.toString();
231 System.err.println(paddedName + ": " +
232 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800233 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800234 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800235
236 if (firstFailure == null) {
237 firstFailure = testName;
238 }
239 }
240
sherman0b4d42d2009-02-23 21:06:15 -0800241 failCount = 0;
242 }
243
244 /**
245 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
246 * supplementary characters. This method does NOT fully take care
247 * of the regex syntax.
248 */
249 private static String toSupplementaries(String s) {
250 int length = s.length();
251 StringBuffer sb = new StringBuffer(length * 2);
252
253 for (int i = 0; i < length; ) {
254 char c = s.charAt(i++);
255 if (c == '\\') {
256 sb.append(c);
257 if (i < length) {
258 c = s.charAt(i++);
259 sb.append(c);
260 if (c == 'u') {
261 // assume no syntax error
262 sb.append(s.charAt(i++));
263 sb.append(s.charAt(i++));
264 sb.append(s.charAt(i++));
265 sb.append(s.charAt(i++));
266 }
267 }
268 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
269 sb.append('\ud800').append((char)('\udc00'+c));
270 } else {
271 sb.append(c);
272 }
273 }
274 return sb.toString();
275 }
276
277 // Regular expression tests
278
279 // This is for bug 6178785
280 // Test if an expected NPE gets thrown when passing in a null argument
281 private static boolean check(Runnable test) {
282 try {
283 test.run();
284 failCount++;
285 return false;
286 } catch (NullPointerException npe) {
287 return true;
288 }
289 }
290
291 private static void nullArgumentTest() {
292 check(new Runnable() { public void run() { Pattern.compile(null); }});
293 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
294 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
295 check(new Runnable() { public void run() { Pattern.quote(null);}});
296 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
297 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
298
299 final Matcher m = Pattern.compile("xyz").matcher("xyz");
300 m.matches();
301 check(new Runnable() { public void run() { m.appendTail(null);}});
302 check(new Runnable() { public void run() { m.replaceAll(null);}});
303 check(new Runnable() { public void run() { m.replaceFirst(null);}});
304 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
305 check(new Runnable() { public void run() { m.reset(null);}});
306 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
307 //check(new Runnable() { public void run() { m.usePattern(null);}});
308
309 report("Null Argument");
310 }
311
312 // This is for bug6635133
313 // Test if surrogate pair in Unicode escapes can be handled correctly.
314 private static void surrogatesInClassTest() throws Exception {
315 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
316 Matcher matcher = pattern.matcher("\ud834\udd22");
317 if (!matcher.find())
318 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800319
320 report("Surrogate pair in Unicode escape");
321 }
322
323 // This is for bug6990617
324 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
325 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
326 // char is an octal digit.
327 private static void removeQEQuotingTest() throws Exception {
328 Pattern pattern =
329 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
330 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
331 if (!matcher.find())
332 failCount++;
333
334 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800335 }
336
337 // This is for bug 4988891
338 // Test toMatchResult to see that it is a copy of the Matcher
339 // that is not affected by subsequent operations on the original
340 private static void toMatchResultTest() throws Exception {
341 Pattern pattern = Pattern.compile("squid");
342 Matcher matcher = pattern.matcher(
343 "agiantsquidofdestinyasmallsquidoffate");
344 matcher.find();
345 int matcherStart1 = matcher.start();
346 MatchResult mr = matcher.toMatchResult();
347 if (mr == matcher)
348 failCount++;
349 int resultStart1 = mr.start();
350 if (matcherStart1 != resultStart1)
351 failCount++;
352 matcher.find();
353 int matcherStart2 = matcher.start();
354 int resultStart2 = mr.start();
355 if (matcherStart2 == resultStart2)
356 failCount++;
357 if (resultStart1 != resultStart2)
358 failCount++;
359 MatchResult mr2 = matcher.toMatchResult();
360 if (mr == mr2)
361 failCount++;
362 if (mr2.start() != matcherStart2)
363 failCount++;
364 report("toMatchResult is a copy");
365 }
366
367 // This is for bug 5013885
368 // Must test a slice to see if it reports hitEnd correctly
369 private static void hitEndTest() throws Exception {
370 // Basic test of Slice node
371 Pattern p = Pattern.compile("^squidattack");
372 Matcher m = p.matcher("squack");
373 m.find();
374 if (m.hitEnd())
375 failCount++;
376 m.reset("squid");
377 m.find();
378 if (!m.hitEnd())
379 failCount++;
380
381 // Test Slice, SliceA and SliceU nodes
382 for (int i=0; i<3; i++) {
383 int flags = 0;
384 if (i==1) flags = Pattern.CASE_INSENSITIVE;
385 if (i==2) flags = Pattern.UNICODE_CASE;
386 p = Pattern.compile("^abc", flags);
387 m = p.matcher("ad");
388 m.find();
389 if (m.hitEnd())
390 failCount++;
391 m.reset("ab");
392 m.find();
393 if (!m.hitEnd())
394 failCount++;
395 }
396
397 // Test Boyer-Moore node
398 p = Pattern.compile("catattack");
399 m = p.matcher("attack");
400 m.find();
401 if (!m.hitEnd())
402 failCount++;
403
404 p = Pattern.compile("catattack");
405 m = p.matcher("attackattackattackcatatta");
406 m.find();
407 if (!m.hitEnd())
408 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800409 report("hitEnd from a Slice");
410 }
411
412 // This is for bug 4997476
413 // It is weird code submitted by customer demonstrating a regression
414 private static void wordSearchTest() throws Exception {
415 String testString = new String("word1 word2 word3");
416 Pattern p = Pattern.compile("\\b");
417 Matcher m = p.matcher(testString);
418 int position = 0;
419 int start = 0;
420 while (m.find(position)) {
421 start = m.start();
422 if (start == testString.length())
423 break;
424 if (m.find(start+1)) {
425 position = m.start();
426 } else {
427 position = testString.length();
428 }
429 if (testString.substring(start, position).equals(" "))
430 continue;
431 if (!testString.substring(start, position-1).startsWith("word"))
432 failCount++;
433 }
434 report("Customer word search");
435 }
436
437 // This is for bug 4994840
438 private static void caretAtEndTest() throws Exception {
439 // Problem only occurs with multiline patterns
440 // containing a beginning-of-line caret "^" followed
441 // by an expression that also matches the empty string.
442 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
443 Matcher matcher = pattern.matcher("\r");
444 matcher.find();
445 matcher.find();
446 report("Caret at end");
447 }
448
449 // This test is for 4979006
450 // Check to see if word boundary construct properly handles unicode
451 // non spacing marks
452 private static void unicodeWordBoundsTest() throws Exception {
453 String spaces = " ";
454 String wordChar = "a";
455 String nsm = "\u030a";
456
457 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
458
459 Pattern pattern = Pattern.compile("\\b");
460 Matcher matcher = pattern.matcher("");
461 // S=other B=word character N=non spacing mark .=word boundary
462 // SS.BB.SS
463 String input = spaces + wordChar + wordChar + spaces;
464 twoFindIndexes(input, matcher, 2, 4);
465 // SS.BBN.SS
466 input = spaces + wordChar +wordChar + nsm + spaces;
467 twoFindIndexes(input, matcher, 2, 5);
468 // SS.BN.SS
469 input = spaces + wordChar + nsm + spaces;
470 twoFindIndexes(input, matcher, 2, 4);
471 // SS.BNN.SS
472 input = spaces + wordChar + nsm + nsm + spaces;
473 twoFindIndexes(input, matcher, 2, 5);
474 // SSN.BB.SS
475 input = spaces + nsm + wordChar + wordChar + spaces;
476 twoFindIndexes(input, matcher, 3, 5);
477 // SS.BNB.SS
478 input = spaces + wordChar + nsm + wordChar + spaces;
479 twoFindIndexes(input, matcher, 2, 5);
480 // SSNNSS
481 input = spaces + nsm + nsm + spaces;
482 matcher.reset(input);
483 if (matcher.find())
484 failCount++;
485 // SSN.BBN.SS
486 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
487 twoFindIndexes(input, matcher, 3, 6);
488
489 report("Unicode word boundary");
490 }
491
492 private static void twoFindIndexes(String input, Matcher matcher, int a,
493 int b) throws Exception
494 {
495 matcher.reset(input);
496 matcher.find();
497 if (matcher.start() != a)
498 failCount++;
499 matcher.find();
500 if (matcher.start() != b)
501 failCount++;
502 }
503
504 // This test is for 6284152
505 static void check(String regex, String input, String[] expected) {
506 List<String> result = new ArrayList<String>();
507 Pattern p = Pattern.compile(regex);
508 Matcher m = p.matcher(input);
509 while (m.find()) {
510 result.add(m.group());
511 }
512 if (!Arrays.asList(expected).equals(result))
513 failCount++;
514 }
515
516 private static void lookbehindTest() throws Exception {
517 //Positive
518 check("(?<=%.{0,5})foo\\d",
519 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
520 new String[]{"foo1", "foo2", "foo3"});
521
522 //boundary at end of the lookbehind sub-regex should work consistently
523 //with the boundary just after the lookbehind sub-regex
524 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
525 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
526 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
527 check("(?<!abc \\b)foo", "abc foo", new String[0]);
528
529 //Negative
530 check("(?<!%.{0,5})foo\\d",
531 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
532 new String[] {"foo4", "foo5"});
533
534 //Positive greedy
535 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
536
537 //Positive reluctant
538 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
539
540 //supplementary
541 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
542 new String[] {"fo\ud800\udc00o"});
543 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
544 new String[] {"fo\ud800\udc00o"});
545 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
546 new String[] {"fo\ud800\udc00o"});
547 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
548 new String[] {"fo\ud800\udc00o"});
549 report("Lookbehind");
550 }
551
552 // This test is for 4938995
553 // Check to see if weak region boundaries are transparent to
554 // lookahead and lookbehind constructs
555 private static void boundsTest() throws Exception {
556 String fullMessage = "catdogcat";
557 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
558 Matcher matcher = pattern.matcher("catdogca");
559 matcher.useTransparentBounds(true);
560 if (matcher.find())
561 failCount++;
562 matcher.reset("atdogcat");
563 if (matcher.find())
564 failCount++;
565 matcher.reset(fullMessage);
566 if (!matcher.find())
567 failCount++;
568 matcher.reset(fullMessage);
569 matcher.region(0,9);
570 if (!matcher.find())
571 failCount++;
572 matcher.reset(fullMessage);
573 matcher.region(0,6);
574 if (!matcher.find())
575 failCount++;
576 matcher.reset(fullMessage);
577 matcher.region(3,6);
578 if (!matcher.find())
579 failCount++;
580 matcher.useTransparentBounds(false);
581 if (matcher.find())
582 failCount++;
583
584 // Negative lookahead/lookbehind
585 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
586 matcher = pattern.matcher("dogcat");
587 matcher.useTransparentBounds(true);
588 matcher.region(0,3);
589 if (matcher.find())
590 failCount++;
591 matcher.reset("catdog");
592 matcher.region(3,6);
593 if (matcher.find())
594 failCount++;
595 matcher.useTransparentBounds(false);
596 matcher.reset("dogcat");
597 matcher.region(0,3);
598 if (!matcher.find())
599 failCount++;
600 matcher.reset("catdog");
601 matcher.region(3,6);
602 if (!matcher.find())
603 failCount++;
604
605 report("Region bounds transparency");
606 }
607
608 // This test is for 4945394
609 private static void findFromTest() throws Exception {
610 String message = "This is 40 $0 message.";
611 Pattern pat = Pattern.compile("\\$0");
612 Matcher match = pat.matcher(message);
613 if (!match.find())
614 failCount++;
615 if (match.find())
616 failCount++;
617 if (match.find())
618 failCount++;
619 report("Check for alternating find");
620 }
621
622 // This test is for 4872664 and 4892980
623 private static void negatedCharClassTest() throws Exception {
624 Pattern pattern = Pattern.compile("[^>]");
625 Matcher matcher = pattern.matcher("\u203A");
626 if (!matcher.matches())
627 failCount++;
628 pattern = Pattern.compile("[^fr]");
629 matcher = pattern.matcher("a");
630 if (!matcher.find())
631 failCount++;
632 matcher.reset("\u203A");
633 if (!matcher.find())
634 failCount++;
635 String s = "for";
636 String result[] = s.split("[^fr]");
637 if (!result[0].equals("f"))
638 failCount++;
639 if (!result[1].equals("r"))
640 failCount++;
641 s = "f\u203Ar";
642 result = s.split("[^fr]");
643 if (!result[0].equals("f"))
644 failCount++;
645 if (!result[1].equals("r"))
646 failCount++;
647
648 // Test adding to bits, subtracting a node, then adding to bits again
649 pattern = Pattern.compile("[^f\u203Ar]");
650 matcher = pattern.matcher("a");
651 if (!matcher.find())
652 failCount++;
653 matcher.reset("f");
654 if (matcher.find())
655 failCount++;
656 matcher.reset("\u203A");
657 if (matcher.find())
658 failCount++;
659 matcher.reset("r");
660 if (matcher.find())
661 failCount++;
662 matcher.reset("\u203B");
663 if (!matcher.find())
664 failCount++;
665
666 // Test subtracting a node, adding to bits, subtracting again
667 pattern = Pattern.compile("[^\u203Ar\u203B]");
668 matcher = pattern.matcher("a");
669 if (!matcher.find())
670 failCount++;
671 matcher.reset("\u203A");
672 if (matcher.find())
673 failCount++;
674 matcher.reset("r");
675 if (matcher.find())
676 failCount++;
677 matcher.reset("\u203B");
678 if (matcher.find())
679 failCount++;
680 matcher.reset("\u203C");
681 if (!matcher.find())
682 failCount++;
683
684 report("Negated Character Class");
685 }
686
687 // This test is for 4628291
688 private static void toStringTest() throws Exception {
689 Pattern pattern = Pattern.compile("b+");
690 if (pattern.toString() != "b+")
691 failCount++;
692 Matcher matcher = pattern.matcher("aaabbbccc");
693 String matcherString = matcher.toString(); // unspecified
694 matcher.find();
695 matcherString = matcher.toString(); // unspecified
696 matcher.region(0,3);
697 matcherString = matcher.toString(); // unspecified
698 matcher.reset();
699 matcherString = matcher.toString(); // unspecified
700 report("toString");
701 }
702
703 // This test is for 4808962
704 private static void literalPatternTest() throws Exception {
705 int flags = Pattern.LITERAL;
706
707 Pattern pattern = Pattern.compile("abc\\t$^", flags);
708 check(pattern, "abc\\t$^", true);
709
710 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
711 check(pattern, "abc\\t$^", true);
712
713 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
714 check(pattern, "\\Qa^$bcabc\\E", true);
715 check(pattern, "a^$bcabc", false);
716
717 pattern = Pattern.compile("\\\\Q\\\\E");
718 check(pattern, "\\Q\\E", true);
719
720 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
721 check(pattern, "abcefg\\Q\\Ehij", true);
722
723 pattern = Pattern.compile("\\\\\\Q\\\\E");
724 check(pattern, "\\\\\\\\", true);
725
726 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
727 check(pattern, "\\Qa^$bcabc\\E", true);
728 check(pattern, "a^$bcabc", false);
729
730 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
731 check(pattern, "\\Qabc\\Edef", true);
732 check(pattern, "abcdef", false);
733
734 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
735 check(pattern, "abc\\Edef", true);
736 check(pattern, "abcdef", false);
737
738 pattern = Pattern.compile(Pattern.quote("\\E"));
739 check(pattern, "\\E", true);
740
741 pattern = Pattern.compile("((((abc.+?:)", flags);
742 check(pattern, "((((abc.+?:)", true);
743
744 flags |= Pattern.MULTILINE;
745
746 pattern = Pattern.compile("^cat$", flags);
747 check(pattern, "abc^cat$def", true);
748 check(pattern, "cat", false);
749
750 flags |= Pattern.CASE_INSENSITIVE;
751
752 pattern = Pattern.compile("abcdef", flags);
753 check(pattern, "ABCDEF", true);
754 check(pattern, "AbCdEf", true);
755
756 flags |= Pattern.DOTALL;
757
758 pattern = Pattern.compile("a...b", flags);
759 check(pattern, "A...b", true);
760 check(pattern, "Axxxb", false);
761
762 flags |= Pattern.CANON_EQ;
763
764 Pattern p = Pattern.compile("testa\u030a", flags);
765 check(pattern, "testa\u030a", false);
766 check(pattern, "test\u00e5", false);
767
768 // Supplementary character test
769 flags = Pattern.LITERAL;
770
771 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
772 check(pattern, toSupplementaries("abc\\t$^"), true);
773
774 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
775 check(pattern, toSupplementaries("abc\\t$^"), true);
776
777 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
778 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
779 check(pattern, toSupplementaries("a^$bcabc"), false);
780
781 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
782 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
783 check(pattern, toSupplementaries("a^$bcabc"), false);
784
785 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
786 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
787 check(pattern, toSupplementaries("abcdef"), false);
788
789 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
790 check(pattern, toSupplementaries("abc\\Edef"), true);
791 check(pattern, toSupplementaries("abcdef"), false);
792
793 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
794 check(pattern, toSupplementaries("((((abc.+?:)"), true);
795
796 flags |= Pattern.MULTILINE;
797
798 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
799 check(pattern, toSupplementaries("abc^cat$def"), true);
800 check(pattern, toSupplementaries("cat"), false);
801
802 flags |= Pattern.DOTALL;
803
804 // note: this is case-sensitive.
805 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
806 check(pattern, toSupplementaries("a...b"), true);
807 check(pattern, toSupplementaries("axxxb"), false);
808
809 flags |= Pattern.CANON_EQ;
810
811 String t = toSupplementaries("test");
812 p = Pattern.compile(t + "a\u030a", flags);
813 check(pattern, t + "a\u030a", false);
814 check(pattern, t + "\u00e5", false);
815
816 report("Literal pattern");
817 }
818
819 // This test is for 4803179
820 // This test is also for 4808962, replacement parts
821 private static void literalReplacementTest() throws Exception {
822 int flags = Pattern.LITERAL;
823
824 Pattern pattern = Pattern.compile("abc", flags);
825 Matcher matcher = pattern.matcher("zzzabczzz");
826 String replaceTest = "$0";
827 String result = matcher.replaceAll(replaceTest);
828 if (!result.equals("zzzabczzz"))
829 failCount++;
830
831 matcher.reset();
832 String literalReplacement = matcher.quoteReplacement(replaceTest);
833 result = matcher.replaceAll(literalReplacement);
834 if (!result.equals("zzz$0zzz"))
835 failCount++;
836
837 matcher.reset();
838 replaceTest = "\\t$\\$";
839 literalReplacement = matcher.quoteReplacement(replaceTest);
840 result = matcher.replaceAll(literalReplacement);
841 if (!result.equals("zzz\\t$\\$zzz"))
842 failCount++;
843
844 // Supplementary character test
845 pattern = Pattern.compile(toSupplementaries("abc"), flags);
846 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
847 replaceTest = "$0";
848 result = matcher.replaceAll(replaceTest);
849 if (!result.equals(toSupplementaries("zzzabczzz")))
850 failCount++;
851
852 matcher.reset();
853 literalReplacement = matcher.quoteReplacement(replaceTest);
854 result = matcher.replaceAll(literalReplacement);
855 if (!result.equals(toSupplementaries("zzz$0zzz")))
856 failCount++;
857
858 matcher.reset();
859 replaceTest = "\\t$\\$";
860 literalReplacement = matcher.quoteReplacement(replaceTest);
861 result = matcher.replaceAll(literalReplacement);
862 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
863 failCount++;
864
sherman5c8f3492012-04-12 15:01:41 -0700865 // IAE should be thrown if backslash or '$' is the last character
866 // in replacement string
867 try {
868 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700869 failCount++;
870 } catch (IllegalArgumentException iie) {
871 } catch (Exception e) {
872 failCount++;
873 }
874 try {
sherman5c8f3492012-04-12 15:01:41 -0700875 "\uac00".replaceAll("\uac00", "\\");
876 failCount++;
877 } catch (IllegalArgumentException iie) {
878 } catch (Exception e) {
879 failCount++;
880 }
sherman0b4d42d2009-02-23 21:06:15 -0800881 report("Literal replacement");
882 }
883
884 // This test is for 4757029
885 private static void regionTest() throws Exception {
886 Pattern pattern = Pattern.compile("abc");
887 Matcher matcher = pattern.matcher("abcdefabc");
888
889 matcher.region(0,9);
890 if (!matcher.find())
891 failCount++;
892 if (!matcher.find())
893 failCount++;
894 matcher.region(0,3);
895 if (!matcher.find())
896 failCount++;
897 matcher.region(3,6);
898 if (matcher.find())
899 failCount++;
900 matcher.region(0,2);
901 if (matcher.find())
902 failCount++;
903
904 expectRegionFail(matcher, 1, -1);
905 expectRegionFail(matcher, -1, -1);
906 expectRegionFail(matcher, -1, 1);
907 expectRegionFail(matcher, 5, 3);
908 expectRegionFail(matcher, 5, 12);
909 expectRegionFail(matcher, 12, 12);
910
911 pattern = Pattern.compile("^abc$");
912 matcher = pattern.matcher("zzzabczzz");
913 matcher.region(0,9);
914 if (matcher.find())
915 failCount++;
916 matcher.region(3,6);
917 if (!matcher.find())
918 failCount++;
919 matcher.region(3,6);
920 matcher.useAnchoringBounds(false);
921 if (matcher.find())
922 failCount++;
923
924 // Supplementary character test
925 pattern = Pattern.compile(toSupplementaries("abc"));
926 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
927 matcher.region(0,9*2);
928 if (!matcher.find())
929 failCount++;
930 if (!matcher.find())
931 failCount++;
932 matcher.region(0,3*2);
933 if (!matcher.find())
934 failCount++;
935 matcher.region(1,3*2);
936 if (matcher.find())
937 failCount++;
938 matcher.region(3*2,6*2);
939 if (matcher.find())
940 failCount++;
941 matcher.region(0,2*2);
942 if (matcher.find())
943 failCount++;
944 matcher.region(0,2*2+1);
945 if (matcher.find())
946 failCount++;
947
948 expectRegionFail(matcher, 1*2, -1);
949 expectRegionFail(matcher, -1, -1);
950 expectRegionFail(matcher, -1, 1*2);
951 expectRegionFail(matcher, 5*2, 3*2);
952 expectRegionFail(matcher, 5*2, 12*2);
953 expectRegionFail(matcher, 12*2, 12*2);
954
955 pattern = Pattern.compile(toSupplementaries("^abc$"));
956 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
957 matcher.region(0,9*2);
958 if (matcher.find())
959 failCount++;
960 matcher.region(3*2,6*2);
961 if (!matcher.find())
962 failCount++;
963 matcher.region(3*2+1,6*2);
964 if (matcher.find())
965 failCount++;
966 matcher.region(3*2,6*2-1);
967 if (matcher.find())
968 failCount++;
969 matcher.region(3*2,6*2);
970 matcher.useAnchoringBounds(false);
971 if (matcher.find())
972 failCount++;
973 report("Regions");
974 }
975
976 private static void expectRegionFail(Matcher matcher, int index1,
977 int index2)
978 {
979 try {
980 matcher.region(index1, index2);
981 failCount++;
982 } catch (IndexOutOfBoundsException ioobe) {
983 // Correct result
984 } catch (IllegalStateException ise) {
985 // Correct result
986 }
987 }
988
989 // This test is for 4803197
990 private static void escapedSegmentTest() throws Exception {
991
992 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
993 check(pattern, "dir1\\dir2", true);
994
995 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
996 check(pattern, "dir1\\dir2\\", true);
997
998 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
999 check(pattern, "dir1\\dir2\\", true);
1000
1001 // Supplementary character test
1002 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1003 check(pattern, toSupplementaries("dir1\\dir2"), true);
1004
1005 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1006 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1007
1008 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1009 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1010
1011 report("Escaped segment");
1012 }
1013
1014 // This test is for 4792284
1015 private static void nonCaptureRepetitionTest() throws Exception {
1016 String input = "abcdefgh;";
1017
1018 String[] patterns = new String[] {
1019 "(?:\\w{4})+;",
1020 "(?:\\w{8})*;",
1021 "(?:\\w{2}){2,4};",
1022 "(?:\\w{4}){2,};", // only matches the
1023 ".*?(?:\\w{5})+;", // specified minimum
1024 ".*?(?:\\w{9})*;", // number of reps - OK
1025 "(?:\\w{4})+?;", // lazy repetition - OK
1026 "(?:\\w{4})++;", // possessive repetition - OK
1027 "(?:\\w{2,}?)+;", // non-deterministic - OK
1028 "(\\w{4})+;", // capturing group - OK
1029 };
1030
1031 for (int i = 0; i < patterns.length; i++) {
1032 // Check find()
1033 check(patterns[i], 0, input, input, true);
1034 // Check matches()
1035 Pattern p = Pattern.compile(patterns[i]);
1036 Matcher m = p.matcher(input);
1037
1038 if (m.matches()) {
1039 if (!m.group(0).equals(input))
1040 failCount++;
1041 } else {
1042 failCount++;
1043 }
1044 }
1045
1046 report("Non capturing repetition");
1047 }
1048
1049 // This test is for 6358731
1050 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1051 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1052 Matcher matcher = pattern.matcher("abcd");
1053 if (!matcher.matches() ||
1054 matcher.group(1) != null ||
1055 !matcher.group(2).equals("abcd")) {
1056 failCount++;
1057 }
1058 report("Not captured GroupCurly");
1059 }
1060
1061 // This test is for 4706545
1062 private static void javaCharClassTest() throws Exception {
1063 for (int i=0; i<1000; i++) {
1064 char c = (char)generator.nextInt();
1065 check("{javaLowerCase}", c, Character.isLowerCase(c));
1066 check("{javaUpperCase}", c, Character.isUpperCase(c));
1067 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1068 check("{javaTitleCase}", c, Character.isTitleCase(c));
1069 check("{javaDigit}", c, Character.isDigit(c));
1070 check("{javaDefined}", c, Character.isDefined(c));
1071 check("{javaLetter}", c, Character.isLetter(c));
1072 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1073 check("{javaJavaIdentifierStart}", c,
1074 Character.isJavaIdentifierStart(c));
1075 check("{javaJavaIdentifierPart}", c,
1076 Character.isJavaIdentifierPart(c));
1077 check("{javaUnicodeIdentifierStart}", c,
1078 Character.isUnicodeIdentifierStart(c));
1079 check("{javaUnicodeIdentifierPart}", c,
1080 Character.isUnicodeIdentifierPart(c));
1081 check("{javaIdentifierIgnorable}", c,
1082 Character.isIdentifierIgnorable(c));
1083 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1084 check("{javaWhitespace}", c, Character.isWhitespace(c));
1085 check("{javaISOControl}", c, Character.isISOControl(c));
1086 check("{javaMirrored}", c, Character.isMirrored(c));
1087
1088 }
1089
1090 // Supplementary character test
1091 for (int i=0; i<1000; i++) {
1092 int c = generator.nextInt(Character.MAX_CODE_POINT
1093 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1094 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1095 check("{javaLowerCase}", c, Character.isLowerCase(c));
1096 check("{javaUpperCase}", c, Character.isUpperCase(c));
1097 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1098 check("{javaTitleCase}", c, Character.isTitleCase(c));
1099 check("{javaDigit}", c, Character.isDigit(c));
1100 check("{javaDefined}", c, Character.isDefined(c));
1101 check("{javaLetter}", c, Character.isLetter(c));
1102 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1103 check("{javaJavaIdentifierStart}", c,
1104 Character.isJavaIdentifierStart(c));
1105 check("{javaJavaIdentifierPart}", c,
1106 Character.isJavaIdentifierPart(c));
1107 check("{javaUnicodeIdentifierStart}", c,
1108 Character.isUnicodeIdentifierStart(c));
1109 check("{javaUnicodeIdentifierPart}", c,
1110 Character.isUnicodeIdentifierPart(c));
1111 check("{javaIdentifierIgnorable}", c,
1112 Character.isIdentifierIgnorable(c));
1113 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1114 check("{javaWhitespace}", c, Character.isWhitespace(c));
1115 check("{javaISOControl}", c, Character.isISOControl(c));
1116 check("{javaMirrored}", c, Character.isMirrored(c));
1117 }
1118
1119 report("Java character classes");
1120 }
1121
1122 // This test is for 4523620
1123 /*
1124 private static void numOccurrencesTest() throws Exception {
1125 Pattern pattern = Pattern.compile("aaa");
1126
1127 if (pattern.numOccurrences("aaaaaa", false) != 2)
1128 failCount++;
1129 if (pattern.numOccurrences("aaaaaa", true) != 4)
1130 failCount++;
1131
1132 pattern = Pattern.compile("^");
1133 if (pattern.numOccurrences("aaaaaa", false) != 1)
1134 failCount++;
1135 if (pattern.numOccurrences("aaaaaa", true) != 1)
1136 failCount++;
1137
1138 report("Number of Occurrences");
1139 }
1140 */
1141
1142 // This test is for 4776374
1143 private static void caretBetweenTerminatorsTest() throws Exception {
1144 int flags1 = Pattern.DOTALL;
1145 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1146 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1147 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1148
1149 check("^....", flags1, "test\ntest", "test", true);
1150 check(".....^", flags1, "test\ntest", "test", false);
1151 check(".....^", flags1, "test\n", "test", false);
1152 check("....^", flags1, "test\r\n", "test", false);
1153
1154 check("^....", flags2, "test\ntest", "test", true);
1155 check("....^", flags2, "test\ntest", "test", false);
1156 check(".....^", flags2, "test\n", "test", false);
1157 check("....^", flags2, "test\r\n", "test", false);
1158
1159 check("^....", flags3, "test\ntest", "test", true);
1160 check(".....^", flags3, "test\ntest", "test\n", true);
1161 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1162 check(".....^", flags3, "test\n", "test", false);
1163 check(".....^", flags3, "test\r\n", "test", false);
1164 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1165
1166 check("^....", flags4, "test\ntest", "test", true);
1167 check(".....^", flags3, "test\ntest", "test\n", true);
1168 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1169 check(".....^", flags4, "test\n", "test\n", false);
1170 check(".....^", flags4, "test\r\n", "test\r", false);
1171
1172 // Supplementary character test
1173 String t = toSupplementaries("test");
1174 check("^....", flags1, t+"\n"+t, t, true);
1175 check(".....^", flags1, t+"\n"+t, t, false);
1176 check(".....^", flags1, t+"\n", t, false);
1177 check("....^", flags1, t+"\r\n", t, false);
1178
1179 check("^....", flags2, t+"\n"+t, t, true);
1180 check("....^", flags2, t+"\n"+t, t, false);
1181 check(".....^", flags2, t+"\n", t, false);
1182 check("....^", flags2, t+"\r\n", t, false);
1183
1184 check("^....", flags3, t+"\n"+t, t, true);
1185 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1186 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1187 check(".....^", flags3, t+"\n", t, false);
1188 check(".....^", flags3, t+"\r\n", t, false);
1189 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1190
1191 check("^....", flags4, t+"\n"+t, t, true);
1192 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1193 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1194 check(".....^", flags4, t+"\n", t+"\n", false);
1195 check(".....^", flags4, t+"\r\n", t+"\r", false);
1196
1197 report("Caret between terminators");
1198 }
1199
1200 // This test is for 4727935
1201 private static void dollarAtEndTest() throws Exception {
1202 int flags1 = Pattern.DOTALL;
1203 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1204 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1205
1206 check("....$", flags1, "test\n", "test", true);
1207 check("....$", flags1, "test\r\n", "test", true);
1208 check(".....$", flags1, "test\n", "test\n", true);
1209 check(".....$", flags1, "test\u0085", "test\u0085", true);
1210 check("....$", flags1, "test\u0085", "test", true);
1211
1212 check("....$", flags2, "test\n", "test", true);
1213 check(".....$", flags2, "test\n", "test\n", true);
1214 check(".....$", flags2, "test\u0085", "test\u0085", true);
1215 check("....$", flags2, "test\u0085", "est\u0085", true);
1216
1217 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1218 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1219 check("....$blah", flags3, "test\nblah", "!!!!", false);
1220 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1221
1222 // Supplementary character test
1223 String t = toSupplementaries("test");
1224 String b = toSupplementaries("blah");
1225 check("....$", flags1, t+"\n", t, true);
1226 check("....$", flags1, t+"\r\n", t, true);
1227 check(".....$", flags1, t+"\n", t+"\n", true);
1228 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1229 check("....$", flags1, t+"\u0085", t, true);
1230
1231 check("....$", flags2, t+"\n", t, true);
1232 check(".....$", flags2, t+"\n", t+"\n", true);
1233 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1234 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1235
1236 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1237 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1238 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1239 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1240
1241 report("Dollar at End");
1242 }
1243
1244 // This test is for 4711773
1245 private static void multilineDollarTest() throws Exception {
1246 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1247 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1248 matcher.find();
1249 if (matcher.start(0) != 9)
1250 failCount++;
1251 matcher.find();
1252 if (matcher.start(0) != 20)
1253 failCount++;
1254
1255 // Supplementary character test
1256 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1257 matcher.find();
1258 if (matcher.start(0) != 9*2)
1259 failCount++;
1260 matcher.find();
1261 if (matcher.start(0) != 20*2)
1262 failCount++;
1263
1264 report("Multiline Dollar");
1265 }
1266
1267 private static void reluctantRepetitionTest() throws Exception {
1268 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1269 check(p, "1 word word word 2", true);
1270 check(p, "1 wor wo w 2", true);
1271 check(p, "1 word word 2", true);
1272 check(p, "1 word 2", true);
1273 check(p, "1 wo w w 2", true);
1274 check(p, "1 wo w 2", true);
1275 check(p, "1 wor w 2", true);
1276
1277 p = Pattern.compile("([a-z])+?c");
1278 Matcher m = p.matcher("ababcdefdec");
1279 check(m, "ababc");
1280
1281 // Supplementary character test
1282 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1283 m = p.matcher(toSupplementaries("ababcdefdec"));
1284 check(m, toSupplementaries("ababc"));
1285
1286 report("Reluctant Repetition");
1287 }
1288
1289 private static void serializeTest() throws Exception {
1290 String patternStr = "(b)";
1291 String matchStr = "b";
1292 Pattern pattern = Pattern.compile(patternStr);
1293 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1294 ObjectOutputStream oos = new ObjectOutputStream(baos);
1295 oos.writeObject(pattern);
1296 oos.close();
1297 ObjectInputStream ois = new ObjectInputStream(
1298 new ByteArrayInputStream(baos.toByteArray()));
1299 Pattern serializedPattern = (Pattern)ois.readObject();
1300 ois.close();
1301 Matcher matcher = serializedPattern.matcher(matchStr);
1302 if (!matcher.matches())
1303 failCount++;
1304 if (matcher.groupCount() != 1)
1305 failCount++;
1306
1307 report("Serialization");
1308 }
1309
1310 private static void gTest() {
1311 Pattern pattern = Pattern.compile("\\G\\w");
1312 Matcher matcher = pattern.matcher("abc#x#x");
1313 matcher.find();
1314 matcher.find();
1315 matcher.find();
1316 if (matcher.find())
1317 failCount++;
1318
1319 pattern = Pattern.compile("\\GA*");
1320 matcher = pattern.matcher("1A2AA3");
1321 matcher.find();
1322 if (matcher.find())
1323 failCount++;
1324
1325 pattern = Pattern.compile("\\GA*");
1326 matcher = pattern.matcher("1A2AA3");
1327 if (!matcher.find(1))
1328 failCount++;
1329 matcher.find();
1330 if (matcher.find())
1331 failCount++;
1332
1333 report("\\G");
1334 }
1335
1336 private static void zTest() {
1337 Pattern pattern = Pattern.compile("foo\\Z");
1338 // Positives
1339 check(pattern, "foo\u0085", true);
1340 check(pattern, "foo\u2028", true);
1341 check(pattern, "foo\u2029", true);
1342 check(pattern, "foo\n", true);
1343 check(pattern, "foo\r", true);
1344 check(pattern, "foo\r\n", true);
1345 // Negatives
1346 check(pattern, "fooo", false);
1347 check(pattern, "foo\n\r", false);
1348
1349 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1350 // Positives
1351 check(pattern, "foo", true);
1352 check(pattern, "foo\n", true);
1353 // Negatives
1354 check(pattern, "foo\r", false);
1355 check(pattern, "foo\u0085", false);
1356 check(pattern, "foo\u2028", false);
1357 check(pattern, "foo\u2029", false);
1358
1359 report("\\Z");
1360 }
1361
1362 private static void replaceFirstTest() {
1363 Pattern pattern = Pattern.compile("(ab)(c*)");
1364 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1365 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1366 failCount++;
1367
1368 matcher.reset("zzzabccczzzabcczzzabccczzz");
1369 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1370 failCount++;
1371
1372 matcher.reset("zzzabccczzzabcczzzabccczzz");
1373 String result = matcher.replaceFirst("$1");
1374 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1375 failCount++;
1376
1377 matcher.reset("zzzabccczzzabcczzzabccczzz");
1378 result = matcher.replaceFirst("$2");
1379 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1380 failCount++;
1381
1382 pattern = Pattern.compile("a*");
1383 matcher = pattern.matcher("aaaaaaaaaa");
1384 if (!matcher.replaceFirst("test").equals("test"))
1385 failCount++;
1386
1387 pattern = Pattern.compile("a+");
1388 matcher = pattern.matcher("zzzaaaaaaaaaa");
1389 if (!matcher.replaceFirst("test").equals("zzztest"))
1390 failCount++;
1391
1392 // Supplementary character test
1393 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1394 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1395 if (!matcher.replaceFirst(toSupplementaries("test"))
1396 .equals(toSupplementaries("testzzzabcczzzabccc")))
1397 failCount++;
1398
1399 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1400 if (!matcher.replaceFirst(toSupplementaries("test")).
1401 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1402 failCount++;
1403
1404 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1405 result = matcher.replaceFirst("$1");
1406 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1407 failCount++;
1408
1409 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1410 result = matcher.replaceFirst("$2");
1411 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1412 failCount++;
1413
1414 pattern = Pattern.compile(toSupplementaries("a*"));
1415 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1416 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1417 failCount++;
1418
1419 pattern = Pattern.compile(toSupplementaries("a+"));
1420 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1421 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1422 failCount++;
1423
1424 report("Replace First");
1425 }
1426
1427 private static void unixLinesTest() {
1428 Pattern pattern = Pattern.compile(".*");
1429 Matcher matcher = pattern.matcher("aa\u2028blah");
1430 matcher.find();
1431 if (!matcher.group(0).equals("aa"))
1432 failCount++;
1433
1434 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1435 matcher = pattern.matcher("aa\u2028blah");
1436 matcher.find();
1437 if (!matcher.group(0).equals("aa\u2028blah"))
1438 failCount++;
1439
1440 pattern = Pattern.compile("[az]$",
1441 Pattern.MULTILINE | Pattern.UNIX_LINES);
1442 matcher = pattern.matcher("aa\u2028zz");
1443 check(matcher, "a\u2028", false);
1444
1445 // Supplementary character test
1446 pattern = Pattern.compile(".*");
1447 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1448 matcher.find();
1449 if (!matcher.group(0).equals(toSupplementaries("aa")))
1450 failCount++;
1451
1452 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1453 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1454 matcher.find();
1455 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1456 failCount++;
1457
1458 pattern = Pattern.compile(toSupplementaries("[az]$"),
1459 Pattern.MULTILINE | Pattern.UNIX_LINES);
1460 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1461 check(matcher, toSupplementaries("a\u2028"), false);
1462
1463 report("Unix Lines");
1464 }
1465
1466 private static void commentsTest() {
1467 int flags = Pattern.COMMENTS;
1468
1469 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1470 Matcher matcher = pattern.matcher("aa#aa");
1471 if (!matcher.matches())
1472 failCount++;
1473
1474 pattern = Pattern.compile("aa # blah", flags);
1475 matcher = pattern.matcher("aa");
1476 if (!matcher.matches())
1477 failCount++;
1478
1479 pattern = Pattern.compile("aa blah", flags);
1480 matcher = pattern.matcher("aablah");
1481 if (!matcher.matches())
1482 failCount++;
1483
1484 pattern = Pattern.compile("aa # blah blech ", flags);
1485 matcher = pattern.matcher("aa");
1486 if (!matcher.matches())
1487 failCount++;
1488
1489 pattern = Pattern.compile("aa # blah\n ", flags);
1490 matcher = pattern.matcher("aa");
1491 if (!matcher.matches())
1492 failCount++;
1493
1494 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1495 matcher = pattern.matcher("aabc");
1496 if (!matcher.matches())
1497 failCount++;
1498
1499 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1500 matcher = pattern.matcher("aabc");
1501 if (!matcher.matches())
1502 failCount++;
1503
1504 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1505 matcher = pattern.matcher("aabc#blech");
1506 if (!matcher.matches())
1507 failCount++;
1508
1509 // Supplementary character test
1510 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1511 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1512 if (!matcher.matches())
1513 failCount++;
1514
1515 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1516 matcher = pattern.matcher(toSupplementaries("aa"));
1517 if (!matcher.matches())
1518 failCount++;
1519
1520 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1521 matcher = pattern.matcher(toSupplementaries("aablah"));
1522 if (!matcher.matches())
1523 failCount++;
1524
1525 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1526 matcher = pattern.matcher(toSupplementaries("aa"));
1527 if (!matcher.matches())
1528 failCount++;
1529
1530 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1531 matcher = pattern.matcher(toSupplementaries("aa"));
1532 if (!matcher.matches())
1533 failCount++;
1534
1535 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1536 matcher = pattern.matcher(toSupplementaries("aabc"));
1537 if (!matcher.matches())
1538 failCount++;
1539
1540 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1541 matcher = pattern.matcher(toSupplementaries("aabc"));
1542 if (!matcher.matches())
1543 failCount++;
1544
1545 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1546 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1547 if (!matcher.matches())
1548 failCount++;
1549
1550 report("Comments");
1551 }
1552
1553 private static void caseFoldingTest() { // bug 4504687
1554 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1555 Pattern pattern = Pattern.compile("aa", flags);
1556 Matcher matcher = pattern.matcher("ab");
1557 if (matcher.matches())
1558 failCount++;
1559
1560 pattern = Pattern.compile("aA", flags);
1561 matcher = pattern.matcher("ab");
1562 if (matcher.matches())
1563 failCount++;
1564
1565 pattern = Pattern.compile("aa", flags);
1566 matcher = pattern.matcher("aB");
1567 if (matcher.matches())
1568 failCount++;
1569 matcher = pattern.matcher("Ab");
1570 if (matcher.matches())
1571 failCount++;
1572
1573 // ASCII "a"
1574 // Latin-1 Supplement "a" + grave
1575 // Cyrillic "a"
1576 String[] patterns = new String[] {
1577 //single
1578 "a", "\u00e0", "\u0430",
1579 //slice
1580 "ab", "\u00e0\u00e1", "\u0430\u0431",
1581 //class single
1582 "[a]", "[\u00e0]", "[\u0430]",
1583 //class range
1584 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1585 //back reference
1586 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1587 };
1588
1589 String[] texts = new String[] {
1590 "A", "\u00c0", "\u0410",
1591 "AB", "\u00c0\u00c1", "\u0410\u0411",
1592 "A", "\u00c0", "\u0410",
1593 "B", "\u00c2", "\u0411",
1594 "aA", "\u00e0\u00c0", "\u0430\u0410"
1595 };
1596
1597 boolean[] expected = new boolean[] {
1598 true, false, false,
1599 true, false, false,
1600 true, false, false,
1601 true, false, false,
1602 true, false, false
1603 };
1604
1605 flags = Pattern.CASE_INSENSITIVE;
1606 for (int i = 0; i < patterns.length; i++) {
1607 pattern = Pattern.compile(patterns[i], flags);
1608 matcher = pattern.matcher(texts[i]);
1609 if (matcher.matches() != expected[i]) {
1610 System.out.println("<1> Failed at " + i);
1611 failCount++;
1612 }
1613 }
1614
1615 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1616 for (int i = 0; i < patterns.length; i++) {
1617 pattern = Pattern.compile(patterns[i], flags);
1618 matcher = pattern.matcher(texts[i]);
1619 if (!matcher.matches()) {
1620 System.out.println("<2> Failed at " + i);
1621 failCount++;
1622 }
1623 }
1624 // flag unicode_case alone should do nothing
1625 flags = Pattern.UNICODE_CASE;
1626 for (int i = 0; i < patterns.length; i++) {
1627 pattern = Pattern.compile(patterns[i], flags);
1628 matcher = pattern.matcher(texts[i]);
1629 if (matcher.matches()) {
1630 System.out.println("<3> Failed at " + i);
1631 failCount++;
1632 }
1633 }
1634
1635 // Special cases: i, I, u+0131 and u+0130
1636 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1637 pattern = Pattern.compile("[h-j]+", flags);
1638 if (!pattern.matcher("\u0131\u0130").matches())
1639 failCount++;
1640 report("Case Folding");
1641 }
1642
1643 private static void appendTest() {
1644 Pattern pattern = Pattern.compile("(ab)(cd)");
1645 Matcher matcher = pattern.matcher("abcd");
1646 String result = matcher.replaceAll("$2$1");
1647 if (!result.equals("cdab"))
1648 failCount++;
1649
1650 String s1 = "Swap all: first = 123, second = 456";
1651 String s2 = "Swap one: first = 123, second = 456";
1652 String r = "$3$2$1";
1653 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1654 matcher = pattern.matcher(s1);
1655
1656 result = matcher.replaceAll(r);
1657 if (!result.equals("Swap all: 123 = first, 456 = second"))
1658 failCount++;
1659
1660 matcher = pattern.matcher(s2);
1661
1662 if (matcher.find()) {
1663 StringBuffer sb = new StringBuffer();
1664 matcher.appendReplacement(sb, r);
1665 matcher.appendTail(sb);
1666 result = sb.toString();
1667 if (!result.equals("Swap one: 123 = first, second = 456"))
1668 failCount++;
1669 }
1670
1671 // Supplementary character test
1672 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1673 matcher = pattern.matcher(toSupplementaries("abcd"));
1674 result = matcher.replaceAll("$2$1");
1675 if (!result.equals(toSupplementaries("cdab")))
1676 failCount++;
1677
1678 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1679 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1680 r = toSupplementaries("$3$2$1");
1681 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1682 matcher = pattern.matcher(s1);
1683
1684 result = matcher.replaceAll(r);
1685 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1686 failCount++;
1687
1688 matcher = pattern.matcher(s2);
1689
1690 if (matcher.find()) {
1691 StringBuffer sb = new StringBuffer();
1692 matcher.appendReplacement(sb, r);
1693 matcher.appendTail(sb);
1694 result = sb.toString();
1695 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1696 failCount++;
1697 }
1698 report("Append");
1699 }
1700
1701 private static void splitTest() {
1702 Pattern pattern = Pattern.compile(":");
1703 String[] result = pattern.split("foo:and:boo", 2);
1704 if (!result[0].equals("foo"))
1705 failCount++;
1706 if (!result[1].equals("and:boo"))
1707 failCount++;
1708 // Supplementary character test
1709 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1710 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1711 if (!result[0].equals(toSupplementaries("foo")))
1712 failCount++;
1713 if (!result[1].equals(toSupplementaries("andXboo")))
1714 failCount++;
1715
1716 CharBuffer cb = CharBuffer.allocate(100);
1717 cb.put("foo:and:boo");
1718 cb.flip();
1719 result = pattern.split(cb);
1720 if (!result[0].equals("foo"))
1721 failCount++;
1722 if (!result[1].equals("and"))
1723 failCount++;
1724 if (!result[2].equals("boo"))
1725 failCount++;
1726
1727 // Supplementary character test
1728 CharBuffer cbs = CharBuffer.allocate(100);
1729 cbs.put(toSupplementaries("fooXandXboo"));
1730 cbs.flip();
1731 result = patternX.split(cbs);
1732 if (!result[0].equals(toSupplementaries("foo")))
1733 failCount++;
1734 if (!result[1].equals(toSupplementaries("and")))
1735 failCount++;
1736 if (!result[2].equals(toSupplementaries("boo")))
1737 failCount++;
1738
1739 String source = "0123456789";
1740 for (int limit=-2; limit<3; limit++) {
1741 for (int x=0; x<10; x++) {
1742 result = source.split(Integer.toString(x), limit);
1743 int expectedLength = limit < 1 ? 2 : limit;
1744
1745 if ((limit == 0) && (x == 9)) {
1746 // expected dropping of ""
1747 if (result.length != 1)
1748 failCount++;
1749 if (!result[0].equals("012345678")) {
1750 failCount++;
1751 }
1752 } else {
1753 if (result.length != expectedLength) {
1754 failCount++;
1755 }
1756 if (!result[0].equals(source.substring(0,x))) {
1757 if (limit != 1) {
1758 failCount++;
1759 } else {
1760 if (!result[0].equals(source.substring(0,10))) {
1761 failCount++;
1762 }
1763 }
1764 }
1765 if (expectedLength > 1) { // Check segment 2
1766 if (!result[1].equals(source.substring(x+1,10)))
1767 failCount++;
1768 }
1769 }
1770 }
1771 }
1772 // Check the case for no match found
1773 for (int limit=-2; limit<3; limit++) {
1774 result = source.split("e", limit);
1775 if (result.length != 1)
1776 failCount++;
1777 if (!result[0].equals(source))
1778 failCount++;
1779 }
1780 // Check the case for limit == 0, source = "";
sherman1242a6d2013-11-13 11:26:01 -08001781 // split() now returns 0-length for empty source "" see #6559590
sherman0b4d42d2009-02-23 21:06:15 -08001782 source = "";
1783 result = source.split("e", 0);
sherman1242a6d2013-11-13 11:26:01 -08001784 if (result.length != 0)
sherman0b4d42d2009-02-23 21:06:15 -08001785 failCount++;
1786
sherman1242a6d2013-11-13 11:26:01 -08001787 // Check both split() and splitAsStraem(), especially for zero-lenth
1788 // input and zero-lenth match cases
1789 String[][] input = new String[][] {
1790 { " ", "Abc Efg Hij" }, // normal non-zero-match
1791 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1792 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1793 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1794 { "(?=\\p{Lu})", "AbcEfg" },
1795 { "(?=\\p{Lu})", "Abc" },
1796 { " ", "" }, // zero-length input
1797 { ".*", "" },
1798
1799 // some tests from PatternStreamTest.java
1800 { "4", "awgqwefg1fefw4vssv1vvv1" },
1801 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1802 { "1", "awgqwefg1fefw4vssv1vvv1" },
1803 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1804 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1805 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1806 { "\u56da", "" },
1807 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1808 { "o", "boo:and:foo" },
1809 { "o", "booooo:and:fooooo" },
1810 { "o", "fooooo:" },
1811 };
1812
1813 String[][] expected = new String[][] {
1814 { "Abc", "Efg", "Hij" },
1815 { "", "Abc", "Efg", "Hij" },
1816 { "Abc", "", "Efg", "Hij" },
1817 { "Abc", "Efg", "Hij" },
1818 { "Abc", "Efg" },
1819 { "Abc" },
1820 {},
1821 {},
1822
1823 { "awgqwefg1fefw", "vssv1vvv1" },
1824 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1825 { "awgqwefg", "fefw4vssv", "vvv" },
1826 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1827 { "1", "23", "456", "7890" },
1828 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1829 {},
1830 { "This", "is", "testing", "", "with", "different", "separators" },
1831 { "b", "", ":and:f" },
1832 { "b", "", "", "", "", ":and:f" },
1833 { "f", "", "", "", "", ":" },
1834 };
1835 for (int i = 0; i < input.length; i++) {
1836 pattern = Pattern.compile(input[i][0]);
1837 if (!Arrays.equals(pattern.split(input[i][1]), expected[i]))
1838 failCount++;
1839 if (!Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1840 expected[i]))
1841 failCount++;
1842 }
sherman0b4d42d2009-02-23 21:06:15 -08001843 report("Split");
1844 }
1845
1846 private static void negationTest() {
1847 Pattern pattern = Pattern.compile("[\\[@^]+");
1848 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1849 if (!matcher.find())
1850 failCount++;
1851 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1852 failCount++;
1853 pattern = Pattern.compile("[@\\[^]+");
1854 matcher = pattern.matcher("@@@@[[[[^^^^");
1855 if (!matcher.find())
1856 failCount++;
1857 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1858 failCount++;
1859 pattern = Pattern.compile("[@\\[^@]+");
1860 matcher = pattern.matcher("@@@@[[[[^^^^");
1861 if (!matcher.find())
1862 failCount++;
1863 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1864 failCount++;
1865
1866 pattern = Pattern.compile("\\)");
1867 matcher = pattern.matcher("xxx)xxx");
1868 if (!matcher.find())
1869 failCount++;
1870
1871 report("Negation");
1872 }
1873
1874 private static void ampersandTest() {
1875 Pattern pattern = Pattern.compile("[&@]+");
1876 check(pattern, "@@@@&&&&", true);
1877
1878 pattern = Pattern.compile("[@&]+");
1879 check(pattern, "@@@@&&&&", true);
1880
1881 pattern = Pattern.compile("[@\\&]+");
1882 check(pattern, "@@@@&&&&", true);
1883
1884 report("Ampersand");
1885 }
1886
1887 private static void octalTest() throws Exception {
1888 Pattern pattern = Pattern.compile("\\u0007");
1889 Matcher matcher = pattern.matcher("\u0007");
1890 if (!matcher.matches())
1891 failCount++;
1892 pattern = Pattern.compile("\\07");
1893 matcher = pattern.matcher("\u0007");
1894 if (!matcher.matches())
1895 failCount++;
1896 pattern = Pattern.compile("\\007");
1897 matcher = pattern.matcher("\u0007");
1898 if (!matcher.matches())
1899 failCount++;
1900 pattern = Pattern.compile("\\0007");
1901 matcher = pattern.matcher("\u0007");
1902 if (!matcher.matches())
1903 failCount++;
1904 pattern = Pattern.compile("\\040");
1905 matcher = pattern.matcher("\u0020");
1906 if (!matcher.matches())
1907 failCount++;
1908 pattern = Pattern.compile("\\0403");
1909 matcher = pattern.matcher("\u00203");
1910 if (!matcher.matches())
1911 failCount++;
1912 pattern = Pattern.compile("\\0103");
1913 matcher = pattern.matcher("\u0043");
1914 if (!matcher.matches())
1915 failCount++;
1916
1917 report("Octal");
1918 }
1919
1920 private static void longPatternTest() throws Exception {
1921 try {
1922 Pattern pattern = Pattern.compile(
1923 "a 32-character-long pattern xxxx");
1924 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1925 pattern = Pattern.compile("a thirty four character long regex");
1926 StringBuffer patternToBe = new StringBuffer(101);
1927 for (int i=0; i<100; i++)
1928 patternToBe.append((char)(97 + i%26));
1929 pattern = Pattern.compile(patternToBe.toString());
1930 } catch (PatternSyntaxException e) {
1931 failCount++;
1932 }
1933
1934 // Supplementary character test
1935 try {
1936 Pattern pattern = Pattern.compile(
1937 toSupplementaries("a 32-character-long pattern xxxx"));
1938 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1939 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1940 StringBuffer patternToBe = new StringBuffer(101*2);
1941 for (int i=0; i<100; i++)
1942 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1943 + 97 + i%26));
1944 pattern = Pattern.compile(patternToBe.toString());
1945 } catch (PatternSyntaxException e) {
1946 failCount++;
1947 }
1948 report("LongPattern");
1949 }
1950
1951 private static void group0Test() throws Exception {
1952 Pattern pattern = Pattern.compile("(tes)ting");
1953 Matcher matcher = pattern.matcher("testing");
1954 check(matcher, "testing");
1955
1956 matcher.reset("testing");
1957 if (matcher.lookingAt()) {
1958 if (!matcher.group(0).equals("testing"))
1959 failCount++;
1960 } else {
1961 failCount++;
1962 }
1963
1964 matcher.reset("testing");
1965 if (matcher.matches()) {
1966 if (!matcher.group(0).equals("testing"))
1967 failCount++;
1968 } else {
1969 failCount++;
1970 }
1971
1972 pattern = Pattern.compile("(tes)ting");
1973 matcher = pattern.matcher("testing");
1974 if (matcher.lookingAt()) {
1975 if (!matcher.group(0).equals("testing"))
1976 failCount++;
1977 } else {
1978 failCount++;
1979 }
1980
1981 pattern = Pattern.compile("^(tes)ting");
1982 matcher = pattern.matcher("testing");
1983 if (matcher.matches()) {
1984 if (!matcher.group(0).equals("testing"))
1985 failCount++;
1986 } else {
1987 failCount++;
1988 }
1989
1990 // Supplementary character test
1991 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1992 matcher = pattern.matcher(toSupplementaries("testing"));
1993 check(matcher, toSupplementaries("testing"));
1994
1995 matcher.reset(toSupplementaries("testing"));
1996 if (matcher.lookingAt()) {
1997 if (!matcher.group(0).equals(toSupplementaries("testing")))
1998 failCount++;
1999 } else {
2000 failCount++;
2001 }
2002
2003 matcher.reset(toSupplementaries("testing"));
2004 if (matcher.matches()) {
2005 if (!matcher.group(0).equals(toSupplementaries("testing")))
2006 failCount++;
2007 } else {
2008 failCount++;
2009 }
2010
2011 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2012 matcher = pattern.matcher(toSupplementaries("testing"));
2013 if (matcher.lookingAt()) {
2014 if (!matcher.group(0).equals(toSupplementaries("testing")))
2015 failCount++;
2016 } else {
2017 failCount++;
2018 }
2019
2020 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2021 matcher = pattern.matcher(toSupplementaries("testing"));
2022 if (matcher.matches()) {
2023 if (!matcher.group(0).equals(toSupplementaries("testing")))
2024 failCount++;
2025 } else {
2026 failCount++;
2027 }
2028
2029 report("Group0");
2030 }
2031
2032 private static void findIntTest() throws Exception {
2033 Pattern p = Pattern.compile("blah");
2034 Matcher m = p.matcher("zzzzblahzzzzzblah");
2035 boolean result = m.find(2);
2036 if (!result)
2037 failCount++;
2038
2039 p = Pattern.compile("$");
2040 m = p.matcher("1234567890");
2041 result = m.find(10);
2042 if (!result)
2043 failCount++;
2044 try {
2045 result = m.find(11);
2046 failCount++;
2047 } catch (IndexOutOfBoundsException e) {
2048 // correct result
2049 }
2050
2051 // Supplementary character test
2052 p = Pattern.compile(toSupplementaries("blah"));
2053 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2054 result = m.find(2);
2055 if (!result)
2056 failCount++;
2057
2058 report("FindInt");
2059 }
2060
2061 private static void emptyPatternTest() throws Exception {
2062 Pattern p = Pattern.compile("");
2063 Matcher m = p.matcher("foo");
2064
2065 // Should find empty pattern at beginning of input
2066 boolean result = m.find();
2067 if (result != true)
2068 failCount++;
2069 if (m.start() != 0)
2070 failCount++;
2071
2072 // Should not match entire input if input is not empty
2073 m.reset();
2074 result = m.matches();
2075 if (result == true)
2076 failCount++;
2077
2078 try {
2079 m.start(0);
2080 failCount++;
2081 } catch (IllegalStateException e) {
2082 // Correct result
2083 }
2084
2085 // Should match entire input if input is empty
2086 m.reset("");
2087 result = m.matches();
2088 if (result != true)
2089 failCount++;
2090
2091 result = Pattern.matches("", "");
2092 if (result != true)
2093 failCount++;
2094
2095 result = Pattern.matches("", "foo");
2096 if (result == true)
2097 failCount++;
2098 report("EmptyPattern");
2099 }
2100
2101 private static void charClassTest() throws Exception {
2102 Pattern pattern = Pattern.compile("blah[ab]]blech");
2103 check(pattern, "blahb]blech", true);
2104
2105 pattern = Pattern.compile("[abc[def]]");
2106 check(pattern, "b", true);
2107
2108 // Supplementary character tests
2109 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2110 check(pattern, toSupplementaries("blahb]blech"), true);
2111
2112 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2113 check(pattern, toSupplementaries("b"), true);
2114
2115 try {
2116 // u00ff when UNICODE_CASE
2117 pattern = Pattern.compile("[ab\u00ffcd]",
2118 Pattern.CASE_INSENSITIVE|
2119 Pattern.UNICODE_CASE);
2120 check(pattern, "ab\u00ffcd", true);
2121 check(pattern, "Ab\u0178Cd", true);
2122
2123 // u00b5 when UNICODE_CASE
2124 pattern = Pattern.compile("[ab\u00b5cd]",
2125 Pattern.CASE_INSENSITIVE|
2126 Pattern.UNICODE_CASE);
2127 check(pattern, "ab\u00b5cd", true);
2128 check(pattern, "Ab\u039cCd", true);
2129 } catch (Exception e) { failCount++; }
2130
2131 /* Special cases
2132 (1)LatinSmallLetterLongS u+017f
2133 (2)LatinSmallLetterDotlessI u+0131
2134 (3)LatineCapitalLetterIWithDotAbove u+0130
2135 (4)KelvinSign u+212a
2136 (5)AngstromSign u+212b
2137 */
2138 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2139 pattern = Pattern.compile("[sik\u00c5]+", flags);
2140 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2141 failCount++;
2142
2143 report("CharClass");
2144 }
2145
2146 private static void caretTest() throws Exception {
2147 Pattern pattern = Pattern.compile("\\w*");
2148 Matcher matcher = pattern.matcher("a#bc#def##g");
2149 check(matcher, "a");
2150 check(matcher, "");
2151 check(matcher, "bc");
2152 check(matcher, "");
2153 check(matcher, "def");
2154 check(matcher, "");
2155 check(matcher, "");
2156 check(matcher, "g");
2157 check(matcher, "");
2158 if (matcher.find())
2159 failCount++;
2160
2161 pattern = Pattern.compile("^\\w*");
2162 matcher = pattern.matcher("a#bc#def##g");
2163 check(matcher, "a");
2164 if (matcher.find())
2165 failCount++;
2166
2167 pattern = Pattern.compile("\\w");
2168 matcher = pattern.matcher("abc##x");
2169 check(matcher, "a");
2170 check(matcher, "b");
2171 check(matcher, "c");
2172 check(matcher, "x");
2173 if (matcher.find())
2174 failCount++;
2175
2176 pattern = Pattern.compile("^\\w");
2177 matcher = pattern.matcher("abc##x");
2178 check(matcher, "a");
2179 if (matcher.find())
2180 failCount++;
2181
2182 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2183 matcher = pattern.matcher("abcdef-ghi\njklmno");
2184 check(matcher, "abc");
2185 if (matcher.find())
2186 failCount++;
2187
2188 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2189 matcher = pattern.matcher("abcdef-ghi\njklmno");
2190 check(matcher, "abc");
2191 check(matcher, "jkl");
2192 if (matcher.find())
2193 failCount++;
2194
2195 pattern = Pattern.compile("^", Pattern.MULTILINE);
2196 matcher = pattern.matcher("this is some text");
2197 String result = matcher.replaceAll("X");
2198 if (!result.equals("Xthis is some text"))
2199 failCount++;
2200
2201 pattern = Pattern.compile("^");
2202 matcher = pattern.matcher("this is some text");
2203 result = matcher.replaceAll("X");
2204 if (!result.equals("Xthis is some text"))
2205 failCount++;
2206
2207 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2208 matcher = pattern.matcher("this is some text\n");
2209 result = matcher.replaceAll("X");
2210 if (!result.equals("Xthis is some text\n"))
2211 failCount++;
2212
2213 report("Caret");
2214 }
2215
2216 private static void groupCaptureTest() throws Exception {
2217 // Independent group
2218 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2219 Matcher matcher = pattern.matcher("xxxyyyzzz");
2220 matcher.find();
2221 try {
2222 String blah = matcher.group(1);
2223 failCount++;
2224 } catch (IndexOutOfBoundsException ioobe) {
2225 // Good result
2226 }
2227 // Pure group
2228 pattern = Pattern.compile("x+(?:y+)z+");
2229 matcher = pattern.matcher("xxxyyyzzz");
2230 matcher.find();
2231 try {
2232 String blah = matcher.group(1);
2233 failCount++;
2234 } catch (IndexOutOfBoundsException ioobe) {
2235 // Good result
2236 }
2237
2238 // Supplementary character tests
2239 // Independent group
2240 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2241 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2242 matcher.find();
2243 try {
2244 String blah = matcher.group(1);
2245 failCount++;
2246 } catch (IndexOutOfBoundsException ioobe) {
2247 // Good result
2248 }
2249 // Pure group
2250 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2251 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2252 matcher.find();
2253 try {
2254 String blah = matcher.group(1);
2255 failCount++;
2256 } catch (IndexOutOfBoundsException ioobe) {
2257 // Good result
2258 }
2259
2260 report("GroupCapture");
2261 }
2262
2263 private static void backRefTest() throws Exception {
2264 Pattern pattern = Pattern.compile("(a*)bc\\1");
2265 check(pattern, "zzzaabcazzz", true);
2266
2267 pattern = Pattern.compile("(a*)bc\\1");
2268 check(pattern, "zzzaabcaazzz", true);
2269
2270 pattern = Pattern.compile("(abc)(def)\\1");
2271 check(pattern, "abcdefabc", true);
2272
2273 pattern = Pattern.compile("(abc)(def)\\3");
2274 check(pattern, "abcdefabc", false);
2275
2276 try {
2277 for (int i = 1; i < 10; i++) {
2278 // Make sure backref 1-9 are always accepted
2279 pattern = Pattern.compile("abcdef\\" + i);
2280 // and fail to match if the target group does not exit
2281 check(pattern, "abcdef", false);
2282 }
2283 } catch(PatternSyntaxException e) {
2284 failCount++;
2285 }
2286
2287 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2288 check(pattern, "abcdefghija", false);
2289 check(pattern, "abcdefghija1", true);
2290
2291 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2292 check(pattern, "abcdefghijkk", true);
2293
2294 pattern = Pattern.compile("(a)bcdefghij\\11");
2295 check(pattern, "abcdefghija1", true);
2296
2297 // Supplementary character tests
2298 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2299 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2300
2301 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2302 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2303
2304 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2305 check(pattern, toSupplementaries("abcdefabc"), true);
2306
2307 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2308 check(pattern, toSupplementaries("abcdefabc"), false);
2309
2310 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2311 check(pattern, toSupplementaries("abcdefghija"), false);
2312 check(pattern, toSupplementaries("abcdefghija1"), true);
2313
2314 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2315 check(pattern, toSupplementaries("abcdefghijkk"), true);
2316
2317 report("BackRef");
2318 }
2319
2320 /**
2321 * Unicode Technical Report #18, section 2.6 End of Line
2322 * There is no empty line to be matched in the sequence \u000D\u000A
2323 * but there is an empty line in the sequence \u000A\u000D.
2324 */
2325 private static void anchorTest() throws Exception {
2326 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2327 Matcher m = p.matcher("blah1\r\nblah2");
2328 m.find();
2329 m.find();
2330 if (!m.group().equals("blah2"))
2331 failCount++;
2332
2333 m.reset("blah1\n\rblah2");
2334 m.find();
2335 m.find();
2336 m.find();
2337 if (!m.group().equals("blah2"))
2338 failCount++;
2339
2340 // Test behavior of $ with \r\n at end of input
2341 p = Pattern.compile(".+$");
2342 m = p.matcher("blah1\r\n");
2343 if (!m.find())
2344 failCount++;
2345 if (!m.group().equals("blah1"))
2346 failCount++;
2347 if (m.find())
2348 failCount++;
2349
2350 // Test behavior of $ with \r\n at end of input in multiline
2351 p = Pattern.compile(".+$", Pattern.MULTILINE);
2352 m = p.matcher("blah1\r\n");
2353 if (!m.find())
2354 failCount++;
2355 if (m.find())
2356 failCount++;
2357
2358 // Test for $ recognition of \u0085 for bug 4527731
2359 p = Pattern.compile(".+$", Pattern.MULTILINE);
2360 m = p.matcher("blah1\u0085");
2361 if (!m.find())
2362 failCount++;
2363
2364 // Supplementary character test
2365 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2366 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2367 m.find();
2368 m.find();
2369 if (!m.group().equals(toSupplementaries("blah2")))
2370 failCount++;
2371
2372 m.reset(toSupplementaries("blah1\n\rblah2"));
2373 m.find();
2374 m.find();
2375 m.find();
2376 if (!m.group().equals(toSupplementaries("blah2")))
2377 failCount++;
2378
2379 // Test behavior of $ with \r\n at end of input
2380 p = Pattern.compile(".+$");
2381 m = p.matcher(toSupplementaries("blah1\r\n"));
2382 if (!m.find())
2383 failCount++;
2384 if (!m.group().equals(toSupplementaries("blah1")))
2385 failCount++;
2386 if (m.find())
2387 failCount++;
2388
2389 // Test behavior of $ with \r\n at end of input in multiline
2390 p = Pattern.compile(".+$", Pattern.MULTILINE);
2391 m = p.matcher(toSupplementaries("blah1\r\n"));
2392 if (!m.find())
2393 failCount++;
2394 if (m.find())
2395 failCount++;
2396
2397 // Test for $ recognition of \u0085 for bug 4527731
2398 p = Pattern.compile(".+$", Pattern.MULTILINE);
2399 m = p.matcher(toSupplementaries("blah1\u0085"));
2400 if (!m.find())
2401 failCount++;
2402
2403 report("Anchors");
2404 }
2405
2406 /**
2407 * A basic sanity test of Matcher.lookingAt().
2408 */
2409 private static void lookingAtTest() throws Exception {
2410 Pattern p = Pattern.compile("(ab)(c*)");
2411 Matcher m = p.matcher("abccczzzabcczzzabccc");
2412
2413 if (!m.lookingAt())
2414 failCount++;
2415
2416 if (!m.group().equals(m.group(0)))
2417 failCount++;
2418
2419 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2420 if (m.lookingAt())
2421 failCount++;
2422
2423 // Supplementary character test
2424 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2425 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2426
2427 if (!m.lookingAt())
2428 failCount++;
2429
2430 if (!m.group().equals(m.group(0)))
2431 failCount++;
2432
2433 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2434 if (m.lookingAt())
2435 failCount++;
2436
2437 report("Looking At");
2438 }
2439
2440 /**
2441 * A basic sanity test of Matcher.matches().
2442 */
2443 private static void matchesTest() throws Exception {
2444 // matches()
2445 Pattern p = Pattern.compile("ulb(c*)");
2446 Matcher m = p.matcher("ulbcccccc");
2447 if (!m.matches())
2448 failCount++;
2449
2450 // find() but not matches()
2451 m.reset("zzzulbcccccc");
2452 if (m.matches())
2453 failCount++;
2454
2455 // lookingAt() but not matches()
2456 m.reset("ulbccccccdef");
2457 if (m.matches())
2458 failCount++;
2459
2460 // matches()
2461 p = Pattern.compile("a|ad");
2462 m = p.matcher("ad");
2463 if (!m.matches())
2464 failCount++;
2465
2466 // Supplementary character test
2467 // matches()
2468 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2469 m = p.matcher(toSupplementaries("ulbcccccc"));
2470 if (!m.matches())
2471 failCount++;
2472
2473 // find() but not matches()
2474 m.reset(toSupplementaries("zzzulbcccccc"));
2475 if (m.matches())
2476 failCount++;
2477
2478 // lookingAt() but not matches()
2479 m.reset(toSupplementaries("ulbccccccdef"));
2480 if (m.matches())
2481 failCount++;
2482
2483 // matches()
2484 p = Pattern.compile(toSupplementaries("a|ad"));
2485 m = p.matcher(toSupplementaries("ad"));
2486 if (!m.matches())
2487 failCount++;
2488
2489 report("Matches");
2490 }
2491
2492 /**
2493 * A basic sanity test of Pattern.matches().
2494 */
2495 private static void patternMatchesTest() throws Exception {
2496 // matches()
2497 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2498 toSupplementaries("ulbcccccc")))
2499 failCount++;
2500
2501 // find() but not matches()
2502 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2503 toSupplementaries("zzzulbcccccc")))
2504 failCount++;
2505
2506 // lookingAt() but not matches()
2507 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2508 toSupplementaries("ulbccccccdef")))
2509 failCount++;
2510
2511 // Supplementary character test
2512 // matches()
2513 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2514 toSupplementaries("ulbcccccc")))
2515 failCount++;
2516
2517 // find() but not matches()
2518 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2519 toSupplementaries("zzzulbcccccc")))
2520 failCount++;
2521
2522 // lookingAt() but not matches()
2523 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2524 toSupplementaries("ulbccccccdef")))
2525 failCount++;
2526
2527 report("Pattern Matches");
2528 }
2529
2530 /**
2531 * Canonical equivalence testing. Tests the ability of the engine
2532 * to match sequences that are not explicitly specified in the
2533 * pattern when they are considered equivalent by the Unicode Standard.
2534 */
2535 private static void ceTest() throws Exception {
2536 // Decomposed char outside char classes
2537 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2538 Matcher m = p.matcher("test\u00e5");
2539 if (!m.matches())
2540 failCount++;
2541
2542 m.reset("testa\u030a");
2543 if (!m.matches())
2544 failCount++;
2545
2546 // Composed char outside char classes
2547 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2548 m = p.matcher("test\u00e5");
2549 if (!m.matches())
2550 failCount++;
2551
2552 m.reset("testa\u030a");
2553 if (!m.find())
2554 failCount++;
2555
2556 // Decomposed char inside a char class
2557 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2558 m = p.matcher("test\u00e5");
2559 if (!m.find())
2560 failCount++;
2561
2562 m.reset("testa\u030a");
2563 if (!m.find())
2564 failCount++;
2565
2566 // Composed char inside a char class
2567 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2568 m = p.matcher("test\u00e5");
2569 if (!m.find())
2570 failCount++;
2571
2572 m.reset("testa\u0300");
2573 if (!m.find())
2574 failCount++;
2575
2576 m.reset("testa\u030a");
2577 if (!m.find())
2578 failCount++;
2579
2580 // Marks that cannot legally change order and be equivalent
2581 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2582 check(p, "testa\u0308\u0300", true);
2583 check(p, "testa\u0300\u0308", false);
2584
2585 // Marks that can legally change order and be equivalent
2586 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2587 check(p, "testa\u0308\u0323", true);
2588 check(p, "testa\u0323\u0308", true);
2589
2590 // Test all equivalences of the sequence a\u0308\u0323\u0300
2591 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2592 check(p, "testa\u0308\u0323\u0300", true);
2593 check(p, "testa\u0323\u0308\u0300", true);
2594 check(p, "testa\u0308\u0300\u0323", true);
2595 check(p, "test\u00e4\u0323\u0300", true);
2596 check(p, "test\u00e4\u0300\u0323", true);
2597
2598 /*
2599 * The following canonical equivalence tests don't work. Bug id: 4916384.
2600 *
2601 // Decomposed hangul (jamos)
2602 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2603 m = p.matcher("\u1100\u1161");
2604 if (!m.matches())
2605 failCount++;
2606
2607 m.reset("\uac00");
2608 if (!m.matches())
2609 failCount++;
2610
2611 // Composed hangul
2612 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2613 m = p.matcher("\u1100\u1161");
2614 if (!m.matches())
2615 failCount++;
2616
2617 m.reset("\uac00");
2618 if (!m.matches())
2619 failCount++;
2620
2621 // Decomposed supplementary outside char classes
2622 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2623 m = p.matcher("test\ud834\uddc0");
2624 if (!m.matches())
2625 failCount++;
2626
2627 m.reset("test\ud834\uddbc\ud834\udd6f");
2628 if (!m.matches())
2629 failCount++;
2630
2631 // Composed supplementary outside char classes
2632 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2633 m.reset("test\ud834\uddbc\ud834\udd6f");
2634 if (!m.matches())
2635 failCount++;
2636
2637 m = p.matcher("test\ud834\uddc0");
2638 if (!m.matches())
2639 failCount++;
2640
2641 */
2642
2643 report("Canonical Equivalence");
2644 }
2645
2646 /**
2647 * A basic sanity test of Matcher.replaceAll().
2648 */
2649 private static void globalSubstitute() throws Exception {
2650 // Global substitution with a literal
2651 Pattern p = Pattern.compile("(ab)(c*)");
2652 Matcher m = p.matcher("abccczzzabcczzzabccc");
2653 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2654 failCount++;
2655
2656 m.reset("zzzabccczzzabcczzzabccczzz");
2657 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2658 failCount++;
2659
2660 // Global substitution with groups
2661 m.reset("zzzabccczzzabcczzzabccczzz");
2662 String result = m.replaceAll("$1");
2663 if (!result.equals("zzzabzzzabzzzabzzz"))
2664 failCount++;
2665
2666 // Supplementary character test
2667 // Global substitution with a literal
2668 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2669 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2670 if (!m.replaceAll(toSupplementaries("test")).
2671 equals(toSupplementaries("testzzztestzzztest")))
2672 failCount++;
2673
2674 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2675 if (!m.replaceAll(toSupplementaries("test")).
2676 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2677 failCount++;
2678
2679 // Global substitution with groups
2680 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2681 result = m.replaceAll("$1");
2682 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2683 failCount++;
2684
2685 report("Global Substitution");
2686 }
2687
2688 /**
2689 * Tests the usage of Matcher.appendReplacement() with literal
2690 * and group substitutions.
2691 */
2692 private static void stringbufferSubstitute() throws Exception {
2693 // SB substitution with literal
2694 String blah = "zzzblahzzz";
2695 Pattern p = Pattern.compile("blah");
2696 Matcher m = p.matcher(blah);
2697 StringBuffer result = new StringBuffer();
2698 try {
2699 m.appendReplacement(result, "blech");
2700 failCount++;
2701 } catch (IllegalStateException e) {
2702 }
2703 m.find();
2704 m.appendReplacement(result, "blech");
2705 if (!result.toString().equals("zzzblech"))
2706 failCount++;
2707
2708 m.appendTail(result);
2709 if (!result.toString().equals("zzzblechzzz"))
2710 failCount++;
2711
2712 // SB substitution with groups
2713 blah = "zzzabcdzzz";
2714 p = Pattern.compile("(ab)(cd)*");
2715 m = p.matcher(blah);
2716 result = new StringBuffer();
2717 try {
2718 m.appendReplacement(result, "$1");
2719 failCount++;
2720 } catch (IllegalStateException e) {
2721 }
2722 m.find();
2723 m.appendReplacement(result, "$1");
2724 if (!result.toString().equals("zzzab"))
2725 failCount++;
2726
2727 m.appendTail(result);
2728 if (!result.toString().equals("zzzabzzz"))
2729 failCount++;
2730
2731 // SB substitution with 3 groups
2732 blah = "zzzabcdcdefzzz";
2733 p = Pattern.compile("(ab)(cd)*(ef)");
2734 m = p.matcher(blah);
2735 result = new StringBuffer();
2736 try {
2737 m.appendReplacement(result, "$1w$2w$3");
2738 failCount++;
2739 } catch (IllegalStateException e) {
2740 }
2741 m.find();
2742 m.appendReplacement(result, "$1w$2w$3");
2743 if (!result.toString().equals("zzzabwcdwef"))
2744 failCount++;
2745
2746 m.appendTail(result);
2747 if (!result.toString().equals("zzzabwcdwefzzz"))
2748 failCount++;
2749
2750 // SB substitution with groups and three matches
2751 // skipping middle match
2752 blah = "zzzabcdzzzabcddzzzabcdzzz";
2753 p = Pattern.compile("(ab)(cd*)");
2754 m = p.matcher(blah);
2755 result = new StringBuffer();
2756 try {
2757 m.appendReplacement(result, "$1");
2758 failCount++;
2759 } catch (IllegalStateException e) {
2760 }
2761 m.find();
2762 m.appendReplacement(result, "$1");
2763 if (!result.toString().equals("zzzab"))
2764 failCount++;
2765
2766 m.find();
2767 m.find();
2768 m.appendReplacement(result, "$2");
2769 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2770 failCount++;
2771
2772 m.appendTail(result);
2773 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2774 failCount++;
2775
2776 // Check to make sure escaped $ is ignored
2777 blah = "zzzabcdcdefzzz";
2778 p = Pattern.compile("(ab)(cd)*(ef)");
2779 m = p.matcher(blah);
2780 result = new StringBuffer();
2781 m.find();
2782 m.appendReplacement(result, "$1w\\$2w$3");
2783 if (!result.toString().equals("zzzabw$2wef"))
2784 failCount++;
2785
2786 m.appendTail(result);
2787 if (!result.toString().equals("zzzabw$2wefzzz"))
2788 failCount++;
2789
2790 // Check to make sure a reference to nonexistent group causes error
2791 blah = "zzzabcdcdefzzz";
2792 p = Pattern.compile("(ab)(cd)*(ef)");
2793 m = p.matcher(blah);
2794 result = new StringBuffer();
2795 m.find();
2796 try {
2797 m.appendReplacement(result, "$1w$5w$3");
2798 failCount++;
2799 } catch (IndexOutOfBoundsException ioobe) {
2800 // Correct result
2801 }
2802
2803 // Check double digit group references
2804 blah = "zzz123456789101112zzz";
2805 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2806 m = p.matcher(blah);
2807 result = new StringBuffer();
2808 m.find();
2809 m.appendReplacement(result, "$1w$11w$3");
2810 if (!result.toString().equals("zzz1w11w3"))
2811 failCount++;
2812
2813 // Check to make sure it backs off $15 to $1 if only three groups
2814 blah = "zzzabcdcdefzzz";
2815 p = Pattern.compile("(ab)(cd)*(ef)");
2816 m = p.matcher(blah);
2817 result = new StringBuffer();
2818 m.find();
2819 m.appendReplacement(result, "$1w$15w$3");
2820 if (!result.toString().equals("zzzabwab5wef"))
2821 failCount++;
2822
2823
2824 // Supplementary character test
2825 // SB substitution with literal
2826 blah = toSupplementaries("zzzblahzzz");
2827 p = Pattern.compile(toSupplementaries("blah"));
2828 m = p.matcher(blah);
2829 result = new StringBuffer();
2830 try {
2831 m.appendReplacement(result, toSupplementaries("blech"));
2832 failCount++;
2833 } catch (IllegalStateException e) {
2834 }
2835 m.find();
2836 m.appendReplacement(result, toSupplementaries("blech"));
2837 if (!result.toString().equals(toSupplementaries("zzzblech")))
2838 failCount++;
2839
2840 m.appendTail(result);
2841 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2842 failCount++;
2843
2844 // SB substitution with groups
2845 blah = toSupplementaries("zzzabcdzzz");
2846 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2847 m = p.matcher(blah);
2848 result = new StringBuffer();
2849 try {
2850 m.appendReplacement(result, "$1");
2851 failCount++;
2852 } catch (IllegalStateException e) {
2853 }
2854 m.find();
2855 m.appendReplacement(result, "$1");
2856 if (!result.toString().equals(toSupplementaries("zzzab")))
2857 failCount++;
2858
2859 m.appendTail(result);
2860 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2861 failCount++;
2862
2863 // SB substitution with 3 groups
2864 blah = toSupplementaries("zzzabcdcdefzzz");
2865 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2866 m = p.matcher(blah);
2867 result = new StringBuffer();
2868 try {
2869 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2870 failCount++;
2871 } catch (IllegalStateException e) {
2872 }
2873 m.find();
2874 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2875 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2876 failCount++;
2877
2878 m.appendTail(result);
2879 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2880 failCount++;
2881
2882 // SB substitution with groups and three matches
2883 // skipping middle match
2884 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2885 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2886 m = p.matcher(blah);
2887 result = new StringBuffer();
2888 try {
2889 m.appendReplacement(result, "$1");
2890 failCount++;
2891 } catch (IllegalStateException e) {
2892 }
2893 m.find();
2894 m.appendReplacement(result, "$1");
2895 if (!result.toString().equals(toSupplementaries("zzzab")))
2896 failCount++;
2897
2898 m.find();
2899 m.find();
2900 m.appendReplacement(result, "$2");
2901 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2902 failCount++;
2903
2904 m.appendTail(result);
2905 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2906 failCount++;
2907
2908 // Check to make sure escaped $ is ignored
2909 blah = toSupplementaries("zzzabcdcdefzzz");
2910 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2911 m = p.matcher(blah);
2912 result = new StringBuffer();
2913 m.find();
2914 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2915 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2916 failCount++;
2917
2918 m.appendTail(result);
2919 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2920 failCount++;
2921
2922 // Check to make sure a reference to nonexistent group causes error
2923 blah = toSupplementaries("zzzabcdcdefzzz");
2924 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2925 m = p.matcher(blah);
2926 result = new StringBuffer();
2927 m.find();
2928 try {
2929 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2930 failCount++;
2931 } catch (IndexOutOfBoundsException ioobe) {
2932 // Correct result
2933 }
2934
2935 // Check double digit group references
2936 blah = toSupplementaries("zzz123456789101112zzz");
2937 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2938 m = p.matcher(blah);
2939 result = new StringBuffer();
2940 m.find();
2941 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2942 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2943 failCount++;
2944
2945 // Check to make sure it backs off $15 to $1 if only three groups
2946 blah = toSupplementaries("zzzabcdcdefzzz");
2947 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2948 m = p.matcher(blah);
2949 result = new StringBuffer();
2950 m.find();
2951 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2952 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2953 failCount++;
2954
2955 // Check nothing has been appended into the output buffer if
2956 // the replacement string triggers IllegalArgumentException.
2957 p = Pattern.compile("(abc)");
2958 m = p.matcher("abcd");
2959 result = new StringBuffer();
2960 m.find();
2961 try {
2962 m.appendReplacement(result, ("xyz$g"));
2963 failCount++;
2964 } catch (IllegalArgumentException iae) {
2965 if (result.length() != 0)
2966 failCount++;
2967 }
2968
2969 report("SB Substitution");
2970 }
2971
2972 /*
2973 * 5 groups of characters are created to make a substitution string.
2974 * A base string will be created including random lead chars, the
2975 * substitution string, and random trailing chars.
2976 * A pattern containing the 5 groups is searched for and replaced with:
2977 * random group + random string + random group.
2978 * The results are checked for correctness.
2979 */
2980 private static void substitutionBasher() {
2981 for (int runs = 0; runs<1000; runs++) {
2982 // Create a base string to work in
2983 int leadingChars = generator.nextInt(10);
2984 StringBuffer baseBuffer = new StringBuffer(100);
2985 String leadingString = getRandomAlphaString(leadingChars);
2986 baseBuffer.append(leadingString);
2987
2988 // Create 5 groups of random number of random chars
2989 // Create the string to substitute
2990 // Create the pattern string to search for
2991 StringBuffer bufferToSub = new StringBuffer(25);
2992 StringBuffer bufferToPat = new StringBuffer(50);
2993 String[] groups = new String[5];
2994 for(int i=0; i<5; i++) {
2995 int aGroupSize = generator.nextInt(5)+1;
2996 groups[i] = getRandomAlphaString(aGroupSize);
2997 bufferToSub.append(groups[i]);
2998 bufferToPat.append('(');
2999 bufferToPat.append(groups[i]);
3000 bufferToPat.append(')');
3001 }
3002 String stringToSub = bufferToSub.toString();
3003 String pattern = bufferToPat.toString();
3004
3005 // Place sub string into working string at random index
3006 baseBuffer.append(stringToSub);
3007
3008 // Append random chars to end
3009 int trailingChars = generator.nextInt(10);
3010 String trailingString = getRandomAlphaString(trailingChars);
3011 baseBuffer.append(trailingString);
3012 String baseString = baseBuffer.toString();
3013
3014 // Create test pattern and matcher
3015 Pattern p = Pattern.compile(pattern);
3016 Matcher m = p.matcher(baseString);
3017
3018 // Reject candidate if pattern happens to start early
3019 m.find();
3020 if (m.start() < leadingChars)
3021 continue;
3022
3023 // Reject candidate if more than one match
3024 if (m.find())
3025 continue;
3026
3027 // Construct a replacement string with :
3028 // random group + random string + random group
3029 StringBuffer bufferToRep = new StringBuffer();
3030 int groupIndex1 = generator.nextInt(5);
3031 bufferToRep.append("$" + (groupIndex1 + 1));
3032 String randomMidString = getRandomAlphaString(5);
3033 bufferToRep.append(randomMidString);
3034 int groupIndex2 = generator.nextInt(5);
3035 bufferToRep.append("$" + (groupIndex2 + 1));
3036 String replacement = bufferToRep.toString();
3037
3038 // Do the replacement
3039 String result = m.replaceAll(replacement);
3040
3041 // Construct expected result
3042 StringBuffer bufferToRes = new StringBuffer();
3043 bufferToRes.append(leadingString);
3044 bufferToRes.append(groups[groupIndex1]);
3045 bufferToRes.append(randomMidString);
3046 bufferToRes.append(groups[groupIndex2]);
3047 bufferToRes.append(trailingString);
3048 String expectedResult = bufferToRes.toString();
3049
3050 // Check results
3051 if (!result.equals(expectedResult))
3052 failCount++;
3053 }
3054
3055 report("Substitution Basher");
3056 }
3057
3058 /**
3059 * Checks the handling of some escape sequences that the Pattern
3060 * class should process instead of the java compiler. These are
 * not in the file because the escapes should be processed
3062 * by the Pattern class when the regex is compiled.
3063 */
3064 private static void escapes() throws Exception {
3065 Pattern p = Pattern.compile("\\043");
3066 Matcher m = p.matcher("#");
3067 if (!m.find())
3068 failCount++;
3069
3070 p = Pattern.compile("\\x23");
3071 m = p.matcher("#");
3072 if (!m.find())
3073 failCount++;
3074
3075 p = Pattern.compile("\\u0023");
3076 m = p.matcher("#");
3077 if (!m.find())
3078 failCount++;
3079
3080 report("Escape sequences");
3081 }
3082
3083 /**
3084 * Checks the handling of blank input situations. These
3085 * tests are incompatible with my test file format.
3086 */
3087 private static void blankInput() throws Exception {
3088 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3089 Matcher m = p.matcher("");
3090 if (m.find())
3091 failCount++;
3092
3093 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3094 m = p.matcher("");
3095 if (!m.find())
3096 failCount++;
3097
3098 p = Pattern.compile("abc");
3099 m = p.matcher("");
3100 if (m.find())
3101 failCount++;
3102
3103 p = Pattern.compile("a*");
3104 m = p.matcher("");
3105 if (!m.find())
3106 failCount++;
3107
3108 report("Blank input");
3109 }
3110
3111 /**
3112 * Tests the Boyer-Moore pattern matching of a character sequence
3113 * on randomly generated patterns.
3114 */
3115 private static void bm() throws Exception {
3116 doBnM('a');
3117 report("Boyer Moore (ASCII)");
3118
3119 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3120 report("Boyer Moore (Supplementary)");
3121 }
3122
3123 private static void doBnM(int baseCharacter) throws Exception {
3124 int achar=0;
3125
3126 for (int i=0; i<100; i++) {
3127 // Create a short pattern to search for
3128 int patternLength = generator.nextInt(7) + 4;
3129 StringBuffer patternBuffer = new StringBuffer(patternLength);
3130 for (int x=0; x<patternLength; x++) {
3131 int ch = baseCharacter + generator.nextInt(26);
3132 if (Character.isSupplementaryCodePoint(ch)) {
3133 patternBuffer.append(Character.toChars(ch));
3134 } else {
3135 patternBuffer.append((char)ch);
3136 }
3137 }
3138 String pattern = patternBuffer.toString();
3139 Pattern p = Pattern.compile(pattern);
3140
3141 // Create a buffer with random ASCII chars that does
3142 // not match the sample
3143 String toSearch = null;
3144 StringBuffer s = null;
3145 Matcher m = p.matcher("");
3146 do {
3147 s = new StringBuffer(100);
3148 for (int x=0; x<100; x++) {
3149 int ch = baseCharacter + generator.nextInt(26);
3150 if (Character.isSupplementaryCodePoint(ch)) {
3151 s.append(Character.toChars(ch));
3152 } else {
3153 s.append((char)ch);
3154 }
3155 }
3156 toSearch = s.toString();
3157 m.reset(toSearch);
3158 } while (m.find());
3159
3160 // Insert the pattern at a random spot
3161 int insertIndex = generator.nextInt(99);
3162 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3163 insertIndex++;
3164 s = s.insert(insertIndex, pattern);
3165 toSearch = s.toString();
3166
3167 // Make sure that the pattern is found
3168 m.reset(toSearch);
3169 if (!m.find())
3170 failCount++;
3171
3172 // Make sure that the match text is the pattern
3173 if (!m.group().equals(pattern))
3174 failCount++;
3175
3176 // Make sure match occured at insertion point
3177 if (m.start() != insertIndex)
3178 failCount++;
3179 }
3180 }
3181
3182 /**
3183 * Tests the matching of slices on randomly generated patterns.
3184 * The Boyer-Moore optimization is not done on these patterns
3185 * because it uses unicode case folding.
3186 */
3187 private static void slice() throws Exception {
3188 doSlice(Character.MAX_VALUE);
3189 report("Slice");
3190
3191 doSlice(Character.MAX_CODE_POINT);
3192 report("Slice (Supplementary)");
3193 }
3194
3195 private static void doSlice(int maxCharacter) throws Exception {
3196 Random generator = new Random();
3197 int achar=0;
3198
3199 for (int i=0; i<100; i++) {
3200 // Create a short pattern to search for
3201 int patternLength = generator.nextInt(7) + 4;
3202 StringBuffer patternBuffer = new StringBuffer(patternLength);
3203 for (int x=0; x<patternLength; x++) {
3204 int randomChar = 0;
3205 while (!Character.isLetterOrDigit(randomChar))
3206 randomChar = generator.nextInt(maxCharacter);
3207 if (Character.isSupplementaryCodePoint(randomChar)) {
3208 patternBuffer.append(Character.toChars(randomChar));
3209 } else {
3210 patternBuffer.append((char) randomChar);
3211 }
3212 }
3213 String pattern = patternBuffer.toString();
3214 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3215
3216 // Create a buffer with random chars that does not match the sample
3217 String toSearch = null;
3218 StringBuffer s = null;
3219 Matcher m = p.matcher("");
3220 do {
3221 s = new StringBuffer(100);
3222 for (int x=0; x<100; x++) {
3223 int randomChar = 0;
3224 while (!Character.isLetterOrDigit(randomChar))
3225 randomChar = generator.nextInt(maxCharacter);
3226 if (Character.isSupplementaryCodePoint(randomChar)) {
3227 s.append(Character.toChars(randomChar));
3228 } else {
3229 s.append((char) randomChar);
3230 }
3231 }
3232 toSearch = s.toString();
3233 m.reset(toSearch);
3234 } while (m.find());
3235
3236 // Insert the pattern at a random spot
3237 int insertIndex = generator.nextInt(99);
3238 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3239 insertIndex++;
3240 s = s.insert(insertIndex, pattern);
3241 toSearch = s.toString();
3242
3243 // Make sure that the pattern is found
3244 m.reset(toSearch);
3245 if (!m.find())
3246 failCount++;
3247
3248 // Make sure that the match text is the pattern
3249 if (!m.group().equals(pattern))
3250 failCount++;
3251
3252 // Make sure match occured at insertion point
3253 if (m.start() != insertIndex)
3254 failCount++;
3255 }
3256 }
3257
3258 private static void explainFailure(String pattern, String data,
3259 String expected, String actual) {
3260 System.err.println("----------------------------------------");
3261 System.err.println("Pattern = "+pattern);
3262 System.err.println("Data = "+data);
3263 System.err.println("Expected = " + expected);
3264 System.err.println("Actual = " + actual);
3265 }
3266
3267 private static void explainFailure(String pattern, String data,
3268 Throwable t) {
3269 System.err.println("----------------------------------------");
3270 System.err.println("Pattern = "+pattern);
3271 System.err.println("Data = "+data);
3272 t.printStackTrace(System.err);
3273 }
3274
3275 // Testing examples from a file
3276
3277 /**
3278 * Goes through the file "TestCases.txt" and creates many patterns
3279 * described in the file, matching the patterns against input lines in
3280 * the file, and comparing the results against the correct results
3281 * also found in the file. The file format is described in comments
3282 * at the head of the file.
3283 */
3284 private static void processFile(String fileName) throws Exception {
3285 File testCases = new File(System.getProperty("test.src", "."),
3286 fileName);
3287 FileInputStream in = new FileInputStream(testCases);
3288 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3289
3290 // Process next test case.
3291 String aLine;
3292 while((aLine = r.readLine()) != null) {
3293 // Read a line for pattern
3294 String patternString = grabLine(r);
3295 Pattern p = null;
3296 try {
3297 p = compileTestPattern(patternString);
3298 } catch (PatternSyntaxException e) {
3299 String dataString = grabLine(r);
3300 String expectedResult = grabLine(r);
3301 if (expectedResult.startsWith("error"))
3302 continue;
3303 explainFailure(patternString, dataString, e);
3304 failCount++;
3305 continue;
3306 }
3307
3308 // Read a line for input string
3309 String dataString = grabLine(r);
3310 Matcher m = p.matcher(dataString);
3311 StringBuffer result = new StringBuffer();
3312
3313 // Check for IllegalStateExceptions before a match
3314 failCount += preMatchInvariants(m);
3315
3316 boolean found = m.find();
3317
3318 if (found)
3319 failCount += postTrueMatchInvariants(m);
3320 else
3321 failCount += postFalseMatchInvariants(m);
3322
3323 if (found) {
3324 result.append("true ");
3325 result.append(m.group(0) + " ");
3326 } else {
3327 result.append("false ");
3328 }
3329
3330 result.append(m.groupCount());
3331
3332 if (found) {
3333 for (int i=1; i<m.groupCount()+1; i++)
3334 if (m.group(i) != null)
3335 result.append(" " +m.group(i));
3336 }
3337
3338 // Read a line for the expected result
3339 String expectedResult = grabLine(r);
3340
3341 if (!result.toString().equals(expectedResult)) {
3342 explainFailure(patternString, dataString, expectedResult, result.toString());
3343 failCount++;
3344 }
3345 }
3346
3347 report(fileName);
3348 }
3349
3350 private static int preMatchInvariants(Matcher m) {
3351 int failCount = 0;
3352 try {
3353 m.start();
3354 failCount++;
3355 } catch (IllegalStateException ise) {}
3356 try {
3357 m.end();
3358 failCount++;
3359 } catch (IllegalStateException ise) {}
3360 try {
3361 m.group();
3362 failCount++;
3363 } catch (IllegalStateException ise) {}
3364 return failCount;
3365 }
3366
3367 private static int postFalseMatchInvariants(Matcher m) {
3368 int failCount = 0;
3369 try {
3370 m.group();
3371 failCount++;
3372 } catch (IllegalStateException ise) {}
3373 try {
3374 m.start();
3375 failCount++;
3376 } catch (IllegalStateException ise) {}
3377 try {
3378 m.end();
3379 failCount++;
3380 } catch (IllegalStateException ise) {}
3381 return failCount;
3382 }
3383
3384 private static int postTrueMatchInvariants(Matcher m) {
3385 int failCount = 0;
3386 //assert(m.start() = m.start(0);
3387 if (m.start() != m.start(0))
3388 failCount++;
3389 //assert(m.end() = m.end(0);
3390 if (m.start() != m.start(0))
3391 failCount++;
3392 //assert(m.group() = m.group(0);
3393 if (!m.group().equals(m.group(0)))
3394 failCount++;
3395 try {
3396 m.group(50);
3397 failCount++;
3398 } catch (IndexOutOfBoundsException ise) {}
3399
3400 return failCount;
3401 }
3402
3403 private static Pattern compileTestPattern(String patternString) {
3404 if (!patternString.startsWith("'")) {
3405 return Pattern.compile(patternString);
3406 }
3407
3408 int break1 = patternString.lastIndexOf("'");
3409 String flagString = patternString.substring(
3410 break1+1, patternString.length());
3411 patternString = patternString.substring(1, break1);
3412
3413 if (flagString.equals("i"))
3414 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3415
3416 if (flagString.equals("m"))
3417 return Pattern.compile(patternString, Pattern.MULTILINE);
3418
3419 return Pattern.compile(patternString);
3420 }
3421
3422 /**
3423 * Reads a line from the input file. Keeps reading lines until a non
3424 * empty non comment line is read. If the line contains a \n then
3425 * these two characters are replaced by a newline char. If a \\uxxxx
3426 * sequence is read then the sequence is replaced by the unicode char.
3427 */
3428 private static String grabLine(BufferedReader r) throws Exception {
3429 int index = 0;
3430 String line = r.readLine();
3431 while (line.startsWith("//") || line.length() < 1)
3432 line = r.readLine();
3433 while ((index = line.indexOf("\\n")) != -1) {
3434 StringBuffer temp = new StringBuffer(line);
3435 temp.replace(index, index+2, "\n");
3436 line = temp.toString();
3437 }
3438 while ((index = line.indexOf("\\u")) != -1) {
3439 StringBuffer temp = new StringBuffer(line);
3440 String value = temp.substring(index+2, index+6);
3441 char aChar = (char)Integer.parseInt(value, 16);
3442 String unicodeChar = "" + aChar;
3443 temp.replace(index, index+6, unicodeChar);
3444 line = temp.toString();
3445 }
3446
3447 return line;
3448 }
3449
3450 private static void check(Pattern p, String s, String g, String expected) {
3451 Matcher m = p.matcher(s);
3452 m.find();
shermana244eb52013-05-06 21:24:37 -07003453 if (!m.group(g).equals(expected) ||
3454 s.charAt(m.start(g)) != expected.charAt(0) ||
3455 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
sherman0b4d42d2009-02-23 21:06:15 -08003456 failCount++;
3457 }
3458
3459 private static void checkReplaceFirst(String p, String s, String r, String expected)
3460 {
3461 if (!expected.equals(Pattern.compile(p)
3462 .matcher(s)
3463 .replaceFirst(r)))
3464 failCount++;
3465 }
3466
3467 private static void checkReplaceAll(String p, String s, String r, String expected)
3468 {
3469 if (!expected.equals(Pattern.compile(p)
3470 .matcher(s)
3471 .replaceAll(r)))
3472 failCount++;
3473 }
3474
3475 private static void checkExpectedFail(String p) {
3476 try {
3477 Pattern.compile(p);
3478 } catch (PatternSyntaxException pse) {
3479 //pse.printStackTrace();
3480 return;
3481 }
3482 failCount++;
3483 }
3484
shermana244eb52013-05-06 21:24:37 -07003485 private static void checkExpectedIAE(Matcher m, String g) {
sherman0b4d42d2009-02-23 21:06:15 -08003486 m.find();
3487 try {
3488 m.group(g);
shermana244eb52013-05-06 21:24:37 -07003489 } catch (IllegalArgumentException x) {
sherman0b4d42d2009-02-23 21:06:15 -08003490 //iae.printStackTrace();
shermana244eb52013-05-06 21:24:37 -07003491 try {
3492 m.start(g);
3493 } catch (IllegalArgumentException xx) {
3494 try {
3495 m.start(g);
3496 } catch (IllegalArgumentException xxx) {
3497 return;
3498 }
3499 }
sherman0b4d42d2009-02-23 21:06:15 -08003500 }
3501 failCount++;
3502 }
3503
shermana244eb52013-05-06 21:24:37 -07003504 private static void checkExpectedNPE(Matcher m) {
3505 m.find();
3506 try {
3507 m.group(null);
3508 } catch (NullPointerException x) {
3509 try {
3510 m.start(null);
3511 } catch (NullPointerException xx) {
3512 try {
3513 m.end(null);
3514 } catch (NullPointerException xxx) {
3515 return;
3516 }
3517 }
3518 }
3519 failCount++;
3520 }
sherman0b4d42d2009-02-23 21:06:15 -08003521
3522 private static void namedGroupCaptureTest() throws Exception {
3523 check(Pattern.compile("x+(?<gname>y+)z+"),
3524 "xxxyyyzzz",
3525 "gname",
3526 "yyy");
3527
shermand9337e02009-10-21 11:40:40 -07003528 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003529 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003530 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003531 "yyy");
3532
sherman0b4d42d2009-02-23 21:06:15 -08003533 //backref
3534 Pattern pattern = Pattern.compile("(a*)bc\\1");
3535 check(pattern, "zzzaabcazzz", true); // found "abca"
3536
3537 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3538 "zzzaabcaazzz", true);
3539
3540 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3541 "abcdefabc", true);
3542
3543 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3544 "abcdefghijkk", true);
3545
3546 // Supplementary character tests
3547 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3548 toSupplementaries("zzzaabcazzz"), true);
3549
3550 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3551 toSupplementaries("zzzaabcaazzz"), true);
3552
3553 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3554 toSupplementaries("abcdefabc"), true);
3555
3556 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3557 "(?<gname>" +
3558 toSupplementaries("k)") + "\\k<gname>"),
3559 toSupplementaries("abcdefghijkk"), true);
3560
3561 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3562 "xxxyyyzzzyyy",
3563 "gname",
3564 "yyy");
3565
3566 //replaceFirst/All
3567 checkReplaceFirst("(?<gn>ab)(c*)",
3568 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003569 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003570 "abzzzabcczzzabccc");
3571
3572 checkReplaceAll("(?<gn>ab)(c*)",
3573 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003574 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003575 "abzzzabzzzab");
3576
3577
3578 checkReplaceFirst("(?<gn>ab)(c*)",
3579 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003580 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003581 "zzzabzzzabcczzzabccczzz");
3582
3583 checkReplaceAll("(?<gn>ab)(c*)",
3584 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003585 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003586 "zzzabzzzabzzzabzzz");
3587
3588 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3589 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003590 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003591 "zzzccczzzabcczzzabccczzz");
3592
3593 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3594 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003595 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003596 "zzzccczzzcczzzccczzz");
3597
3598 //toSupplementaries("(ab)(c*)"));
3599 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3600 ")(?<gn2>" + toSupplementaries("c") + "*)",
3601 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003602 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003603 toSupplementaries("abzzzabcczzzabccc"));
3604
3605
3606 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3607 ")(?<gn2>" + toSupplementaries("c") + "*)",
3608 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003609 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003610 toSupplementaries("abzzzabzzzab"));
3611
3612 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3613 ")(?<gn2>" + toSupplementaries("c") + "*)",
3614 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003615 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003616 toSupplementaries("ccczzzabcczzzabccc"));
3617
3618
3619 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3620 ")(?<gn2>" + toSupplementaries("c") + "*)",
3621 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003622 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003623 toSupplementaries("ccczzzcczzzccc"));
3624
3625 checkReplaceFirst("(?<dog>Dog)AndCat",
3626 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003627 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003628 "zzzDogzzzDogAndCatzzz");
3629
3630
3631 checkReplaceAll("(?<dog>Dog)AndCat",
3632 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003633 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003634 "zzzDogzzzDogzzz");
3635
3636 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003637 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3638 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003639 failCount++;
3640
3641 // negative
3642 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3643 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003644 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003645 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3646 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
shermana244eb52013-05-06 21:24:37 -07003647 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3648 "gnameX");
3649 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
sherman0b4d42d2009-02-23 21:06:15 -08003650 report("NamedGroupCapture");
3651 }
sherman6782c962010-02-05 00:10:42 -08003652
shermancc01ef52010-05-18 15:36:47 -07003653 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003654 private static void nonBmpClassComplementTest() throws Exception {
3655 Pattern p = Pattern.compile("\\P{Lu}");
3656 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3657 if (m.find() && m.start() == 1)
3658 failCount++;
3659
3660 // from a unicode category
3661 p = Pattern.compile("\\P{Lu}");
3662 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3663 if (m.find())
3664 failCount++;
3665 if (!m.hitEnd())
3666 failCount++;
3667
3668 // block
3669 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3670 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3671 if (m.find() && m.start() == 1)
3672 failCount++;
3673
3674 report("NonBmpClassComplement");
3675 }
3676
shermancc01ef52010-05-18 15:36:47 -07003677 private static void unicodePropertiesTest() throws Exception {
3678 // different forms
3679 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3680 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3681 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3682 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3683 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3684 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3685 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3686 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3687 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3688 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3689 failCount++;
3690
3691 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3692 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3693 Matcher lastSM = common;
3694 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3695
3696 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3697 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3698 Matcher lastBM = latin;
3699 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3700
3701 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3702 if (cp >= 0x30000 && (cp & 0x70) == 0){
3703 continue; // only pick couple code points, they are the same
3704 }
3705
3706 // Unicode Script
3707 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3708 Matcher m;
3709 String str = new String(Character.toChars(cp));
3710 if (script == lastScript) {
3711 m = lastSM;
3712 m.reset(str);
3713 } else {
3714 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3715 }
3716 if (!m.matches()) {
3717 failCount++;
3718 }
3719 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3720 other.reset(str);
3721 if (other.matches()) {
3722 failCount++;
3723 }
3724 lastSM = m;
3725 lastScript = script;
3726
3727 // Unicode Block
3728 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3729 if (block == null) {
3730 //System.out.printf("Not a Block: cp=%x%n", cp);
3731 continue;
3732 }
3733 if (block == lastBlock) {
3734 m = lastBM;
3735 m.reset(str);
3736 } else {
3737 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3738 }
3739 if (!m.matches()) {
3740 failCount++;
3741 }
3742 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3743 other.reset(str);
3744 if (other.matches()) {
3745 failCount++;
3746 }
3747 lastBM = m;
3748 lastBlock = block;
3749 }
3750 report("unicodeProperties");
3751 }
shermanf03c78b2011-02-03 13:49:25 -08003752
3753 private static void unicodeHexNotationTest() throws Exception {
3754
3755 // negative
3756 checkExpectedFail("\\x{-23}");
3757 checkExpectedFail("\\x{110000}");
3758 checkExpectedFail("\\x{}");
3759 checkExpectedFail("\\x{AB[ef]");
3760
3761 // codepoint
3762 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3763 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3764 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3765 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3766
3767 // in class
3768 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3769 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3770 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3771 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3772 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3773 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3774
3775 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3776 String s = "A" + new String(Character.toChars(cp)) + "B";
3777 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3778 : String.format("\\u%04x\\u%04x",
3779 (int) Character.toChars(cp)[0],
3780 (int) Character.toChars(cp)[1]);
3781 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3782 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3783 failCount++;
3784 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3785 failCount++;
3786 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3787 failCount++;
3788 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3789 failCount++;
3790 }
3791 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003792 }
3793
3794 private static void unicodeClassesTest() throws Exception {
3795
3796 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3797 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3798 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3799 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3800 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3801 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3802 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3803 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3804 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3805 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3806 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3807 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3808 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3809 Matcher bound = Pattern.compile("\\b").matcher("");
3810 Matcher word = Pattern.compile("\\w++").matcher("");
3811 // UNICODE_CHARACTER_CLASS
3812 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3813 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3814 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3815 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3816 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3817 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3818 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3819 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3820 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3821 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3822 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3823 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3824 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3825 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3826 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3827 // embedded flag (?U)
3828 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3829 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3830 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3831
3832 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3833 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3834 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3835 // properties
3836 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3837 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3838 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3839 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3840 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3841 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3842 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3843 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3844 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3845 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
shermana244eb52013-05-06 21:24:37 -07003846 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
sherman85bbd8b2011-04-28 20:48:36 -07003847
3848 // javaMethod
3849 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3850 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3851 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3852 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3853
3854 for (int cp = 1; cp < 0x30000; cp++) {
3855 String str = new String(Character.toChars(cp));
3856 int type = Character.getType(cp);
3857 if (// lower
3858 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3859 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3860 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3861 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3862 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3863 // upper
3864 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3865 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3866 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3867 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3868 // alpha
3869 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3870 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3871 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3872 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3873 // digit
3874 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3875 Character.isDigit(cp) != digitU.reset(str).matches() ||
3876 // alnum
3877 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3878 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3879 // punct
3880 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3881 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3882 // graph
3883 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3884 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3885 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3886 // blank
3887 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3888 != blank.reset(str).matches() ||
3889 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3890 // print
3891 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3892 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3893 // cntrl
3894 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3895 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3896 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3897 // hexdigit
3898 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3899 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3900 // space
3901 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3902 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3903 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3904 // word
3905 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3906 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3907 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3908 // bwordb
3909 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3910 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3911 // properties
3912 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3913 Character.isLetter(cp) != letterP.reset(str).matches()||
3914 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3915 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3916 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
shermana244eb52013-05-06 21:24:37 -07003917 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3918 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
sherman85bbd8b2011-04-28 20:48:36 -07003919 failCount++;
3920 }
3921
3922 // bounds/word align
3923 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3924 if (!bwbU.reset("\u0180sherman\u0400").matches())
3925 failCount++;
3926 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3927 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3928 failCount++;
3929 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3930 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3931 failCount++;
3932 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3933 failCount++;
3934 report("unicodePredefinedClasses");
3935 }
shermanecb65472012-05-08 10:57:13 -07003936
3937 private static void horizontalAndVerticalWSTest() throws Exception {
3938 String hws = new String (new char[] {
3939 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3940 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3941 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3942 0x202f, 0x205f, 0x3000 });
3943 String vws = new String (new char[] {
3944 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3945 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3946 !Pattern.compile("[\\h]+").matcher(hws).matches())
3947 failCount++;
3948 if (Pattern.compile("\\H").matcher(hws).find() ||
3949 Pattern.compile("[\\H]").matcher(hws).find())
3950 failCount++;
3951 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3952 !Pattern.compile("[\\v]+").matcher(vws).matches())
3953 failCount++;
3954 if (Pattern.compile("\\V").matcher(vws).find() ||
3955 Pattern.compile("[\\V]").matcher(vws).find())
3956 failCount++;
3957 String prefix = "abcd";
3958 String suffix = "efgh";
3959 String ng = "A";
3960 for (int i = 0; i < hws.length(); i++) {
3961 String c = String.valueOf(hws.charAt(i));
3962 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3963 if (!m.find() || !c.equals(m.group()))
3964 failCount++;
3965 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3966 if (!m.find() || !c.equals(m.group()))
3967 failCount++;
3968
3969 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3970 if (!m.find() || !ng.equals(m.group()))
3971 failCount++;
3972 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3973 if (!m.find() || !ng.equals(m.group()))
3974 failCount++;
3975 }
3976 for (int i = 0; i < vws.length(); i++) {
3977 String c = String.valueOf(vws.charAt(i));
3978 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3979 if (!m.find() || !c.equals(m.group()))
3980 failCount++;
3981 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3982 if (!m.find() || !c.equals(m.group()))
3983 failCount++;
3984
3985 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3986 if (!m.find() || !ng.equals(m.group()))
3987 failCount++;
3988 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3989 if (!m.find() || !ng.equals(m.group()))
3990 failCount++;
3991 }
3992 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3993 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3994 failCount++;
3995 report("horizontalAndVerticalWSTest");
3996 }
3997
3998 private static void linebreakTest() throws Exception {
3999 String linebreaks = new String (new char[] {
4000 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4001 String crnl = "\r\n";
4002 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4003 !Pattern.compile("\\R").matcher(crnl).matches() ||
4004 Pattern.compile("\\R\\R").matcher(crnl).matches())
4005 failCount++;
4006 report("linebreakTest");
4007 }
4008
sherman36e2c8f2012-08-09 10:15:26 -07004009 // #7189363
4010 private static void branchTest() throws Exception {
4011 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4012 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4013 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4014 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4015 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4016 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4017 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4018 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4019 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4020 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4021 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4022 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4023 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4024 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4025 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4026 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4027 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4028 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4029 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4030 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4031 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4032 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4033 failCount++;
4034 report("branchTest");
4035 }
4036
shermanf6f35a12013-04-26 13:59:10 -07004037 // This test is for 8007395
4038 private static void groupCurlyNotFoundSuppTest() throws Exception {
4039 String input = "test this as \ud83d\ude0d";
4040 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4041 "test(.)*(@[a-zA-Z.]+)",
4042 "test([^B])+(@[a-zA-Z.]+)",
4043 "test([^B])*(@[a-zA-Z.]+)",
4044 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4045 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4046 }) {
4047 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4048 .matcher(input);
4049 try {
4050 if (m.find()) {
4051 failCount++;
4052 }
4053 } catch (Exception x) {
4054 failCount++;
4055 }
4056 }
4057 report("GroupCurly NotFoundSupp");
4058 }
4059
sherman95a939c2013-08-27 12:54:44 -07004060 // This test is for 8023647
4061 private static void groupCurlyBackoffTest() throws Exception {
4062 if (!"abc1c".matches("(\\w)+1\\1") ||
4063 "abc11".matches("(\\w)+1\\1")) {
4064 failCount++;
4065 }
4066 report("GroupCurly backoff");
4067 }
4068
psandoze9d4ac92013-05-01 18:40:31 +02004069 // This test is for 8012646
4070 private static void patternAsPredicate() throws Exception {
4071 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4072
4073 if (p.test("")) {
4074 failCount++;
4075 }
4076 if (!p.test("word")) {
4077 failCount++;
4078 }
4079 if (p.test("1234")) {
4080 failCount++;
4081 }
4082 report("Pattern.asPredicate");
4083 }
sherman0b4d42d2009-02-23 21:06:15 -08004084}