blob: e2b61d83407084c4f72eec89634361eda3bcd81e [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
ohairbf91ea12011-04-06 22:06:11 -07002 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman36e2c8f2012-08-09 10:15:26 -070036 * 7067045 7014640 7189363
sherman0b4d42d2009-02-23 21:06:15 -080037 */
38
39import java.util.regex.*;
40import java.util.Random;
41import java.io.*;
42import java.util.*;
43import java.nio.CharBuffer;
44
45/**
46 * This is a test class created to check the operation of
47 * the Pattern and Matcher classes.
48 */
49public class RegExTest {
50
51 private static Random generator = new Random();
52 private static boolean failure = false;
53 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080054 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080055
56 /**
57 * Main to interpret arguments and run several tests.
58 *
59 */
60 public static void main(String[] args) throws Exception {
61 // Most of the tests are in a file
62 processFile("TestCases.txt");
63 //processFile("PerlCases.txt");
64 processFile("BMPTestCases.txt");
65 processFile("SupplementaryTestCases.txt");
66
67 // These test many randomly generated char patterns
68 bm();
69 slice();
70
71 // These are hard to put into the file
72 escapes();
73 blankInput();
74
75 // Substitition tests on randomly generated sequences
76 globalSubstitute();
77 stringbufferSubstitute();
78 substitutionBasher();
79
80 // Canonical Equivalence
81 ceTest();
82
83 // Anchors
84 anchorTest();
85
86 // boolean match calls
87 matchesTest();
88 lookingAtTest();
89
90 // Pattern API
91 patternMatchesTest();
92
93 // Misc
94 lookbehindTest();
95 nullArgumentTest();
96 backRefTest();
97 groupCaptureTest();
98 caretTest();
99 charClassTest();
100 emptyPatternTest();
101 findIntTest();
102 group0Test();
103 longPatternTest();
104 octalTest();
105 ampersandTest();
106 negationTest();
107 splitTest();
108 appendTest();
109 caseFoldingTest();
110 commentsTest();
111 unixLinesTest();
112 replaceFirstTest();
113 gTest();
114 zTest();
115 serializeTest();
116 reluctantRepetitionTest();
117 multilineDollarTest();
118 dollarAtEndTest();
119 caretBetweenTerminatorsTest();
120 // This RFE rejected in Tiger numOccurrencesTest();
121 javaCharClassTest();
122 nonCaptureRepetitionTest();
123 notCapturedGroupCurlyMatchTest();
124 escapedSegmentTest();
125 literalPatternTest();
126 literalReplacementTest();
127 regionTest();
128 toStringTest();
129 negatedCharClassTest();
130 findFromTest();
131 boundsTest();
132 unicodeWordBoundsTest();
133 caretAtEndTest();
134 wordSearchTest();
135 hitEndTest();
136 toMatchResultTest();
137 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800138 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800139 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800140 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700141 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800142 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700143 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700144 horizontalAndVerticalWSTest();
145 linebreakTest();
sherman36e2c8f2012-08-09 10:15:26 -0700146 branchTest();
shermanb16229d2011-12-19 14:14:14 -0800147 if (failure) {
148 throw new
149 RuntimeException("RegExTest failed, 1st failure: " +
150 firstFailure);
151 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800152 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800153 }
sherman0b4d42d2009-02-23 21:06:15 -0800154 }
155
156 // Utility functions
157
158 private static String getRandomAlphaString(int length) {
159 StringBuffer buf = new StringBuffer(length);
160 for (int i=0; i<length; i++) {
161 char randChar = (char)(97 + generator.nextInt(26));
162 buf.append(randChar);
163 }
164 return buf.toString();
165 }
166
167 private static void check(Matcher m, String expected) {
168 m.find();
169 if (!m.group().equals(expected))
170 failCount++;
171 }
172
173 private static void check(Matcher m, String result, boolean expected) {
174 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800175 if (m.group().equals(result) != expected)
176 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800177 }
178
179 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800180 if (p.matcher(s).find() != expected)
181 failCount++;
182 }
183
184 private static void check(String p, String s, boolean expected) {
185 Matcher matcher = Pattern.compile(p).matcher(s);
186 if (matcher.find() != expected)
187 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800188 }
189
190 private static void check(String p, char c, boolean expected) {
191 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
192 Pattern pattern = Pattern.compile(propertyPattern);
193 char[] ca = new char[1]; ca[0] = c;
194 Matcher matcher = pattern.matcher(new String(ca));
195 if (!matcher.find())
196 failCount++;
197 }
198
199 private static void check(String p, int codePoint, boolean expected) {
200 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
201 Pattern pattern = Pattern.compile(propertyPattern);
202 char[] ca = Character.toChars(codePoint);
203 Matcher matcher = pattern.matcher(new String(ca));
204 if (!matcher.find())
205 failCount++;
206 }
207
208 private static void check(String p, int flag, String input, String s,
209 boolean expected)
210 {
211 Pattern pattern = Pattern.compile(p, flag);
212 Matcher matcher = pattern.matcher(input);
213 if (expected)
214 check(matcher, s, expected);
215 else
216 check(pattern, input, false);
217 }
218
219 private static void report(String testName) {
220 int spacesToAdd = 30 - testName.length();
221 StringBuffer paddedNameBuffer = new StringBuffer(testName);
222 for (int i=0; i<spacesToAdd; i++)
223 paddedNameBuffer.append(" ");
224 String paddedName = paddedNameBuffer.toString();
225 System.err.println(paddedName + ": " +
226 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800227 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800228 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800229
230 if (firstFailure == null) {
231 firstFailure = testName;
232 }
233 }
234
sherman0b4d42d2009-02-23 21:06:15 -0800235 failCount = 0;
236 }
237
238 /**
239 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
240 * supplementary characters. This method does NOT fully take care
241 * of the regex syntax.
242 */
243 private static String toSupplementaries(String s) {
244 int length = s.length();
245 StringBuffer sb = new StringBuffer(length * 2);
246
247 for (int i = 0; i < length; ) {
248 char c = s.charAt(i++);
249 if (c == '\\') {
250 sb.append(c);
251 if (i < length) {
252 c = s.charAt(i++);
253 sb.append(c);
254 if (c == 'u') {
255 // assume no syntax error
256 sb.append(s.charAt(i++));
257 sb.append(s.charAt(i++));
258 sb.append(s.charAt(i++));
259 sb.append(s.charAt(i++));
260 }
261 }
262 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
263 sb.append('\ud800').append((char)('\udc00'+c));
264 } else {
265 sb.append(c);
266 }
267 }
268 return sb.toString();
269 }
270
271 // Regular expression tests
272
273 // This is for bug 6178785
274 // Test if an expected NPE gets thrown when passing in a null argument
275 private static boolean check(Runnable test) {
276 try {
277 test.run();
278 failCount++;
279 return false;
280 } catch (NullPointerException npe) {
281 return true;
282 }
283 }
284
285 private static void nullArgumentTest() {
286 check(new Runnable() { public void run() { Pattern.compile(null); }});
287 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
288 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
289 check(new Runnable() { public void run() { Pattern.quote(null);}});
290 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
291 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
292
293 final Matcher m = Pattern.compile("xyz").matcher("xyz");
294 m.matches();
295 check(new Runnable() { public void run() { m.appendTail(null);}});
296 check(new Runnable() { public void run() { m.replaceAll(null);}});
297 check(new Runnable() { public void run() { m.replaceFirst(null);}});
298 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
299 check(new Runnable() { public void run() { m.reset(null);}});
300 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
301 //check(new Runnable() { public void run() { m.usePattern(null);}});
302
303 report("Null Argument");
304 }
305
306 // This is for bug6635133
307 // Test if surrogate pair in Unicode escapes can be handled correctly.
308 private static void surrogatesInClassTest() throws Exception {
309 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
310 Matcher matcher = pattern.matcher("\ud834\udd22");
311 if (!matcher.find())
312 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800313
314 report("Surrogate pair in Unicode escape");
315 }
316
317 // This is for bug6990617
318 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
319 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
320 // char is an octal digit.
321 private static void removeQEQuotingTest() throws Exception {
322 Pattern pattern =
323 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
324 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
325 if (!matcher.find())
326 failCount++;
327
328 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800329 }
330
331 // This is for bug 4988891
332 // Test toMatchResult to see that it is a copy of the Matcher
333 // that is not affected by subsequent operations on the original
334 private static void toMatchResultTest() throws Exception {
335 Pattern pattern = Pattern.compile("squid");
336 Matcher matcher = pattern.matcher(
337 "agiantsquidofdestinyasmallsquidoffate");
338 matcher.find();
339 int matcherStart1 = matcher.start();
340 MatchResult mr = matcher.toMatchResult();
341 if (mr == matcher)
342 failCount++;
343 int resultStart1 = mr.start();
344 if (matcherStart1 != resultStart1)
345 failCount++;
346 matcher.find();
347 int matcherStart2 = matcher.start();
348 int resultStart2 = mr.start();
349 if (matcherStart2 == resultStart2)
350 failCount++;
351 if (resultStart1 != resultStart2)
352 failCount++;
353 MatchResult mr2 = matcher.toMatchResult();
354 if (mr == mr2)
355 failCount++;
356 if (mr2.start() != matcherStart2)
357 failCount++;
358 report("toMatchResult is a copy");
359 }
360
361 // This is for bug 5013885
362 // Must test a slice to see if it reports hitEnd correctly
363 private static void hitEndTest() throws Exception {
364 // Basic test of Slice node
365 Pattern p = Pattern.compile("^squidattack");
366 Matcher m = p.matcher("squack");
367 m.find();
368 if (m.hitEnd())
369 failCount++;
370 m.reset("squid");
371 m.find();
372 if (!m.hitEnd())
373 failCount++;
374
375 // Test Slice, SliceA and SliceU nodes
376 for (int i=0; i<3; i++) {
377 int flags = 0;
378 if (i==1) flags = Pattern.CASE_INSENSITIVE;
379 if (i==2) flags = Pattern.UNICODE_CASE;
380 p = Pattern.compile("^abc", flags);
381 m = p.matcher("ad");
382 m.find();
383 if (m.hitEnd())
384 failCount++;
385 m.reset("ab");
386 m.find();
387 if (!m.hitEnd())
388 failCount++;
389 }
390
391 // Test Boyer-Moore node
392 p = Pattern.compile("catattack");
393 m = p.matcher("attack");
394 m.find();
395 if (!m.hitEnd())
396 failCount++;
397
398 p = Pattern.compile("catattack");
399 m = p.matcher("attackattackattackcatatta");
400 m.find();
401 if (!m.hitEnd())
402 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800403 report("hitEnd from a Slice");
404 }
405
406 // This is for bug 4997476
407 // It is weird code submitted by customer demonstrating a regression
408 private static void wordSearchTest() throws Exception {
409 String testString = new String("word1 word2 word3");
410 Pattern p = Pattern.compile("\\b");
411 Matcher m = p.matcher(testString);
412 int position = 0;
413 int start = 0;
414 while (m.find(position)) {
415 start = m.start();
416 if (start == testString.length())
417 break;
418 if (m.find(start+1)) {
419 position = m.start();
420 } else {
421 position = testString.length();
422 }
423 if (testString.substring(start, position).equals(" "))
424 continue;
425 if (!testString.substring(start, position-1).startsWith("word"))
426 failCount++;
427 }
428 report("Customer word search");
429 }
430
431 // This is for bug 4994840
432 private static void caretAtEndTest() throws Exception {
433 // Problem only occurs with multiline patterns
434 // containing a beginning-of-line caret "^" followed
435 // by an expression that also matches the empty string.
436 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
437 Matcher matcher = pattern.matcher("\r");
438 matcher.find();
439 matcher.find();
440 report("Caret at end");
441 }
442
443 // This test is for 4979006
444 // Check to see if word boundary construct properly handles unicode
445 // non spacing marks
446 private static void unicodeWordBoundsTest() throws Exception {
447 String spaces = " ";
448 String wordChar = "a";
449 String nsm = "\u030a";
450
451 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
452
453 Pattern pattern = Pattern.compile("\\b");
454 Matcher matcher = pattern.matcher("");
455 // S=other B=word character N=non spacing mark .=word boundary
456 // SS.BB.SS
457 String input = spaces + wordChar + wordChar + spaces;
458 twoFindIndexes(input, matcher, 2, 4);
459 // SS.BBN.SS
460 input = spaces + wordChar +wordChar + nsm + spaces;
461 twoFindIndexes(input, matcher, 2, 5);
462 // SS.BN.SS
463 input = spaces + wordChar + nsm + spaces;
464 twoFindIndexes(input, matcher, 2, 4);
465 // SS.BNN.SS
466 input = spaces + wordChar + nsm + nsm + spaces;
467 twoFindIndexes(input, matcher, 2, 5);
468 // SSN.BB.SS
469 input = spaces + nsm + wordChar + wordChar + spaces;
470 twoFindIndexes(input, matcher, 3, 5);
471 // SS.BNB.SS
472 input = spaces + wordChar + nsm + wordChar + spaces;
473 twoFindIndexes(input, matcher, 2, 5);
474 // SSNNSS
475 input = spaces + nsm + nsm + spaces;
476 matcher.reset(input);
477 if (matcher.find())
478 failCount++;
479 // SSN.BBN.SS
480 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
481 twoFindIndexes(input, matcher, 3, 6);
482
483 report("Unicode word boundary");
484 }
485
486 private static void twoFindIndexes(String input, Matcher matcher, int a,
487 int b) throws Exception
488 {
489 matcher.reset(input);
490 matcher.find();
491 if (matcher.start() != a)
492 failCount++;
493 matcher.find();
494 if (matcher.start() != b)
495 failCount++;
496 }
497
498 // This test is for 6284152
499 static void check(String regex, String input, String[] expected) {
500 List<String> result = new ArrayList<String>();
501 Pattern p = Pattern.compile(regex);
502 Matcher m = p.matcher(input);
503 while (m.find()) {
504 result.add(m.group());
505 }
506 if (!Arrays.asList(expected).equals(result))
507 failCount++;
508 }
509
510 private static void lookbehindTest() throws Exception {
511 //Positive
512 check("(?<=%.{0,5})foo\\d",
513 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
514 new String[]{"foo1", "foo2", "foo3"});
515
516 //boundary at end of the lookbehind sub-regex should work consistently
517 //with the boundary just after the lookbehind sub-regex
518 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
519 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
520 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
521 check("(?<!abc \\b)foo", "abc foo", new String[0]);
522
523 //Negative
524 check("(?<!%.{0,5})foo\\d",
525 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
526 new String[] {"foo4", "foo5"});
527
528 //Positive greedy
529 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
530
531 //Positive reluctant
532 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
533
534 //supplementary
535 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
536 new String[] {"fo\ud800\udc00o"});
537 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
538 new String[] {"fo\ud800\udc00o"});
539 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
540 new String[] {"fo\ud800\udc00o"});
541 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
542 new String[] {"fo\ud800\udc00o"});
543 report("Lookbehind");
544 }
545
546 // This test is for 4938995
547 // Check to see if weak region boundaries are transparent to
548 // lookahead and lookbehind constructs
549 private static void boundsTest() throws Exception {
550 String fullMessage = "catdogcat";
551 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
552 Matcher matcher = pattern.matcher("catdogca");
553 matcher.useTransparentBounds(true);
554 if (matcher.find())
555 failCount++;
556 matcher.reset("atdogcat");
557 if (matcher.find())
558 failCount++;
559 matcher.reset(fullMessage);
560 if (!matcher.find())
561 failCount++;
562 matcher.reset(fullMessage);
563 matcher.region(0,9);
564 if (!matcher.find())
565 failCount++;
566 matcher.reset(fullMessage);
567 matcher.region(0,6);
568 if (!matcher.find())
569 failCount++;
570 matcher.reset(fullMessage);
571 matcher.region(3,6);
572 if (!matcher.find())
573 failCount++;
574 matcher.useTransparentBounds(false);
575 if (matcher.find())
576 failCount++;
577
578 // Negative lookahead/lookbehind
579 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
580 matcher = pattern.matcher("dogcat");
581 matcher.useTransparentBounds(true);
582 matcher.region(0,3);
583 if (matcher.find())
584 failCount++;
585 matcher.reset("catdog");
586 matcher.region(3,6);
587 if (matcher.find())
588 failCount++;
589 matcher.useTransparentBounds(false);
590 matcher.reset("dogcat");
591 matcher.region(0,3);
592 if (!matcher.find())
593 failCount++;
594 matcher.reset("catdog");
595 matcher.region(3,6);
596 if (!matcher.find())
597 failCount++;
598
599 report("Region bounds transparency");
600 }
601
602 // This test is for 4945394
603 private static void findFromTest() throws Exception {
604 String message = "This is 40 $0 message.";
605 Pattern pat = Pattern.compile("\\$0");
606 Matcher match = pat.matcher(message);
607 if (!match.find())
608 failCount++;
609 if (match.find())
610 failCount++;
611 if (match.find())
612 failCount++;
613 report("Check for alternating find");
614 }
615
616 // This test is for 4872664 and 4892980
617 private static void negatedCharClassTest() throws Exception {
618 Pattern pattern = Pattern.compile("[^>]");
619 Matcher matcher = pattern.matcher("\u203A");
620 if (!matcher.matches())
621 failCount++;
622 pattern = Pattern.compile("[^fr]");
623 matcher = pattern.matcher("a");
624 if (!matcher.find())
625 failCount++;
626 matcher.reset("\u203A");
627 if (!matcher.find())
628 failCount++;
629 String s = "for";
630 String result[] = s.split("[^fr]");
631 if (!result[0].equals("f"))
632 failCount++;
633 if (!result[1].equals("r"))
634 failCount++;
635 s = "f\u203Ar";
636 result = s.split("[^fr]");
637 if (!result[0].equals("f"))
638 failCount++;
639 if (!result[1].equals("r"))
640 failCount++;
641
642 // Test adding to bits, subtracting a node, then adding to bits again
643 pattern = Pattern.compile("[^f\u203Ar]");
644 matcher = pattern.matcher("a");
645 if (!matcher.find())
646 failCount++;
647 matcher.reset("f");
648 if (matcher.find())
649 failCount++;
650 matcher.reset("\u203A");
651 if (matcher.find())
652 failCount++;
653 matcher.reset("r");
654 if (matcher.find())
655 failCount++;
656 matcher.reset("\u203B");
657 if (!matcher.find())
658 failCount++;
659
660 // Test subtracting a node, adding to bits, subtracting again
661 pattern = Pattern.compile("[^\u203Ar\u203B]");
662 matcher = pattern.matcher("a");
663 if (!matcher.find())
664 failCount++;
665 matcher.reset("\u203A");
666 if (matcher.find())
667 failCount++;
668 matcher.reset("r");
669 if (matcher.find())
670 failCount++;
671 matcher.reset("\u203B");
672 if (matcher.find())
673 failCount++;
674 matcher.reset("\u203C");
675 if (!matcher.find())
676 failCount++;
677
678 report("Negated Character Class");
679 }
680
681 // This test is for 4628291
682 private static void toStringTest() throws Exception {
683 Pattern pattern = Pattern.compile("b+");
684 if (pattern.toString() != "b+")
685 failCount++;
686 Matcher matcher = pattern.matcher("aaabbbccc");
687 String matcherString = matcher.toString(); // unspecified
688 matcher.find();
689 matcherString = matcher.toString(); // unspecified
690 matcher.region(0,3);
691 matcherString = matcher.toString(); // unspecified
692 matcher.reset();
693 matcherString = matcher.toString(); // unspecified
694 report("toString");
695 }
696
697 // This test is for 4808962
698 private static void literalPatternTest() throws Exception {
699 int flags = Pattern.LITERAL;
700
701 Pattern pattern = Pattern.compile("abc\\t$^", flags);
702 check(pattern, "abc\\t$^", true);
703
704 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
705 check(pattern, "abc\\t$^", true);
706
707 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
708 check(pattern, "\\Qa^$bcabc\\E", true);
709 check(pattern, "a^$bcabc", false);
710
711 pattern = Pattern.compile("\\\\Q\\\\E");
712 check(pattern, "\\Q\\E", true);
713
714 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
715 check(pattern, "abcefg\\Q\\Ehij", true);
716
717 pattern = Pattern.compile("\\\\\\Q\\\\E");
718 check(pattern, "\\\\\\\\", true);
719
720 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
721 check(pattern, "\\Qa^$bcabc\\E", true);
722 check(pattern, "a^$bcabc", false);
723
724 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
725 check(pattern, "\\Qabc\\Edef", true);
726 check(pattern, "abcdef", false);
727
728 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
729 check(pattern, "abc\\Edef", true);
730 check(pattern, "abcdef", false);
731
732 pattern = Pattern.compile(Pattern.quote("\\E"));
733 check(pattern, "\\E", true);
734
735 pattern = Pattern.compile("((((abc.+?:)", flags);
736 check(pattern, "((((abc.+?:)", true);
737
738 flags |= Pattern.MULTILINE;
739
740 pattern = Pattern.compile("^cat$", flags);
741 check(pattern, "abc^cat$def", true);
742 check(pattern, "cat", false);
743
744 flags |= Pattern.CASE_INSENSITIVE;
745
746 pattern = Pattern.compile("abcdef", flags);
747 check(pattern, "ABCDEF", true);
748 check(pattern, "AbCdEf", true);
749
750 flags |= Pattern.DOTALL;
751
752 pattern = Pattern.compile("a...b", flags);
753 check(pattern, "A...b", true);
754 check(pattern, "Axxxb", false);
755
756 flags |= Pattern.CANON_EQ;
757
758 Pattern p = Pattern.compile("testa\u030a", flags);
759 check(pattern, "testa\u030a", false);
760 check(pattern, "test\u00e5", false);
761
762 // Supplementary character test
763 flags = Pattern.LITERAL;
764
765 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
766 check(pattern, toSupplementaries("abc\\t$^"), true);
767
768 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
769 check(pattern, toSupplementaries("abc\\t$^"), true);
770
771 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
772 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
773 check(pattern, toSupplementaries("a^$bcabc"), false);
774
775 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
776 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
777 check(pattern, toSupplementaries("a^$bcabc"), false);
778
779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
780 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
781 check(pattern, toSupplementaries("abcdef"), false);
782
783 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
784 check(pattern, toSupplementaries("abc\\Edef"), true);
785 check(pattern, toSupplementaries("abcdef"), false);
786
787 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
788 check(pattern, toSupplementaries("((((abc.+?:)"), true);
789
790 flags |= Pattern.MULTILINE;
791
792 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
793 check(pattern, toSupplementaries("abc^cat$def"), true);
794 check(pattern, toSupplementaries("cat"), false);
795
796 flags |= Pattern.DOTALL;
797
798 // note: this is case-sensitive.
799 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
800 check(pattern, toSupplementaries("a...b"), true);
801 check(pattern, toSupplementaries("axxxb"), false);
802
803 flags |= Pattern.CANON_EQ;
804
805 String t = toSupplementaries("test");
806 p = Pattern.compile(t + "a\u030a", flags);
807 check(pattern, t + "a\u030a", false);
808 check(pattern, t + "\u00e5", false);
809
810 report("Literal pattern");
811 }
812
813 // This test is for 4803179
814 // This test is also for 4808962, replacement parts
815 private static void literalReplacementTest() throws Exception {
816 int flags = Pattern.LITERAL;
817
818 Pattern pattern = Pattern.compile("abc", flags);
819 Matcher matcher = pattern.matcher("zzzabczzz");
820 String replaceTest = "$0";
821 String result = matcher.replaceAll(replaceTest);
822 if (!result.equals("zzzabczzz"))
823 failCount++;
824
825 matcher.reset();
826 String literalReplacement = matcher.quoteReplacement(replaceTest);
827 result = matcher.replaceAll(literalReplacement);
828 if (!result.equals("zzz$0zzz"))
829 failCount++;
830
831 matcher.reset();
832 replaceTest = "\\t$\\$";
833 literalReplacement = matcher.quoteReplacement(replaceTest);
834 result = matcher.replaceAll(literalReplacement);
835 if (!result.equals("zzz\\t$\\$zzz"))
836 failCount++;
837
838 // Supplementary character test
839 pattern = Pattern.compile(toSupplementaries("abc"), flags);
840 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
841 replaceTest = "$0";
842 result = matcher.replaceAll(replaceTest);
843 if (!result.equals(toSupplementaries("zzzabczzz")))
844 failCount++;
845
846 matcher.reset();
847 literalReplacement = matcher.quoteReplacement(replaceTest);
848 result = matcher.replaceAll(literalReplacement);
849 if (!result.equals(toSupplementaries("zzz$0zzz")))
850 failCount++;
851
852 matcher.reset();
853 replaceTest = "\\t$\\$";
854 literalReplacement = matcher.quoteReplacement(replaceTest);
855 result = matcher.replaceAll(literalReplacement);
856 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
857 failCount++;
858
sherman5c8f3492012-04-12 15:01:41 -0700859 // IAE should be thrown if backslash or '$' is the last character
860 // in replacement string
861 try {
862 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700863 failCount++;
864 } catch (IllegalArgumentException iie) {
865 } catch (Exception e) {
866 failCount++;
867 }
868 try {
sherman5c8f3492012-04-12 15:01:41 -0700869 "\uac00".replaceAll("\uac00", "\\");
870 failCount++;
871 } catch (IllegalArgumentException iie) {
872 } catch (Exception e) {
873 failCount++;
874 }
sherman0b4d42d2009-02-23 21:06:15 -0800875 report("Literal replacement");
876 }
877
878 // This test is for 4757029
879 private static void regionTest() throws Exception {
880 Pattern pattern = Pattern.compile("abc");
881 Matcher matcher = pattern.matcher("abcdefabc");
882
883 matcher.region(0,9);
884 if (!matcher.find())
885 failCount++;
886 if (!matcher.find())
887 failCount++;
888 matcher.region(0,3);
889 if (!matcher.find())
890 failCount++;
891 matcher.region(3,6);
892 if (matcher.find())
893 failCount++;
894 matcher.region(0,2);
895 if (matcher.find())
896 failCount++;
897
898 expectRegionFail(matcher, 1, -1);
899 expectRegionFail(matcher, -1, -1);
900 expectRegionFail(matcher, -1, 1);
901 expectRegionFail(matcher, 5, 3);
902 expectRegionFail(matcher, 5, 12);
903 expectRegionFail(matcher, 12, 12);
904
905 pattern = Pattern.compile("^abc$");
906 matcher = pattern.matcher("zzzabczzz");
907 matcher.region(0,9);
908 if (matcher.find())
909 failCount++;
910 matcher.region(3,6);
911 if (!matcher.find())
912 failCount++;
913 matcher.region(3,6);
914 matcher.useAnchoringBounds(false);
915 if (matcher.find())
916 failCount++;
917
918 // Supplementary character test
919 pattern = Pattern.compile(toSupplementaries("abc"));
920 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
921 matcher.region(0,9*2);
922 if (!matcher.find())
923 failCount++;
924 if (!matcher.find())
925 failCount++;
926 matcher.region(0,3*2);
927 if (!matcher.find())
928 failCount++;
929 matcher.region(1,3*2);
930 if (matcher.find())
931 failCount++;
932 matcher.region(3*2,6*2);
933 if (matcher.find())
934 failCount++;
935 matcher.region(0,2*2);
936 if (matcher.find())
937 failCount++;
938 matcher.region(0,2*2+1);
939 if (matcher.find())
940 failCount++;
941
942 expectRegionFail(matcher, 1*2, -1);
943 expectRegionFail(matcher, -1, -1);
944 expectRegionFail(matcher, -1, 1*2);
945 expectRegionFail(matcher, 5*2, 3*2);
946 expectRegionFail(matcher, 5*2, 12*2);
947 expectRegionFail(matcher, 12*2, 12*2);
948
949 pattern = Pattern.compile(toSupplementaries("^abc$"));
950 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
951 matcher.region(0,9*2);
952 if (matcher.find())
953 failCount++;
954 matcher.region(3*2,6*2);
955 if (!matcher.find())
956 failCount++;
957 matcher.region(3*2+1,6*2);
958 if (matcher.find())
959 failCount++;
960 matcher.region(3*2,6*2-1);
961 if (matcher.find())
962 failCount++;
963 matcher.region(3*2,6*2);
964 matcher.useAnchoringBounds(false);
965 if (matcher.find())
966 failCount++;
967 report("Regions");
968 }
969
970 private static void expectRegionFail(Matcher matcher, int index1,
971 int index2)
972 {
973 try {
974 matcher.region(index1, index2);
975 failCount++;
976 } catch (IndexOutOfBoundsException ioobe) {
977 // Correct result
978 } catch (IllegalStateException ise) {
979 // Correct result
980 }
981 }
982
983 // This test is for 4803197
984 private static void escapedSegmentTest() throws Exception {
985
986 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
987 check(pattern, "dir1\\dir2", true);
988
989 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
990 check(pattern, "dir1\\dir2\\", true);
991
992 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
993 check(pattern, "dir1\\dir2\\", true);
994
995 // Supplementary character test
996 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
997 check(pattern, toSupplementaries("dir1\\dir2"), true);
998
999 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1000 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1001
1002 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1003 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1004
1005 report("Escaped segment");
1006 }
1007
1008 // This test is for 4792284
1009 private static void nonCaptureRepetitionTest() throws Exception {
1010 String input = "abcdefgh;";
1011
1012 String[] patterns = new String[] {
1013 "(?:\\w{4})+;",
1014 "(?:\\w{8})*;",
1015 "(?:\\w{2}){2,4};",
1016 "(?:\\w{4}){2,};", // only matches the
1017 ".*?(?:\\w{5})+;", // specified minimum
1018 ".*?(?:\\w{9})*;", // number of reps - OK
1019 "(?:\\w{4})+?;", // lazy repetition - OK
1020 "(?:\\w{4})++;", // possessive repetition - OK
1021 "(?:\\w{2,}?)+;", // non-deterministic - OK
1022 "(\\w{4})+;", // capturing group - OK
1023 };
1024
1025 for (int i = 0; i < patterns.length; i++) {
1026 // Check find()
1027 check(patterns[i], 0, input, input, true);
1028 // Check matches()
1029 Pattern p = Pattern.compile(patterns[i]);
1030 Matcher m = p.matcher(input);
1031
1032 if (m.matches()) {
1033 if (!m.group(0).equals(input))
1034 failCount++;
1035 } else {
1036 failCount++;
1037 }
1038 }
1039
1040 report("Non capturing repetition");
1041 }
1042
1043 // This test is for 6358731
1044 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1045 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1046 Matcher matcher = pattern.matcher("abcd");
1047 if (!matcher.matches() ||
1048 matcher.group(1) != null ||
1049 !matcher.group(2).equals("abcd")) {
1050 failCount++;
1051 }
1052 report("Not captured GroupCurly");
1053 }
1054
1055 // This test is for 4706545
1056 private static void javaCharClassTest() throws Exception {
1057 for (int i=0; i<1000; i++) {
1058 char c = (char)generator.nextInt();
1059 check("{javaLowerCase}", c, Character.isLowerCase(c));
1060 check("{javaUpperCase}", c, Character.isUpperCase(c));
1061 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1062 check("{javaTitleCase}", c, Character.isTitleCase(c));
1063 check("{javaDigit}", c, Character.isDigit(c));
1064 check("{javaDefined}", c, Character.isDefined(c));
1065 check("{javaLetter}", c, Character.isLetter(c));
1066 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1067 check("{javaJavaIdentifierStart}", c,
1068 Character.isJavaIdentifierStart(c));
1069 check("{javaJavaIdentifierPart}", c,
1070 Character.isJavaIdentifierPart(c));
1071 check("{javaUnicodeIdentifierStart}", c,
1072 Character.isUnicodeIdentifierStart(c));
1073 check("{javaUnicodeIdentifierPart}", c,
1074 Character.isUnicodeIdentifierPart(c));
1075 check("{javaIdentifierIgnorable}", c,
1076 Character.isIdentifierIgnorable(c));
1077 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1078 check("{javaWhitespace}", c, Character.isWhitespace(c));
1079 check("{javaISOControl}", c, Character.isISOControl(c));
1080 check("{javaMirrored}", c, Character.isMirrored(c));
1081
1082 }
1083
1084 // Supplementary character test
1085 for (int i=0; i<1000; i++) {
1086 int c = generator.nextInt(Character.MAX_CODE_POINT
1087 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1088 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1089 check("{javaLowerCase}", c, Character.isLowerCase(c));
1090 check("{javaUpperCase}", c, Character.isUpperCase(c));
1091 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1092 check("{javaTitleCase}", c, Character.isTitleCase(c));
1093 check("{javaDigit}", c, Character.isDigit(c));
1094 check("{javaDefined}", c, Character.isDefined(c));
1095 check("{javaLetter}", c, Character.isLetter(c));
1096 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1097 check("{javaJavaIdentifierStart}", c,
1098 Character.isJavaIdentifierStart(c));
1099 check("{javaJavaIdentifierPart}", c,
1100 Character.isJavaIdentifierPart(c));
1101 check("{javaUnicodeIdentifierStart}", c,
1102 Character.isUnicodeIdentifierStart(c));
1103 check("{javaUnicodeIdentifierPart}", c,
1104 Character.isUnicodeIdentifierPart(c));
1105 check("{javaIdentifierIgnorable}", c,
1106 Character.isIdentifierIgnorable(c));
1107 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1108 check("{javaWhitespace}", c, Character.isWhitespace(c));
1109 check("{javaISOControl}", c, Character.isISOControl(c));
1110 check("{javaMirrored}", c, Character.isMirrored(c));
1111 }
1112
1113 report("Java character classes");
1114 }
1115
1116 // This test is for 4523620
1117 /*
1118 private static void numOccurrencesTest() throws Exception {
1119 Pattern pattern = Pattern.compile("aaa");
1120
1121 if (pattern.numOccurrences("aaaaaa", false) != 2)
1122 failCount++;
1123 if (pattern.numOccurrences("aaaaaa", true) != 4)
1124 failCount++;
1125
1126 pattern = Pattern.compile("^");
1127 if (pattern.numOccurrences("aaaaaa", false) != 1)
1128 failCount++;
1129 if (pattern.numOccurrences("aaaaaa", true) != 1)
1130 failCount++;
1131
1132 report("Number of Occurrences");
1133 }
1134 */
1135
1136 // This test is for 4776374
1137 private static void caretBetweenTerminatorsTest() throws Exception {
1138 int flags1 = Pattern.DOTALL;
1139 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1140 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1141 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1142
1143 check("^....", flags1, "test\ntest", "test", true);
1144 check(".....^", flags1, "test\ntest", "test", false);
1145 check(".....^", flags1, "test\n", "test", false);
1146 check("....^", flags1, "test\r\n", "test", false);
1147
1148 check("^....", flags2, "test\ntest", "test", true);
1149 check("....^", flags2, "test\ntest", "test", false);
1150 check(".....^", flags2, "test\n", "test", false);
1151 check("....^", flags2, "test\r\n", "test", false);
1152
1153 check("^....", flags3, "test\ntest", "test", true);
1154 check(".....^", flags3, "test\ntest", "test\n", true);
1155 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1156 check(".....^", flags3, "test\n", "test", false);
1157 check(".....^", flags3, "test\r\n", "test", false);
1158 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1159
1160 check("^....", flags4, "test\ntest", "test", true);
1161 check(".....^", flags3, "test\ntest", "test\n", true);
1162 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1163 check(".....^", flags4, "test\n", "test\n", false);
1164 check(".....^", flags4, "test\r\n", "test\r", false);
1165
1166 // Supplementary character test
1167 String t = toSupplementaries("test");
1168 check("^....", flags1, t+"\n"+t, t, true);
1169 check(".....^", flags1, t+"\n"+t, t, false);
1170 check(".....^", flags1, t+"\n", t, false);
1171 check("....^", flags1, t+"\r\n", t, false);
1172
1173 check("^....", flags2, t+"\n"+t, t, true);
1174 check("....^", flags2, t+"\n"+t, t, false);
1175 check(".....^", flags2, t+"\n", t, false);
1176 check("....^", flags2, t+"\r\n", t, false);
1177
1178 check("^....", flags3, t+"\n"+t, t, true);
1179 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1180 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1181 check(".....^", flags3, t+"\n", t, false);
1182 check(".....^", flags3, t+"\r\n", t, false);
1183 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1184
1185 check("^....", flags4, t+"\n"+t, t, true);
1186 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1187 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1188 check(".....^", flags4, t+"\n", t+"\n", false);
1189 check(".....^", flags4, t+"\r\n", t+"\r", false);
1190
1191 report("Caret between terminators");
1192 }
1193
1194 // This test is for 4727935
1195 private static void dollarAtEndTest() throws Exception {
1196 int flags1 = Pattern.DOTALL;
1197 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1198 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1199
1200 check("....$", flags1, "test\n", "test", true);
1201 check("....$", flags1, "test\r\n", "test", true);
1202 check(".....$", flags1, "test\n", "test\n", true);
1203 check(".....$", flags1, "test\u0085", "test\u0085", true);
1204 check("....$", flags1, "test\u0085", "test", true);
1205
1206 check("....$", flags2, "test\n", "test", true);
1207 check(".....$", flags2, "test\n", "test\n", true);
1208 check(".....$", flags2, "test\u0085", "test\u0085", true);
1209 check("....$", flags2, "test\u0085", "est\u0085", true);
1210
1211 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1212 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1213 check("....$blah", flags3, "test\nblah", "!!!!", false);
1214 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1215
1216 // Supplementary character test
1217 String t = toSupplementaries("test");
1218 String b = toSupplementaries("blah");
1219 check("....$", flags1, t+"\n", t, true);
1220 check("....$", flags1, t+"\r\n", t, true);
1221 check(".....$", flags1, t+"\n", t+"\n", true);
1222 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1223 check("....$", flags1, t+"\u0085", t, true);
1224
1225 check("....$", flags2, t+"\n", t, true);
1226 check(".....$", flags2, t+"\n", t+"\n", true);
1227 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1228 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1229
1230 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1231 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1232 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1233 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1234
1235 report("Dollar at End");
1236 }
1237
1238 // This test is for 4711773
1239 private static void multilineDollarTest() throws Exception {
1240 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1241 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1242 matcher.find();
1243 if (matcher.start(0) != 9)
1244 failCount++;
1245 matcher.find();
1246 if (matcher.start(0) != 20)
1247 failCount++;
1248
1249 // Supplementary character test
1250 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1251 matcher.find();
1252 if (matcher.start(0) != 9*2)
1253 failCount++;
1254 matcher.find();
1255 if (matcher.start(0) != 20*2)
1256 failCount++;
1257
1258 report("Multiline Dollar");
1259 }
1260
1261 private static void reluctantRepetitionTest() throws Exception {
1262 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1263 check(p, "1 word word word 2", true);
1264 check(p, "1 wor wo w 2", true);
1265 check(p, "1 word word 2", true);
1266 check(p, "1 word 2", true);
1267 check(p, "1 wo w w 2", true);
1268 check(p, "1 wo w 2", true);
1269 check(p, "1 wor w 2", true);
1270
1271 p = Pattern.compile("([a-z])+?c");
1272 Matcher m = p.matcher("ababcdefdec");
1273 check(m, "ababc");
1274
1275 // Supplementary character test
1276 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1277 m = p.matcher(toSupplementaries("ababcdefdec"));
1278 check(m, toSupplementaries("ababc"));
1279
1280 report("Reluctant Repetition");
1281 }
1282
1283 private static void serializeTest() throws Exception {
1284 String patternStr = "(b)";
1285 String matchStr = "b";
1286 Pattern pattern = Pattern.compile(patternStr);
1287 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1288 ObjectOutputStream oos = new ObjectOutputStream(baos);
1289 oos.writeObject(pattern);
1290 oos.close();
1291 ObjectInputStream ois = new ObjectInputStream(
1292 new ByteArrayInputStream(baos.toByteArray()));
1293 Pattern serializedPattern = (Pattern)ois.readObject();
1294 ois.close();
1295 Matcher matcher = serializedPattern.matcher(matchStr);
1296 if (!matcher.matches())
1297 failCount++;
1298 if (matcher.groupCount() != 1)
1299 failCount++;
1300
1301 report("Serialization");
1302 }
1303
1304 private static void gTest() {
1305 Pattern pattern = Pattern.compile("\\G\\w");
1306 Matcher matcher = pattern.matcher("abc#x#x");
1307 matcher.find();
1308 matcher.find();
1309 matcher.find();
1310 if (matcher.find())
1311 failCount++;
1312
1313 pattern = Pattern.compile("\\GA*");
1314 matcher = pattern.matcher("1A2AA3");
1315 matcher.find();
1316 if (matcher.find())
1317 failCount++;
1318
1319 pattern = Pattern.compile("\\GA*");
1320 matcher = pattern.matcher("1A2AA3");
1321 if (!matcher.find(1))
1322 failCount++;
1323 matcher.find();
1324 if (matcher.find())
1325 failCount++;
1326
1327 report("\\G");
1328 }
1329
1330 private static void zTest() {
1331 Pattern pattern = Pattern.compile("foo\\Z");
1332 // Positives
1333 check(pattern, "foo\u0085", true);
1334 check(pattern, "foo\u2028", true);
1335 check(pattern, "foo\u2029", true);
1336 check(pattern, "foo\n", true);
1337 check(pattern, "foo\r", true);
1338 check(pattern, "foo\r\n", true);
1339 // Negatives
1340 check(pattern, "fooo", false);
1341 check(pattern, "foo\n\r", false);
1342
1343 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1344 // Positives
1345 check(pattern, "foo", true);
1346 check(pattern, "foo\n", true);
1347 // Negatives
1348 check(pattern, "foo\r", false);
1349 check(pattern, "foo\u0085", false);
1350 check(pattern, "foo\u2028", false);
1351 check(pattern, "foo\u2029", false);
1352
1353 report("\\Z");
1354 }
1355
1356 private static void replaceFirstTest() {
1357 Pattern pattern = Pattern.compile("(ab)(c*)");
1358 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1359 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1360 failCount++;
1361
1362 matcher.reset("zzzabccczzzabcczzzabccczzz");
1363 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1364 failCount++;
1365
1366 matcher.reset("zzzabccczzzabcczzzabccczzz");
1367 String result = matcher.replaceFirst("$1");
1368 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1369 failCount++;
1370
1371 matcher.reset("zzzabccczzzabcczzzabccczzz");
1372 result = matcher.replaceFirst("$2");
1373 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1374 failCount++;
1375
1376 pattern = Pattern.compile("a*");
1377 matcher = pattern.matcher("aaaaaaaaaa");
1378 if (!matcher.replaceFirst("test").equals("test"))
1379 failCount++;
1380
1381 pattern = Pattern.compile("a+");
1382 matcher = pattern.matcher("zzzaaaaaaaaaa");
1383 if (!matcher.replaceFirst("test").equals("zzztest"))
1384 failCount++;
1385
1386 // Supplementary character test
1387 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1388 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1389 if (!matcher.replaceFirst(toSupplementaries("test"))
1390 .equals(toSupplementaries("testzzzabcczzzabccc")))
1391 failCount++;
1392
1393 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1394 if (!matcher.replaceFirst(toSupplementaries("test")).
1395 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1396 failCount++;
1397
1398 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1399 result = matcher.replaceFirst("$1");
1400 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1401 failCount++;
1402
1403 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1404 result = matcher.replaceFirst("$2");
1405 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1406 failCount++;
1407
1408 pattern = Pattern.compile(toSupplementaries("a*"));
1409 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1410 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1411 failCount++;
1412
1413 pattern = Pattern.compile(toSupplementaries("a+"));
1414 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1415 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1416 failCount++;
1417
1418 report("Replace First");
1419 }
1420
1421 private static void unixLinesTest() {
1422 Pattern pattern = Pattern.compile(".*");
1423 Matcher matcher = pattern.matcher("aa\u2028blah");
1424 matcher.find();
1425 if (!matcher.group(0).equals("aa"))
1426 failCount++;
1427
1428 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1429 matcher = pattern.matcher("aa\u2028blah");
1430 matcher.find();
1431 if (!matcher.group(0).equals("aa\u2028blah"))
1432 failCount++;
1433
1434 pattern = Pattern.compile("[az]$",
1435 Pattern.MULTILINE | Pattern.UNIX_LINES);
1436 matcher = pattern.matcher("aa\u2028zz");
1437 check(matcher, "a\u2028", false);
1438
1439 // Supplementary character test
1440 pattern = Pattern.compile(".*");
1441 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1442 matcher.find();
1443 if (!matcher.group(0).equals(toSupplementaries("aa")))
1444 failCount++;
1445
1446 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1447 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1448 matcher.find();
1449 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1450 failCount++;
1451
1452 pattern = Pattern.compile(toSupplementaries("[az]$"),
1453 Pattern.MULTILINE | Pattern.UNIX_LINES);
1454 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1455 check(matcher, toSupplementaries("a\u2028"), false);
1456
1457 report("Unix Lines");
1458 }
1459
1460 private static void commentsTest() {
1461 int flags = Pattern.COMMENTS;
1462
1463 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1464 Matcher matcher = pattern.matcher("aa#aa");
1465 if (!matcher.matches())
1466 failCount++;
1467
1468 pattern = Pattern.compile("aa # blah", flags);
1469 matcher = pattern.matcher("aa");
1470 if (!matcher.matches())
1471 failCount++;
1472
1473 pattern = Pattern.compile("aa blah", flags);
1474 matcher = pattern.matcher("aablah");
1475 if (!matcher.matches())
1476 failCount++;
1477
1478 pattern = Pattern.compile("aa # blah blech ", flags);
1479 matcher = pattern.matcher("aa");
1480 if (!matcher.matches())
1481 failCount++;
1482
1483 pattern = Pattern.compile("aa # blah\n ", flags);
1484 matcher = pattern.matcher("aa");
1485 if (!matcher.matches())
1486 failCount++;
1487
1488 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1489 matcher = pattern.matcher("aabc");
1490 if (!matcher.matches())
1491 failCount++;
1492
1493 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1494 matcher = pattern.matcher("aabc");
1495 if (!matcher.matches())
1496 failCount++;
1497
1498 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1499 matcher = pattern.matcher("aabc#blech");
1500 if (!matcher.matches())
1501 failCount++;
1502
1503 // Supplementary character test
1504 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1505 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1506 if (!matcher.matches())
1507 failCount++;
1508
1509 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1510 matcher = pattern.matcher(toSupplementaries("aa"));
1511 if (!matcher.matches())
1512 failCount++;
1513
1514 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1515 matcher = pattern.matcher(toSupplementaries("aablah"));
1516 if (!matcher.matches())
1517 failCount++;
1518
1519 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1520 matcher = pattern.matcher(toSupplementaries("aa"));
1521 if (!matcher.matches())
1522 failCount++;
1523
1524 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1525 matcher = pattern.matcher(toSupplementaries("aa"));
1526 if (!matcher.matches())
1527 failCount++;
1528
1529 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1530 matcher = pattern.matcher(toSupplementaries("aabc"));
1531 if (!matcher.matches())
1532 failCount++;
1533
1534 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1535 matcher = pattern.matcher(toSupplementaries("aabc"));
1536 if (!matcher.matches())
1537 failCount++;
1538
1539 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1540 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1541 if (!matcher.matches())
1542 failCount++;
1543
1544 report("Comments");
1545 }
1546
1547 private static void caseFoldingTest() { // bug 4504687
1548 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1549 Pattern pattern = Pattern.compile("aa", flags);
1550 Matcher matcher = pattern.matcher("ab");
1551 if (matcher.matches())
1552 failCount++;
1553
1554 pattern = Pattern.compile("aA", flags);
1555 matcher = pattern.matcher("ab");
1556 if (matcher.matches())
1557 failCount++;
1558
1559 pattern = Pattern.compile("aa", flags);
1560 matcher = pattern.matcher("aB");
1561 if (matcher.matches())
1562 failCount++;
1563 matcher = pattern.matcher("Ab");
1564 if (matcher.matches())
1565 failCount++;
1566
1567 // ASCII "a"
1568 // Latin-1 Supplement "a" + grave
1569 // Cyrillic "a"
1570 String[] patterns = new String[] {
1571 //single
1572 "a", "\u00e0", "\u0430",
1573 //slice
1574 "ab", "\u00e0\u00e1", "\u0430\u0431",
1575 //class single
1576 "[a]", "[\u00e0]", "[\u0430]",
1577 //class range
1578 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1579 //back reference
1580 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1581 };
1582
1583 String[] texts = new String[] {
1584 "A", "\u00c0", "\u0410",
1585 "AB", "\u00c0\u00c1", "\u0410\u0411",
1586 "A", "\u00c0", "\u0410",
1587 "B", "\u00c2", "\u0411",
1588 "aA", "\u00e0\u00c0", "\u0430\u0410"
1589 };
1590
1591 boolean[] expected = new boolean[] {
1592 true, false, false,
1593 true, false, false,
1594 true, false, false,
1595 true, false, false,
1596 true, false, false
1597 };
1598
1599 flags = Pattern.CASE_INSENSITIVE;
1600 for (int i = 0; i < patterns.length; i++) {
1601 pattern = Pattern.compile(patterns[i], flags);
1602 matcher = pattern.matcher(texts[i]);
1603 if (matcher.matches() != expected[i]) {
1604 System.out.println("<1> Failed at " + i);
1605 failCount++;
1606 }
1607 }
1608
1609 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1610 for (int i = 0; i < patterns.length; i++) {
1611 pattern = Pattern.compile(patterns[i], flags);
1612 matcher = pattern.matcher(texts[i]);
1613 if (!matcher.matches()) {
1614 System.out.println("<2> Failed at " + i);
1615 failCount++;
1616 }
1617 }
1618 // flag unicode_case alone should do nothing
1619 flags = Pattern.UNICODE_CASE;
1620 for (int i = 0; i < patterns.length; i++) {
1621 pattern = Pattern.compile(patterns[i], flags);
1622 matcher = pattern.matcher(texts[i]);
1623 if (matcher.matches()) {
1624 System.out.println("<3> Failed at " + i);
1625 failCount++;
1626 }
1627 }
1628
1629 // Special cases: i, I, u+0131 and u+0130
1630 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1631 pattern = Pattern.compile("[h-j]+", flags);
1632 if (!pattern.matcher("\u0131\u0130").matches())
1633 failCount++;
1634 report("Case Folding");
1635 }
1636
1637 private static void appendTest() {
1638 Pattern pattern = Pattern.compile("(ab)(cd)");
1639 Matcher matcher = pattern.matcher("abcd");
1640 String result = matcher.replaceAll("$2$1");
1641 if (!result.equals("cdab"))
1642 failCount++;
1643
1644 String s1 = "Swap all: first = 123, second = 456";
1645 String s2 = "Swap one: first = 123, second = 456";
1646 String r = "$3$2$1";
1647 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1648 matcher = pattern.matcher(s1);
1649
1650 result = matcher.replaceAll(r);
1651 if (!result.equals("Swap all: 123 = first, 456 = second"))
1652 failCount++;
1653
1654 matcher = pattern.matcher(s2);
1655
1656 if (matcher.find()) {
1657 StringBuffer sb = new StringBuffer();
1658 matcher.appendReplacement(sb, r);
1659 matcher.appendTail(sb);
1660 result = sb.toString();
1661 if (!result.equals("Swap one: 123 = first, second = 456"))
1662 failCount++;
1663 }
1664
1665 // Supplementary character test
1666 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1667 matcher = pattern.matcher(toSupplementaries("abcd"));
1668 result = matcher.replaceAll("$2$1");
1669 if (!result.equals(toSupplementaries("cdab")))
1670 failCount++;
1671
1672 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1673 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1674 r = toSupplementaries("$3$2$1");
1675 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1676 matcher = pattern.matcher(s1);
1677
1678 result = matcher.replaceAll(r);
1679 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1680 failCount++;
1681
1682 matcher = pattern.matcher(s2);
1683
1684 if (matcher.find()) {
1685 StringBuffer sb = new StringBuffer();
1686 matcher.appendReplacement(sb, r);
1687 matcher.appendTail(sb);
1688 result = sb.toString();
1689 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1690 failCount++;
1691 }
1692 report("Append");
1693 }
1694
1695 private static void splitTest() {
1696 Pattern pattern = Pattern.compile(":");
1697 String[] result = pattern.split("foo:and:boo", 2);
1698 if (!result[0].equals("foo"))
1699 failCount++;
1700 if (!result[1].equals("and:boo"))
1701 failCount++;
1702 // Supplementary character test
1703 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1704 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1705 if (!result[0].equals(toSupplementaries("foo")))
1706 failCount++;
1707 if (!result[1].equals(toSupplementaries("andXboo")))
1708 failCount++;
1709
1710 CharBuffer cb = CharBuffer.allocate(100);
1711 cb.put("foo:and:boo");
1712 cb.flip();
1713 result = pattern.split(cb);
1714 if (!result[0].equals("foo"))
1715 failCount++;
1716 if (!result[1].equals("and"))
1717 failCount++;
1718 if (!result[2].equals("boo"))
1719 failCount++;
1720
1721 // Supplementary character test
1722 CharBuffer cbs = CharBuffer.allocate(100);
1723 cbs.put(toSupplementaries("fooXandXboo"));
1724 cbs.flip();
1725 result = patternX.split(cbs);
1726 if (!result[0].equals(toSupplementaries("foo")))
1727 failCount++;
1728 if (!result[1].equals(toSupplementaries("and")))
1729 failCount++;
1730 if (!result[2].equals(toSupplementaries("boo")))
1731 failCount++;
1732
1733 String source = "0123456789";
1734 for (int limit=-2; limit<3; limit++) {
1735 for (int x=0; x<10; x++) {
1736 result = source.split(Integer.toString(x), limit);
1737 int expectedLength = limit < 1 ? 2 : limit;
1738
1739 if ((limit == 0) && (x == 9)) {
1740 // expected dropping of ""
1741 if (result.length != 1)
1742 failCount++;
1743 if (!result[0].equals("012345678")) {
1744 failCount++;
1745 }
1746 } else {
1747 if (result.length != expectedLength) {
1748 failCount++;
1749 }
1750 if (!result[0].equals(source.substring(0,x))) {
1751 if (limit != 1) {
1752 failCount++;
1753 } else {
1754 if (!result[0].equals(source.substring(0,10))) {
1755 failCount++;
1756 }
1757 }
1758 }
1759 if (expectedLength > 1) { // Check segment 2
1760 if (!result[1].equals(source.substring(x+1,10)))
1761 failCount++;
1762 }
1763 }
1764 }
1765 }
1766 // Check the case for no match found
1767 for (int limit=-2; limit<3; limit++) {
1768 result = source.split("e", limit);
1769 if (result.length != 1)
1770 failCount++;
1771 if (!result[0].equals(source))
1772 failCount++;
1773 }
1774 // Check the case for limit == 0, source = "";
1775 source = "";
1776 result = source.split("e", 0);
1777 if (result.length != 1)
1778 failCount++;
1779 if (!result[0].equals(source))
1780 failCount++;
1781
1782 report("Split");
1783 }
1784
1785 private static void negationTest() {
1786 Pattern pattern = Pattern.compile("[\\[@^]+");
1787 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1788 if (!matcher.find())
1789 failCount++;
1790 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1791 failCount++;
1792 pattern = Pattern.compile("[@\\[^]+");
1793 matcher = pattern.matcher("@@@@[[[[^^^^");
1794 if (!matcher.find())
1795 failCount++;
1796 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1797 failCount++;
1798 pattern = Pattern.compile("[@\\[^@]+");
1799 matcher = pattern.matcher("@@@@[[[[^^^^");
1800 if (!matcher.find())
1801 failCount++;
1802 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1803 failCount++;
1804
1805 pattern = Pattern.compile("\\)");
1806 matcher = pattern.matcher("xxx)xxx");
1807 if (!matcher.find())
1808 failCount++;
1809
1810 report("Negation");
1811 }
1812
1813 private static void ampersandTest() {
1814 Pattern pattern = Pattern.compile("[&@]+");
1815 check(pattern, "@@@@&&&&", true);
1816
1817 pattern = Pattern.compile("[@&]+");
1818 check(pattern, "@@@@&&&&", true);
1819
1820 pattern = Pattern.compile("[@\\&]+");
1821 check(pattern, "@@@@&&&&", true);
1822
1823 report("Ampersand");
1824 }
1825
1826 private static void octalTest() throws Exception {
1827 Pattern pattern = Pattern.compile("\\u0007");
1828 Matcher matcher = pattern.matcher("\u0007");
1829 if (!matcher.matches())
1830 failCount++;
1831 pattern = Pattern.compile("\\07");
1832 matcher = pattern.matcher("\u0007");
1833 if (!matcher.matches())
1834 failCount++;
1835 pattern = Pattern.compile("\\007");
1836 matcher = pattern.matcher("\u0007");
1837 if (!matcher.matches())
1838 failCount++;
1839 pattern = Pattern.compile("\\0007");
1840 matcher = pattern.matcher("\u0007");
1841 if (!matcher.matches())
1842 failCount++;
1843 pattern = Pattern.compile("\\040");
1844 matcher = pattern.matcher("\u0020");
1845 if (!matcher.matches())
1846 failCount++;
1847 pattern = Pattern.compile("\\0403");
1848 matcher = pattern.matcher("\u00203");
1849 if (!matcher.matches())
1850 failCount++;
1851 pattern = Pattern.compile("\\0103");
1852 matcher = pattern.matcher("\u0043");
1853 if (!matcher.matches())
1854 failCount++;
1855
1856 report("Octal");
1857 }
1858
1859 private static void longPatternTest() throws Exception {
1860 try {
1861 Pattern pattern = Pattern.compile(
1862 "a 32-character-long pattern xxxx");
1863 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1864 pattern = Pattern.compile("a thirty four character long regex");
1865 StringBuffer patternToBe = new StringBuffer(101);
1866 for (int i=0; i<100; i++)
1867 patternToBe.append((char)(97 + i%26));
1868 pattern = Pattern.compile(patternToBe.toString());
1869 } catch (PatternSyntaxException e) {
1870 failCount++;
1871 }
1872
1873 // Supplementary character test
1874 try {
1875 Pattern pattern = Pattern.compile(
1876 toSupplementaries("a 32-character-long pattern xxxx"));
1877 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1878 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1879 StringBuffer patternToBe = new StringBuffer(101*2);
1880 for (int i=0; i<100; i++)
1881 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1882 + 97 + i%26));
1883 pattern = Pattern.compile(patternToBe.toString());
1884 } catch (PatternSyntaxException e) {
1885 failCount++;
1886 }
1887 report("LongPattern");
1888 }
1889
1890 private static void group0Test() throws Exception {
1891 Pattern pattern = Pattern.compile("(tes)ting");
1892 Matcher matcher = pattern.matcher("testing");
1893 check(matcher, "testing");
1894
1895 matcher.reset("testing");
1896 if (matcher.lookingAt()) {
1897 if (!matcher.group(0).equals("testing"))
1898 failCount++;
1899 } else {
1900 failCount++;
1901 }
1902
1903 matcher.reset("testing");
1904 if (matcher.matches()) {
1905 if (!matcher.group(0).equals("testing"))
1906 failCount++;
1907 } else {
1908 failCount++;
1909 }
1910
1911 pattern = Pattern.compile("(tes)ting");
1912 matcher = pattern.matcher("testing");
1913 if (matcher.lookingAt()) {
1914 if (!matcher.group(0).equals("testing"))
1915 failCount++;
1916 } else {
1917 failCount++;
1918 }
1919
1920 pattern = Pattern.compile("^(tes)ting");
1921 matcher = pattern.matcher("testing");
1922 if (matcher.matches()) {
1923 if (!matcher.group(0).equals("testing"))
1924 failCount++;
1925 } else {
1926 failCount++;
1927 }
1928
1929 // Supplementary character test
1930 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1931 matcher = pattern.matcher(toSupplementaries("testing"));
1932 check(matcher, toSupplementaries("testing"));
1933
1934 matcher.reset(toSupplementaries("testing"));
1935 if (matcher.lookingAt()) {
1936 if (!matcher.group(0).equals(toSupplementaries("testing")))
1937 failCount++;
1938 } else {
1939 failCount++;
1940 }
1941
1942 matcher.reset(toSupplementaries("testing"));
1943 if (matcher.matches()) {
1944 if (!matcher.group(0).equals(toSupplementaries("testing")))
1945 failCount++;
1946 } else {
1947 failCount++;
1948 }
1949
1950 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1951 matcher = pattern.matcher(toSupplementaries("testing"));
1952 if (matcher.lookingAt()) {
1953 if (!matcher.group(0).equals(toSupplementaries("testing")))
1954 failCount++;
1955 } else {
1956 failCount++;
1957 }
1958
1959 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1960 matcher = pattern.matcher(toSupplementaries("testing"));
1961 if (matcher.matches()) {
1962 if (!matcher.group(0).equals(toSupplementaries("testing")))
1963 failCount++;
1964 } else {
1965 failCount++;
1966 }
1967
1968 report("Group0");
1969 }
1970
1971 private static void findIntTest() throws Exception {
1972 Pattern p = Pattern.compile("blah");
1973 Matcher m = p.matcher("zzzzblahzzzzzblah");
1974 boolean result = m.find(2);
1975 if (!result)
1976 failCount++;
1977
1978 p = Pattern.compile("$");
1979 m = p.matcher("1234567890");
1980 result = m.find(10);
1981 if (!result)
1982 failCount++;
1983 try {
1984 result = m.find(11);
1985 failCount++;
1986 } catch (IndexOutOfBoundsException e) {
1987 // correct result
1988 }
1989
1990 // Supplementary character test
1991 p = Pattern.compile(toSupplementaries("blah"));
1992 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1993 result = m.find(2);
1994 if (!result)
1995 failCount++;
1996
1997 report("FindInt");
1998 }
1999
2000 private static void emptyPatternTest() throws Exception {
2001 Pattern p = Pattern.compile("");
2002 Matcher m = p.matcher("foo");
2003
2004 // Should find empty pattern at beginning of input
2005 boolean result = m.find();
2006 if (result != true)
2007 failCount++;
2008 if (m.start() != 0)
2009 failCount++;
2010
2011 // Should not match entire input if input is not empty
2012 m.reset();
2013 result = m.matches();
2014 if (result == true)
2015 failCount++;
2016
2017 try {
2018 m.start(0);
2019 failCount++;
2020 } catch (IllegalStateException e) {
2021 // Correct result
2022 }
2023
2024 // Should match entire input if input is empty
2025 m.reset("");
2026 result = m.matches();
2027 if (result != true)
2028 failCount++;
2029
2030 result = Pattern.matches("", "");
2031 if (result != true)
2032 failCount++;
2033
2034 result = Pattern.matches("", "foo");
2035 if (result == true)
2036 failCount++;
2037 report("EmptyPattern");
2038 }
2039
2040 private static void charClassTest() throws Exception {
2041 Pattern pattern = Pattern.compile("blah[ab]]blech");
2042 check(pattern, "blahb]blech", true);
2043
2044 pattern = Pattern.compile("[abc[def]]");
2045 check(pattern, "b", true);
2046
2047 // Supplementary character tests
2048 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2049 check(pattern, toSupplementaries("blahb]blech"), true);
2050
2051 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2052 check(pattern, toSupplementaries("b"), true);
2053
2054 try {
2055 // u00ff when UNICODE_CASE
2056 pattern = Pattern.compile("[ab\u00ffcd]",
2057 Pattern.CASE_INSENSITIVE|
2058 Pattern.UNICODE_CASE);
2059 check(pattern, "ab\u00ffcd", true);
2060 check(pattern, "Ab\u0178Cd", true);
2061
2062 // u00b5 when UNICODE_CASE
2063 pattern = Pattern.compile("[ab\u00b5cd]",
2064 Pattern.CASE_INSENSITIVE|
2065 Pattern.UNICODE_CASE);
2066 check(pattern, "ab\u00b5cd", true);
2067 check(pattern, "Ab\u039cCd", true);
2068 } catch (Exception e) { failCount++; }
2069
2070 /* Special cases
2071 (1)LatinSmallLetterLongS u+017f
2072 (2)LatinSmallLetterDotlessI u+0131
2073 (3)LatineCapitalLetterIWithDotAbove u+0130
2074 (4)KelvinSign u+212a
2075 (5)AngstromSign u+212b
2076 */
2077 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2078 pattern = Pattern.compile("[sik\u00c5]+", flags);
2079 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2080 failCount++;
2081
2082 report("CharClass");
2083 }
2084
2085 private static void caretTest() throws Exception {
2086 Pattern pattern = Pattern.compile("\\w*");
2087 Matcher matcher = pattern.matcher("a#bc#def##g");
2088 check(matcher, "a");
2089 check(matcher, "");
2090 check(matcher, "bc");
2091 check(matcher, "");
2092 check(matcher, "def");
2093 check(matcher, "");
2094 check(matcher, "");
2095 check(matcher, "g");
2096 check(matcher, "");
2097 if (matcher.find())
2098 failCount++;
2099
2100 pattern = Pattern.compile("^\\w*");
2101 matcher = pattern.matcher("a#bc#def##g");
2102 check(matcher, "a");
2103 if (matcher.find())
2104 failCount++;
2105
2106 pattern = Pattern.compile("\\w");
2107 matcher = pattern.matcher("abc##x");
2108 check(matcher, "a");
2109 check(matcher, "b");
2110 check(matcher, "c");
2111 check(matcher, "x");
2112 if (matcher.find())
2113 failCount++;
2114
2115 pattern = Pattern.compile("^\\w");
2116 matcher = pattern.matcher("abc##x");
2117 check(matcher, "a");
2118 if (matcher.find())
2119 failCount++;
2120
2121 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2122 matcher = pattern.matcher("abcdef-ghi\njklmno");
2123 check(matcher, "abc");
2124 if (matcher.find())
2125 failCount++;
2126
2127 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2128 matcher = pattern.matcher("abcdef-ghi\njklmno");
2129 check(matcher, "abc");
2130 check(matcher, "jkl");
2131 if (matcher.find())
2132 failCount++;
2133
2134 pattern = Pattern.compile("^", Pattern.MULTILINE);
2135 matcher = pattern.matcher("this is some text");
2136 String result = matcher.replaceAll("X");
2137 if (!result.equals("Xthis is some text"))
2138 failCount++;
2139
2140 pattern = Pattern.compile("^");
2141 matcher = pattern.matcher("this is some text");
2142 result = matcher.replaceAll("X");
2143 if (!result.equals("Xthis is some text"))
2144 failCount++;
2145
2146 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2147 matcher = pattern.matcher("this is some text\n");
2148 result = matcher.replaceAll("X");
2149 if (!result.equals("Xthis is some text\n"))
2150 failCount++;
2151
2152 report("Caret");
2153 }
2154
2155 private static void groupCaptureTest() throws Exception {
2156 // Independent group
2157 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2158 Matcher matcher = pattern.matcher("xxxyyyzzz");
2159 matcher.find();
2160 try {
2161 String blah = matcher.group(1);
2162 failCount++;
2163 } catch (IndexOutOfBoundsException ioobe) {
2164 // Good result
2165 }
2166 // Pure group
2167 pattern = Pattern.compile("x+(?:y+)z+");
2168 matcher = pattern.matcher("xxxyyyzzz");
2169 matcher.find();
2170 try {
2171 String blah = matcher.group(1);
2172 failCount++;
2173 } catch (IndexOutOfBoundsException ioobe) {
2174 // Good result
2175 }
2176
2177 // Supplementary character tests
2178 // Independent group
2179 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2180 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2181 matcher.find();
2182 try {
2183 String blah = matcher.group(1);
2184 failCount++;
2185 } catch (IndexOutOfBoundsException ioobe) {
2186 // Good result
2187 }
2188 // Pure group
2189 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2190 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2191 matcher.find();
2192 try {
2193 String blah = matcher.group(1);
2194 failCount++;
2195 } catch (IndexOutOfBoundsException ioobe) {
2196 // Good result
2197 }
2198
2199 report("GroupCapture");
2200 }
2201
2202 private static void backRefTest() throws Exception {
2203 Pattern pattern = Pattern.compile("(a*)bc\\1");
2204 check(pattern, "zzzaabcazzz", true);
2205
2206 pattern = Pattern.compile("(a*)bc\\1");
2207 check(pattern, "zzzaabcaazzz", true);
2208
2209 pattern = Pattern.compile("(abc)(def)\\1");
2210 check(pattern, "abcdefabc", true);
2211
2212 pattern = Pattern.compile("(abc)(def)\\3");
2213 check(pattern, "abcdefabc", false);
2214
2215 try {
2216 for (int i = 1; i < 10; i++) {
2217 // Make sure backref 1-9 are always accepted
2218 pattern = Pattern.compile("abcdef\\" + i);
2219 // and fail to match if the target group does not exit
2220 check(pattern, "abcdef", false);
2221 }
2222 } catch(PatternSyntaxException e) {
2223 failCount++;
2224 }
2225
2226 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2227 check(pattern, "abcdefghija", false);
2228 check(pattern, "abcdefghija1", true);
2229
2230 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2231 check(pattern, "abcdefghijkk", true);
2232
2233 pattern = Pattern.compile("(a)bcdefghij\\11");
2234 check(pattern, "abcdefghija1", true);
2235
2236 // Supplementary character tests
2237 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2238 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2239
2240 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2241 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2242
2243 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2244 check(pattern, toSupplementaries("abcdefabc"), true);
2245
2246 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2247 check(pattern, toSupplementaries("abcdefabc"), false);
2248
2249 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2250 check(pattern, toSupplementaries("abcdefghija"), false);
2251 check(pattern, toSupplementaries("abcdefghija1"), true);
2252
2253 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2254 check(pattern, toSupplementaries("abcdefghijkk"), true);
2255
2256 report("BackRef");
2257 }
2258
2259 /**
2260 * Unicode Technical Report #18, section 2.6 End of Line
2261 * There is no empty line to be matched in the sequence \u000D\u000A
2262 * but there is an empty line in the sequence \u000A\u000D.
2263 */
2264 private static void anchorTest() throws Exception {
2265 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2266 Matcher m = p.matcher("blah1\r\nblah2");
2267 m.find();
2268 m.find();
2269 if (!m.group().equals("blah2"))
2270 failCount++;
2271
2272 m.reset("blah1\n\rblah2");
2273 m.find();
2274 m.find();
2275 m.find();
2276 if (!m.group().equals("blah2"))
2277 failCount++;
2278
2279 // Test behavior of $ with \r\n at end of input
2280 p = Pattern.compile(".+$");
2281 m = p.matcher("blah1\r\n");
2282 if (!m.find())
2283 failCount++;
2284 if (!m.group().equals("blah1"))
2285 failCount++;
2286 if (m.find())
2287 failCount++;
2288
2289 // Test behavior of $ with \r\n at end of input in multiline
2290 p = Pattern.compile(".+$", Pattern.MULTILINE);
2291 m = p.matcher("blah1\r\n");
2292 if (!m.find())
2293 failCount++;
2294 if (m.find())
2295 failCount++;
2296
2297 // Test for $ recognition of \u0085 for bug 4527731
2298 p = Pattern.compile(".+$", Pattern.MULTILINE);
2299 m = p.matcher("blah1\u0085");
2300 if (!m.find())
2301 failCount++;
2302
2303 // Supplementary character test
2304 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2305 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2306 m.find();
2307 m.find();
2308 if (!m.group().equals(toSupplementaries("blah2")))
2309 failCount++;
2310
2311 m.reset(toSupplementaries("blah1\n\rblah2"));
2312 m.find();
2313 m.find();
2314 m.find();
2315 if (!m.group().equals(toSupplementaries("blah2")))
2316 failCount++;
2317
2318 // Test behavior of $ with \r\n at end of input
2319 p = Pattern.compile(".+$");
2320 m = p.matcher(toSupplementaries("blah1\r\n"));
2321 if (!m.find())
2322 failCount++;
2323 if (!m.group().equals(toSupplementaries("blah1")))
2324 failCount++;
2325 if (m.find())
2326 failCount++;
2327
2328 // Test behavior of $ with \r\n at end of input in multiline
2329 p = Pattern.compile(".+$", Pattern.MULTILINE);
2330 m = p.matcher(toSupplementaries("blah1\r\n"));
2331 if (!m.find())
2332 failCount++;
2333 if (m.find())
2334 failCount++;
2335
2336 // Test for $ recognition of \u0085 for bug 4527731
2337 p = Pattern.compile(".+$", Pattern.MULTILINE);
2338 m = p.matcher(toSupplementaries("blah1\u0085"));
2339 if (!m.find())
2340 failCount++;
2341
2342 report("Anchors");
2343 }
2344
2345 /**
2346 * A basic sanity test of Matcher.lookingAt().
2347 */
2348 private static void lookingAtTest() throws Exception {
2349 Pattern p = Pattern.compile("(ab)(c*)");
2350 Matcher m = p.matcher("abccczzzabcczzzabccc");
2351
2352 if (!m.lookingAt())
2353 failCount++;
2354
2355 if (!m.group().equals(m.group(0)))
2356 failCount++;
2357
2358 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2359 if (m.lookingAt())
2360 failCount++;
2361
2362 // Supplementary character test
2363 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2364 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2365
2366 if (!m.lookingAt())
2367 failCount++;
2368
2369 if (!m.group().equals(m.group(0)))
2370 failCount++;
2371
2372 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2373 if (m.lookingAt())
2374 failCount++;
2375
2376 report("Looking At");
2377 }
2378
2379 /**
2380 * A basic sanity test of Matcher.matches().
2381 */
2382 private static void matchesTest() throws Exception {
2383 // matches()
2384 Pattern p = Pattern.compile("ulb(c*)");
2385 Matcher m = p.matcher("ulbcccccc");
2386 if (!m.matches())
2387 failCount++;
2388
2389 // find() but not matches()
2390 m.reset("zzzulbcccccc");
2391 if (m.matches())
2392 failCount++;
2393
2394 // lookingAt() but not matches()
2395 m.reset("ulbccccccdef");
2396 if (m.matches())
2397 failCount++;
2398
2399 // matches()
2400 p = Pattern.compile("a|ad");
2401 m = p.matcher("ad");
2402 if (!m.matches())
2403 failCount++;
2404
2405 // Supplementary character test
2406 // matches()
2407 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2408 m = p.matcher(toSupplementaries("ulbcccccc"));
2409 if (!m.matches())
2410 failCount++;
2411
2412 // find() but not matches()
2413 m.reset(toSupplementaries("zzzulbcccccc"));
2414 if (m.matches())
2415 failCount++;
2416
2417 // lookingAt() but not matches()
2418 m.reset(toSupplementaries("ulbccccccdef"));
2419 if (m.matches())
2420 failCount++;
2421
2422 // matches()
2423 p = Pattern.compile(toSupplementaries("a|ad"));
2424 m = p.matcher(toSupplementaries("ad"));
2425 if (!m.matches())
2426 failCount++;
2427
2428 report("Matches");
2429 }
2430
2431 /**
2432 * A basic sanity test of Pattern.matches().
2433 */
2434 private static void patternMatchesTest() throws Exception {
2435 // matches()
2436 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2437 toSupplementaries("ulbcccccc")))
2438 failCount++;
2439
2440 // find() but not matches()
2441 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2442 toSupplementaries("zzzulbcccccc")))
2443 failCount++;
2444
2445 // lookingAt() but not matches()
2446 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2447 toSupplementaries("ulbccccccdef")))
2448 failCount++;
2449
2450 // Supplementary character test
2451 // matches()
2452 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2453 toSupplementaries("ulbcccccc")))
2454 failCount++;
2455
2456 // find() but not matches()
2457 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2458 toSupplementaries("zzzulbcccccc")))
2459 failCount++;
2460
2461 // lookingAt() but not matches()
2462 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2463 toSupplementaries("ulbccccccdef")))
2464 failCount++;
2465
2466 report("Pattern Matches");
2467 }
2468
2469 /**
2470 * Canonical equivalence testing. Tests the ability of the engine
2471 * to match sequences that are not explicitly specified in the
2472 * pattern when they are considered equivalent by the Unicode Standard.
2473 */
2474 private static void ceTest() throws Exception {
2475 // Decomposed char outside char classes
2476 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2477 Matcher m = p.matcher("test\u00e5");
2478 if (!m.matches())
2479 failCount++;
2480
2481 m.reset("testa\u030a");
2482 if (!m.matches())
2483 failCount++;
2484
2485 // Composed char outside char classes
2486 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2487 m = p.matcher("test\u00e5");
2488 if (!m.matches())
2489 failCount++;
2490
2491 m.reset("testa\u030a");
2492 if (!m.find())
2493 failCount++;
2494
2495 // Decomposed char inside a char class
2496 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2497 m = p.matcher("test\u00e5");
2498 if (!m.find())
2499 failCount++;
2500
2501 m.reset("testa\u030a");
2502 if (!m.find())
2503 failCount++;
2504
2505 // Composed char inside a char class
2506 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2507 m = p.matcher("test\u00e5");
2508 if (!m.find())
2509 failCount++;
2510
2511 m.reset("testa\u0300");
2512 if (!m.find())
2513 failCount++;
2514
2515 m.reset("testa\u030a");
2516 if (!m.find())
2517 failCount++;
2518
2519 // Marks that cannot legally change order and be equivalent
2520 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2521 check(p, "testa\u0308\u0300", true);
2522 check(p, "testa\u0300\u0308", false);
2523
2524 // Marks that can legally change order and be equivalent
2525 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2526 check(p, "testa\u0308\u0323", true);
2527 check(p, "testa\u0323\u0308", true);
2528
2529 // Test all equivalences of the sequence a\u0308\u0323\u0300
2530 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2531 check(p, "testa\u0308\u0323\u0300", true);
2532 check(p, "testa\u0323\u0308\u0300", true);
2533 check(p, "testa\u0308\u0300\u0323", true);
2534 check(p, "test\u00e4\u0323\u0300", true);
2535 check(p, "test\u00e4\u0300\u0323", true);
2536
2537 /*
2538 * The following canonical equivalence tests don't work. Bug id: 4916384.
2539 *
2540 // Decomposed hangul (jamos)
2541 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2542 m = p.matcher("\u1100\u1161");
2543 if (!m.matches())
2544 failCount++;
2545
2546 m.reset("\uac00");
2547 if (!m.matches())
2548 failCount++;
2549
2550 // Composed hangul
2551 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2552 m = p.matcher("\u1100\u1161");
2553 if (!m.matches())
2554 failCount++;
2555
2556 m.reset("\uac00");
2557 if (!m.matches())
2558 failCount++;
2559
2560 // Decomposed supplementary outside char classes
2561 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2562 m = p.matcher("test\ud834\uddc0");
2563 if (!m.matches())
2564 failCount++;
2565
2566 m.reset("test\ud834\uddbc\ud834\udd6f");
2567 if (!m.matches())
2568 failCount++;
2569
2570 // Composed supplementary outside char classes
2571 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2572 m.reset("test\ud834\uddbc\ud834\udd6f");
2573 if (!m.matches())
2574 failCount++;
2575
2576 m = p.matcher("test\ud834\uddc0");
2577 if (!m.matches())
2578 failCount++;
2579
2580 */
2581
2582 report("Canonical Equivalence");
2583 }
2584
2585 /**
2586 * A basic sanity test of Matcher.replaceAll().
2587 */
2588 private static void globalSubstitute() throws Exception {
2589 // Global substitution with a literal
2590 Pattern p = Pattern.compile("(ab)(c*)");
2591 Matcher m = p.matcher("abccczzzabcczzzabccc");
2592 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2593 failCount++;
2594
2595 m.reset("zzzabccczzzabcczzzabccczzz");
2596 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2597 failCount++;
2598
2599 // Global substitution with groups
2600 m.reset("zzzabccczzzabcczzzabccczzz");
2601 String result = m.replaceAll("$1");
2602 if (!result.equals("zzzabzzzabzzzabzzz"))
2603 failCount++;
2604
2605 // Supplementary character test
2606 // Global substitution with a literal
2607 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2608 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2609 if (!m.replaceAll(toSupplementaries("test")).
2610 equals(toSupplementaries("testzzztestzzztest")))
2611 failCount++;
2612
2613 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2614 if (!m.replaceAll(toSupplementaries("test")).
2615 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2616 failCount++;
2617
2618 // Global substitution with groups
2619 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2620 result = m.replaceAll("$1");
2621 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2622 failCount++;
2623
2624 report("Global Substitution");
2625 }
2626
2627 /**
2628 * Tests the usage of Matcher.appendReplacement() with literal
2629 * and group substitutions.
2630 */
2631 private static void stringbufferSubstitute() throws Exception {
2632 // SB substitution with literal
2633 String blah = "zzzblahzzz";
2634 Pattern p = Pattern.compile("blah");
2635 Matcher m = p.matcher(blah);
2636 StringBuffer result = new StringBuffer();
2637 try {
2638 m.appendReplacement(result, "blech");
2639 failCount++;
2640 } catch (IllegalStateException e) {
2641 }
2642 m.find();
2643 m.appendReplacement(result, "blech");
2644 if (!result.toString().equals("zzzblech"))
2645 failCount++;
2646
2647 m.appendTail(result);
2648 if (!result.toString().equals("zzzblechzzz"))
2649 failCount++;
2650
2651 // SB substitution with groups
2652 blah = "zzzabcdzzz";
2653 p = Pattern.compile("(ab)(cd)*");
2654 m = p.matcher(blah);
2655 result = new StringBuffer();
2656 try {
2657 m.appendReplacement(result, "$1");
2658 failCount++;
2659 } catch (IllegalStateException e) {
2660 }
2661 m.find();
2662 m.appendReplacement(result, "$1");
2663 if (!result.toString().equals("zzzab"))
2664 failCount++;
2665
2666 m.appendTail(result);
2667 if (!result.toString().equals("zzzabzzz"))
2668 failCount++;
2669
2670 // SB substitution with 3 groups
2671 blah = "zzzabcdcdefzzz";
2672 p = Pattern.compile("(ab)(cd)*(ef)");
2673 m = p.matcher(blah);
2674 result = new StringBuffer();
2675 try {
2676 m.appendReplacement(result, "$1w$2w$3");
2677 failCount++;
2678 } catch (IllegalStateException e) {
2679 }
2680 m.find();
2681 m.appendReplacement(result, "$1w$2w$3");
2682 if (!result.toString().equals("zzzabwcdwef"))
2683 failCount++;
2684
2685 m.appendTail(result);
2686 if (!result.toString().equals("zzzabwcdwefzzz"))
2687 failCount++;
2688
2689 // SB substitution with groups and three matches
2690 // skipping middle match
2691 blah = "zzzabcdzzzabcddzzzabcdzzz";
2692 p = Pattern.compile("(ab)(cd*)");
2693 m = p.matcher(blah);
2694 result = new StringBuffer();
2695 try {
2696 m.appendReplacement(result, "$1");
2697 failCount++;
2698 } catch (IllegalStateException e) {
2699 }
2700 m.find();
2701 m.appendReplacement(result, "$1");
2702 if (!result.toString().equals("zzzab"))
2703 failCount++;
2704
2705 m.find();
2706 m.find();
2707 m.appendReplacement(result, "$2");
2708 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2709 failCount++;
2710
2711 m.appendTail(result);
2712 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2713 failCount++;
2714
2715 // Check to make sure escaped $ is ignored
2716 blah = "zzzabcdcdefzzz";
2717 p = Pattern.compile("(ab)(cd)*(ef)");
2718 m = p.matcher(blah);
2719 result = new StringBuffer();
2720 m.find();
2721 m.appendReplacement(result, "$1w\\$2w$3");
2722 if (!result.toString().equals("zzzabw$2wef"))
2723 failCount++;
2724
2725 m.appendTail(result);
2726 if (!result.toString().equals("zzzabw$2wefzzz"))
2727 failCount++;
2728
2729 // Check to make sure a reference to nonexistent group causes error
2730 blah = "zzzabcdcdefzzz";
2731 p = Pattern.compile("(ab)(cd)*(ef)");
2732 m = p.matcher(blah);
2733 result = new StringBuffer();
2734 m.find();
2735 try {
2736 m.appendReplacement(result, "$1w$5w$3");
2737 failCount++;
2738 } catch (IndexOutOfBoundsException ioobe) {
2739 // Correct result
2740 }
2741
2742 // Check double digit group references
2743 blah = "zzz123456789101112zzz";
2744 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2745 m = p.matcher(blah);
2746 result = new StringBuffer();
2747 m.find();
2748 m.appendReplacement(result, "$1w$11w$3");
2749 if (!result.toString().equals("zzz1w11w3"))
2750 failCount++;
2751
2752 // Check to make sure it backs off $15 to $1 if only three groups
2753 blah = "zzzabcdcdefzzz";
2754 p = Pattern.compile("(ab)(cd)*(ef)");
2755 m = p.matcher(blah);
2756 result = new StringBuffer();
2757 m.find();
2758 m.appendReplacement(result, "$1w$15w$3");
2759 if (!result.toString().equals("zzzabwab5wef"))
2760 failCount++;
2761
2762
2763 // Supplementary character test
2764 // SB substitution with literal
2765 blah = toSupplementaries("zzzblahzzz");
2766 p = Pattern.compile(toSupplementaries("blah"));
2767 m = p.matcher(blah);
2768 result = new StringBuffer();
2769 try {
2770 m.appendReplacement(result, toSupplementaries("blech"));
2771 failCount++;
2772 } catch (IllegalStateException e) {
2773 }
2774 m.find();
2775 m.appendReplacement(result, toSupplementaries("blech"));
2776 if (!result.toString().equals(toSupplementaries("zzzblech")))
2777 failCount++;
2778
2779 m.appendTail(result);
2780 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2781 failCount++;
2782
2783 // SB substitution with groups
2784 blah = toSupplementaries("zzzabcdzzz");
2785 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2786 m = p.matcher(blah);
2787 result = new StringBuffer();
2788 try {
2789 m.appendReplacement(result, "$1");
2790 failCount++;
2791 } catch (IllegalStateException e) {
2792 }
2793 m.find();
2794 m.appendReplacement(result, "$1");
2795 if (!result.toString().equals(toSupplementaries("zzzab")))
2796 failCount++;
2797
2798 m.appendTail(result);
2799 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2800 failCount++;
2801
2802 // SB substitution with 3 groups
2803 blah = toSupplementaries("zzzabcdcdefzzz");
2804 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2805 m = p.matcher(blah);
2806 result = new StringBuffer();
2807 try {
2808 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2809 failCount++;
2810 } catch (IllegalStateException e) {
2811 }
2812 m.find();
2813 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2814 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2815 failCount++;
2816
2817 m.appendTail(result);
2818 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2819 failCount++;
2820
2821 // SB substitution with groups and three matches
2822 // skipping middle match
2823 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2824 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2825 m = p.matcher(blah);
2826 result = new StringBuffer();
2827 try {
2828 m.appendReplacement(result, "$1");
2829 failCount++;
2830 } catch (IllegalStateException e) {
2831 }
2832 m.find();
2833 m.appendReplacement(result, "$1");
2834 if (!result.toString().equals(toSupplementaries("zzzab")))
2835 failCount++;
2836
2837 m.find();
2838 m.find();
2839 m.appendReplacement(result, "$2");
2840 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2841 failCount++;
2842
2843 m.appendTail(result);
2844 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2845 failCount++;
2846
2847 // Check to make sure escaped $ is ignored
2848 blah = toSupplementaries("zzzabcdcdefzzz");
2849 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2850 m = p.matcher(blah);
2851 result = new StringBuffer();
2852 m.find();
2853 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2854 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2855 failCount++;
2856
2857 m.appendTail(result);
2858 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2859 failCount++;
2860
2861 // Check to make sure a reference to nonexistent group causes error
2862 blah = toSupplementaries("zzzabcdcdefzzz");
2863 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2864 m = p.matcher(blah);
2865 result = new StringBuffer();
2866 m.find();
2867 try {
2868 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2869 failCount++;
2870 } catch (IndexOutOfBoundsException ioobe) {
2871 // Correct result
2872 }
2873
2874 // Check double digit group references
2875 blah = toSupplementaries("zzz123456789101112zzz");
2876 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2877 m = p.matcher(blah);
2878 result = new StringBuffer();
2879 m.find();
2880 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2881 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2882 failCount++;
2883
2884 // Check to make sure it backs off $15 to $1 if only three groups
2885 blah = toSupplementaries("zzzabcdcdefzzz");
2886 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2887 m = p.matcher(blah);
2888 result = new StringBuffer();
2889 m.find();
2890 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2891 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2892 failCount++;
2893
2894 // Check nothing has been appended into the output buffer if
2895 // the replacement string triggers IllegalArgumentException.
2896 p = Pattern.compile("(abc)");
2897 m = p.matcher("abcd");
2898 result = new StringBuffer();
2899 m.find();
2900 try {
2901 m.appendReplacement(result, ("xyz$g"));
2902 failCount++;
2903 } catch (IllegalArgumentException iae) {
2904 if (result.length() != 0)
2905 failCount++;
2906 }
2907
2908 report("SB Substitution");
2909 }
2910
2911 /*
2912 * 5 groups of characters are created to make a substitution string.
2913 * A base string will be created including random lead chars, the
2914 * substitution string, and random trailing chars.
2915 * A pattern containing the 5 groups is searched for and replaced with:
2916 * random group + random string + random group.
2917 * The results are checked for correctness.
2918 */
2919 private static void substitutionBasher() {
2920 for (int runs = 0; runs<1000; runs++) {
2921 // Create a base string to work in
2922 int leadingChars = generator.nextInt(10);
2923 StringBuffer baseBuffer = new StringBuffer(100);
2924 String leadingString = getRandomAlphaString(leadingChars);
2925 baseBuffer.append(leadingString);
2926
2927 // Create 5 groups of random number of random chars
2928 // Create the string to substitute
2929 // Create the pattern string to search for
2930 StringBuffer bufferToSub = new StringBuffer(25);
2931 StringBuffer bufferToPat = new StringBuffer(50);
2932 String[] groups = new String[5];
2933 for(int i=0; i<5; i++) {
2934 int aGroupSize = generator.nextInt(5)+1;
2935 groups[i] = getRandomAlphaString(aGroupSize);
2936 bufferToSub.append(groups[i]);
2937 bufferToPat.append('(');
2938 bufferToPat.append(groups[i]);
2939 bufferToPat.append(')');
2940 }
2941 String stringToSub = bufferToSub.toString();
2942 String pattern = bufferToPat.toString();
2943
2944 // Place sub string into working string at random index
2945 baseBuffer.append(stringToSub);
2946
2947 // Append random chars to end
2948 int trailingChars = generator.nextInt(10);
2949 String trailingString = getRandomAlphaString(trailingChars);
2950 baseBuffer.append(trailingString);
2951 String baseString = baseBuffer.toString();
2952
2953 // Create test pattern and matcher
2954 Pattern p = Pattern.compile(pattern);
2955 Matcher m = p.matcher(baseString);
2956
2957 // Reject candidate if pattern happens to start early
2958 m.find();
2959 if (m.start() < leadingChars)
2960 continue;
2961
2962 // Reject candidate if more than one match
2963 if (m.find())
2964 continue;
2965
2966 // Construct a replacement string with :
2967 // random group + random string + random group
2968 StringBuffer bufferToRep = new StringBuffer();
2969 int groupIndex1 = generator.nextInt(5);
2970 bufferToRep.append("$" + (groupIndex1 + 1));
2971 String randomMidString = getRandomAlphaString(5);
2972 bufferToRep.append(randomMidString);
2973 int groupIndex2 = generator.nextInt(5);
2974 bufferToRep.append("$" + (groupIndex2 + 1));
2975 String replacement = bufferToRep.toString();
2976
2977 // Do the replacement
2978 String result = m.replaceAll(replacement);
2979
2980 // Construct expected result
2981 StringBuffer bufferToRes = new StringBuffer();
2982 bufferToRes.append(leadingString);
2983 bufferToRes.append(groups[groupIndex1]);
2984 bufferToRes.append(randomMidString);
2985 bufferToRes.append(groups[groupIndex2]);
2986 bufferToRes.append(trailingString);
2987 String expectedResult = bufferToRes.toString();
2988
2989 // Check results
2990 if (!result.equals(expectedResult))
2991 failCount++;
2992 }
2993
2994 report("Substitution Basher");
2995 }
2996
2997 /**
2998 * Checks the handling of some escape sequences that the Pattern
2999 * class should process instead of the java compiler. These are
3000 * not in the file because the escapes should be be processed
3001 * by the Pattern class when the regex is compiled.
3002 */
3003 private static void escapes() throws Exception {
3004 Pattern p = Pattern.compile("\\043");
3005 Matcher m = p.matcher("#");
3006 if (!m.find())
3007 failCount++;
3008
3009 p = Pattern.compile("\\x23");
3010 m = p.matcher("#");
3011 if (!m.find())
3012 failCount++;
3013
3014 p = Pattern.compile("\\u0023");
3015 m = p.matcher("#");
3016 if (!m.find())
3017 failCount++;
3018
3019 report("Escape sequences");
3020 }
3021
3022 /**
3023 * Checks the handling of blank input situations. These
3024 * tests are incompatible with my test file format.
3025 */
3026 private static void blankInput() throws Exception {
3027 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3028 Matcher m = p.matcher("");
3029 if (m.find())
3030 failCount++;
3031
3032 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3033 m = p.matcher("");
3034 if (!m.find())
3035 failCount++;
3036
3037 p = Pattern.compile("abc");
3038 m = p.matcher("");
3039 if (m.find())
3040 failCount++;
3041
3042 p = Pattern.compile("a*");
3043 m = p.matcher("");
3044 if (!m.find())
3045 failCount++;
3046
3047 report("Blank input");
3048 }
3049
3050 /**
3051 * Tests the Boyer-Moore pattern matching of a character sequence
3052 * on randomly generated patterns.
3053 */
3054 private static void bm() throws Exception {
3055 doBnM('a');
3056 report("Boyer Moore (ASCII)");
3057
3058 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3059 report("Boyer Moore (Supplementary)");
3060 }
3061
3062 private static void doBnM(int baseCharacter) throws Exception {
3063 int achar=0;
3064
3065 for (int i=0; i<100; i++) {
3066 // Create a short pattern to search for
3067 int patternLength = generator.nextInt(7) + 4;
3068 StringBuffer patternBuffer = new StringBuffer(patternLength);
3069 for (int x=0; x<patternLength; x++) {
3070 int ch = baseCharacter + generator.nextInt(26);
3071 if (Character.isSupplementaryCodePoint(ch)) {
3072 patternBuffer.append(Character.toChars(ch));
3073 } else {
3074 patternBuffer.append((char)ch);
3075 }
3076 }
3077 String pattern = patternBuffer.toString();
3078 Pattern p = Pattern.compile(pattern);
3079
3080 // Create a buffer with random ASCII chars that does
3081 // not match the sample
3082 String toSearch = null;
3083 StringBuffer s = null;
3084 Matcher m = p.matcher("");
3085 do {
3086 s = new StringBuffer(100);
3087 for (int x=0; x<100; x++) {
3088 int ch = baseCharacter + generator.nextInt(26);
3089 if (Character.isSupplementaryCodePoint(ch)) {
3090 s.append(Character.toChars(ch));
3091 } else {
3092 s.append((char)ch);
3093 }
3094 }
3095 toSearch = s.toString();
3096 m.reset(toSearch);
3097 } while (m.find());
3098
3099 // Insert the pattern at a random spot
3100 int insertIndex = generator.nextInt(99);
3101 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3102 insertIndex++;
3103 s = s.insert(insertIndex, pattern);
3104 toSearch = s.toString();
3105
3106 // Make sure that the pattern is found
3107 m.reset(toSearch);
3108 if (!m.find())
3109 failCount++;
3110
3111 // Make sure that the match text is the pattern
3112 if (!m.group().equals(pattern))
3113 failCount++;
3114
3115 // Make sure match occured at insertion point
3116 if (m.start() != insertIndex)
3117 failCount++;
3118 }
3119 }
3120
3121 /**
3122 * Tests the matching of slices on randomly generated patterns.
3123 * The Boyer-Moore optimization is not done on these patterns
3124 * because it uses unicode case folding.
3125 */
3126 private static void slice() throws Exception {
3127 doSlice(Character.MAX_VALUE);
3128 report("Slice");
3129
3130 doSlice(Character.MAX_CODE_POINT);
3131 report("Slice (Supplementary)");
3132 }
3133
3134 private static void doSlice(int maxCharacter) throws Exception {
3135 Random generator = new Random();
3136 int achar=0;
3137
3138 for (int i=0; i<100; i++) {
3139 // Create a short pattern to search for
3140 int patternLength = generator.nextInt(7) + 4;
3141 StringBuffer patternBuffer = new StringBuffer(patternLength);
3142 for (int x=0; x<patternLength; x++) {
3143 int randomChar = 0;
3144 while (!Character.isLetterOrDigit(randomChar))
3145 randomChar = generator.nextInt(maxCharacter);
3146 if (Character.isSupplementaryCodePoint(randomChar)) {
3147 patternBuffer.append(Character.toChars(randomChar));
3148 } else {
3149 patternBuffer.append((char) randomChar);
3150 }
3151 }
3152 String pattern = patternBuffer.toString();
3153 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3154
3155 // Create a buffer with random chars that does not match the sample
3156 String toSearch = null;
3157 StringBuffer s = null;
3158 Matcher m = p.matcher("");
3159 do {
3160 s = new StringBuffer(100);
3161 for (int x=0; x<100; x++) {
3162 int randomChar = 0;
3163 while (!Character.isLetterOrDigit(randomChar))
3164 randomChar = generator.nextInt(maxCharacter);
3165 if (Character.isSupplementaryCodePoint(randomChar)) {
3166 s.append(Character.toChars(randomChar));
3167 } else {
3168 s.append((char) randomChar);
3169 }
3170 }
3171 toSearch = s.toString();
3172 m.reset(toSearch);
3173 } while (m.find());
3174
3175 // Insert the pattern at a random spot
3176 int insertIndex = generator.nextInt(99);
3177 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3178 insertIndex++;
3179 s = s.insert(insertIndex, pattern);
3180 toSearch = s.toString();
3181
3182 // Make sure that the pattern is found
3183 m.reset(toSearch);
3184 if (!m.find())
3185 failCount++;
3186
3187 // Make sure that the match text is the pattern
3188 if (!m.group().equals(pattern))
3189 failCount++;
3190
3191 // Make sure match occured at insertion point
3192 if (m.start() != insertIndex)
3193 failCount++;
3194 }
3195 }
3196
3197 private static void explainFailure(String pattern, String data,
3198 String expected, String actual) {
3199 System.err.println("----------------------------------------");
3200 System.err.println("Pattern = "+pattern);
3201 System.err.println("Data = "+data);
3202 System.err.println("Expected = " + expected);
3203 System.err.println("Actual = " + actual);
3204 }
3205
3206 private static void explainFailure(String pattern, String data,
3207 Throwable t) {
3208 System.err.println("----------------------------------------");
3209 System.err.println("Pattern = "+pattern);
3210 System.err.println("Data = "+data);
3211 t.printStackTrace(System.err);
3212 }
3213
3214 // Testing examples from a file
3215
3216 /**
3217 * Goes through the file "TestCases.txt" and creates many patterns
3218 * described in the file, matching the patterns against input lines in
3219 * the file, and comparing the results against the correct results
3220 * also found in the file. The file format is described in comments
3221 * at the head of the file.
3222 */
3223 private static void processFile(String fileName) throws Exception {
3224 File testCases = new File(System.getProperty("test.src", "."),
3225 fileName);
3226 FileInputStream in = new FileInputStream(testCases);
3227 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3228
3229 // Process next test case.
3230 String aLine;
3231 while((aLine = r.readLine()) != null) {
3232 // Read a line for pattern
3233 String patternString = grabLine(r);
3234 Pattern p = null;
3235 try {
3236 p = compileTestPattern(patternString);
3237 } catch (PatternSyntaxException e) {
3238 String dataString = grabLine(r);
3239 String expectedResult = grabLine(r);
3240 if (expectedResult.startsWith("error"))
3241 continue;
3242 explainFailure(patternString, dataString, e);
3243 failCount++;
3244 continue;
3245 }
3246
3247 // Read a line for input string
3248 String dataString = grabLine(r);
3249 Matcher m = p.matcher(dataString);
3250 StringBuffer result = new StringBuffer();
3251
3252 // Check for IllegalStateExceptions before a match
3253 failCount += preMatchInvariants(m);
3254
3255 boolean found = m.find();
3256
3257 if (found)
3258 failCount += postTrueMatchInvariants(m);
3259 else
3260 failCount += postFalseMatchInvariants(m);
3261
3262 if (found) {
3263 result.append("true ");
3264 result.append(m.group(0) + " ");
3265 } else {
3266 result.append("false ");
3267 }
3268
3269 result.append(m.groupCount());
3270
3271 if (found) {
3272 for (int i=1; i<m.groupCount()+1; i++)
3273 if (m.group(i) != null)
3274 result.append(" " +m.group(i));
3275 }
3276
3277 // Read a line for the expected result
3278 String expectedResult = grabLine(r);
3279
3280 if (!result.toString().equals(expectedResult)) {
3281 explainFailure(patternString, dataString, expectedResult, result.toString());
3282 failCount++;
3283 }
3284 }
3285
3286 report(fileName);
3287 }
3288
3289 private static int preMatchInvariants(Matcher m) {
3290 int failCount = 0;
3291 try {
3292 m.start();
3293 failCount++;
3294 } catch (IllegalStateException ise) {}
3295 try {
3296 m.end();
3297 failCount++;
3298 } catch (IllegalStateException ise) {}
3299 try {
3300 m.group();
3301 failCount++;
3302 } catch (IllegalStateException ise) {}
3303 return failCount;
3304 }
3305
3306 private static int postFalseMatchInvariants(Matcher m) {
3307 int failCount = 0;
3308 try {
3309 m.group();
3310 failCount++;
3311 } catch (IllegalStateException ise) {}
3312 try {
3313 m.start();
3314 failCount++;
3315 } catch (IllegalStateException ise) {}
3316 try {
3317 m.end();
3318 failCount++;
3319 } catch (IllegalStateException ise) {}
3320 return failCount;
3321 }
3322
3323 private static int postTrueMatchInvariants(Matcher m) {
3324 int failCount = 0;
3325 //assert(m.start() = m.start(0);
3326 if (m.start() != m.start(0))
3327 failCount++;
3328 //assert(m.end() = m.end(0);
3329 if (m.start() != m.start(0))
3330 failCount++;
3331 //assert(m.group() = m.group(0);
3332 if (!m.group().equals(m.group(0)))
3333 failCount++;
3334 try {
3335 m.group(50);
3336 failCount++;
3337 } catch (IndexOutOfBoundsException ise) {}
3338
3339 return failCount;
3340 }
3341
3342 private static Pattern compileTestPattern(String patternString) {
3343 if (!patternString.startsWith("'")) {
3344 return Pattern.compile(patternString);
3345 }
3346
3347 int break1 = patternString.lastIndexOf("'");
3348 String flagString = patternString.substring(
3349 break1+1, patternString.length());
3350 patternString = patternString.substring(1, break1);
3351
3352 if (flagString.equals("i"))
3353 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3354
3355 if (flagString.equals("m"))
3356 return Pattern.compile(patternString, Pattern.MULTILINE);
3357
3358 return Pattern.compile(patternString);
3359 }
3360
3361 /**
3362 * Reads a line from the input file. Keeps reading lines until a non
3363 * empty non comment line is read. If the line contains a \n then
3364 * these two characters are replaced by a newline char. If a \\uxxxx
3365 * sequence is read then the sequence is replaced by the unicode char.
3366 */
3367 private static String grabLine(BufferedReader r) throws Exception {
3368 int index = 0;
3369 String line = r.readLine();
3370 while (line.startsWith("//") || line.length() < 1)
3371 line = r.readLine();
3372 while ((index = line.indexOf("\\n")) != -1) {
3373 StringBuffer temp = new StringBuffer(line);
3374 temp.replace(index, index+2, "\n");
3375 line = temp.toString();
3376 }
3377 while ((index = line.indexOf("\\u")) != -1) {
3378 StringBuffer temp = new StringBuffer(line);
3379 String value = temp.substring(index+2, index+6);
3380 char aChar = (char)Integer.parseInt(value, 16);
3381 String unicodeChar = "" + aChar;
3382 temp.replace(index, index+6, unicodeChar);
3383 line = temp.toString();
3384 }
3385
3386 return line;
3387 }
3388
3389 private static void check(Pattern p, String s, String g, String expected) {
3390 Matcher m = p.matcher(s);
3391 m.find();
3392 if (!m.group(g).equals(expected))
3393 failCount++;
3394 }
3395
3396 private static void checkReplaceFirst(String p, String s, String r, String expected)
3397 {
3398 if (!expected.equals(Pattern.compile(p)
3399 .matcher(s)
3400 .replaceFirst(r)))
3401 failCount++;
3402 }
3403
3404 private static void checkReplaceAll(String p, String s, String r, String expected)
3405 {
3406 if (!expected.equals(Pattern.compile(p)
3407 .matcher(s)
3408 .replaceAll(r)))
3409 failCount++;
3410 }
3411
3412 private static void checkExpectedFail(String p) {
3413 try {
3414 Pattern.compile(p);
3415 } catch (PatternSyntaxException pse) {
3416 //pse.printStackTrace();
3417 return;
3418 }
3419 failCount++;
3420 }
3421
3422 private static void checkExpectedFail(Matcher m, String g) {
3423 m.find();
3424 try {
3425 m.group(g);
3426 } catch (IllegalArgumentException iae) {
3427 //iae.printStackTrace();
3428 return;
3429 } catch (NullPointerException npe) {
3430 return;
3431 }
3432 failCount++;
3433 }
3434
3435
3436 private static void namedGroupCaptureTest() throws Exception {
3437 check(Pattern.compile("x+(?<gname>y+)z+"),
3438 "xxxyyyzzz",
3439 "gname",
3440 "yyy");
3441
shermand9337e02009-10-21 11:40:40 -07003442 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003443 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003444 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003445 "yyy");
3446
sherman0b4d42d2009-02-23 21:06:15 -08003447 //backref
3448 Pattern pattern = Pattern.compile("(a*)bc\\1");
3449 check(pattern, "zzzaabcazzz", true); // found "abca"
3450
3451 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3452 "zzzaabcaazzz", true);
3453
3454 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3455 "abcdefabc", true);
3456
3457 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3458 "abcdefghijkk", true);
3459
3460 // Supplementary character tests
3461 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3462 toSupplementaries("zzzaabcazzz"), true);
3463
3464 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3465 toSupplementaries("zzzaabcaazzz"), true);
3466
3467 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3468 toSupplementaries("abcdefabc"), true);
3469
3470 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3471 "(?<gname>" +
3472 toSupplementaries("k)") + "\\k<gname>"),
3473 toSupplementaries("abcdefghijkk"), true);
3474
3475 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3476 "xxxyyyzzzyyy",
3477 "gname",
3478 "yyy");
3479
3480 //replaceFirst/All
3481 checkReplaceFirst("(?<gn>ab)(c*)",
3482 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003483 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003484 "abzzzabcczzzabccc");
3485
3486 checkReplaceAll("(?<gn>ab)(c*)",
3487 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003488 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003489 "abzzzabzzzab");
3490
3491
3492 checkReplaceFirst("(?<gn>ab)(c*)",
3493 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003494 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003495 "zzzabzzzabcczzzabccczzz");
3496
3497 checkReplaceAll("(?<gn>ab)(c*)",
3498 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003499 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003500 "zzzabzzzabzzzabzzz");
3501
3502 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3503 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003504 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003505 "zzzccczzzabcczzzabccczzz");
3506
3507 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3508 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003509 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003510 "zzzccczzzcczzzccczzz");
3511
3512 //toSupplementaries("(ab)(c*)"));
3513 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3514 ")(?<gn2>" + toSupplementaries("c") + "*)",
3515 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003516 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003517 toSupplementaries("abzzzabcczzzabccc"));
3518
3519
3520 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3521 ")(?<gn2>" + toSupplementaries("c") + "*)",
3522 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003523 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003524 toSupplementaries("abzzzabzzzab"));
3525
3526 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3527 ")(?<gn2>" + toSupplementaries("c") + "*)",
3528 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003529 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003530 toSupplementaries("ccczzzabcczzzabccc"));
3531
3532
3533 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3534 ")(?<gn2>" + toSupplementaries("c") + "*)",
3535 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003536 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003537 toSupplementaries("ccczzzcczzzccc"));
3538
3539 checkReplaceFirst("(?<dog>Dog)AndCat",
3540 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003541 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003542 "zzzDogzzzDogAndCatzzz");
3543
3544
3545 checkReplaceAll("(?<dog>Dog)AndCat",
3546 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003547 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003548 "zzzDogzzzDogzzz");
3549
3550 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003551 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3552 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003553 failCount++;
3554
3555 // negative
3556 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3557 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003558 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003559 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3560 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3561 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3562 "gnameX");
3563 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3564 null);
3565 report("NamedGroupCapture");
3566 }
sherman6782c962010-02-05 00:10:42 -08003567
shermancc01ef52010-05-18 15:36:47 -07003568 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003569 private static void nonBmpClassComplementTest() throws Exception {
3570 Pattern p = Pattern.compile("\\P{Lu}");
3571 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3572 if (m.find() && m.start() == 1)
3573 failCount++;
3574
3575 // from a unicode category
3576 p = Pattern.compile("\\P{Lu}");
3577 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3578 if (m.find())
3579 failCount++;
3580 if (!m.hitEnd())
3581 failCount++;
3582
3583 // block
3584 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3585 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3586 if (m.find() && m.start() == 1)
3587 failCount++;
3588
3589 report("NonBmpClassComplement");
3590 }
3591
shermancc01ef52010-05-18 15:36:47 -07003592 private static void unicodePropertiesTest() throws Exception {
3593 // different forms
3594 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3595 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3596 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3597 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3598 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3599 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3600 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3601 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3602 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3603 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3604 failCount++;
3605
3606 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3607 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3608 Matcher lastSM = common;
3609 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3610
3611 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3612 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3613 Matcher lastBM = latin;
3614 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3615
3616 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3617 if (cp >= 0x30000 && (cp & 0x70) == 0){
3618 continue; // only pick couple code points, they are the same
3619 }
3620
3621 // Unicode Script
3622 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3623 Matcher m;
3624 String str = new String(Character.toChars(cp));
3625 if (script == lastScript) {
3626 m = lastSM;
3627 m.reset(str);
3628 } else {
3629 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3630 }
3631 if (!m.matches()) {
3632 failCount++;
3633 }
3634 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3635 other.reset(str);
3636 if (other.matches()) {
3637 failCount++;
3638 }
3639 lastSM = m;
3640 lastScript = script;
3641
3642 // Unicode Block
3643 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3644 if (block == null) {
3645 //System.out.printf("Not a Block: cp=%x%n", cp);
3646 continue;
3647 }
3648 if (block == lastBlock) {
3649 m = lastBM;
3650 m.reset(str);
3651 } else {
3652 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3653 }
3654 if (!m.matches()) {
3655 failCount++;
3656 }
3657 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3658 other.reset(str);
3659 if (other.matches()) {
3660 failCount++;
3661 }
3662 lastBM = m;
3663 lastBlock = block;
3664 }
3665 report("unicodeProperties");
3666 }
shermanf03c78b2011-02-03 13:49:25 -08003667
3668 private static void unicodeHexNotationTest() throws Exception {
3669
3670 // negative
3671 checkExpectedFail("\\x{-23}");
3672 checkExpectedFail("\\x{110000}");
3673 checkExpectedFail("\\x{}");
3674 checkExpectedFail("\\x{AB[ef]");
3675
3676 // codepoint
3677 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3678 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3679 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3680 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3681
3682 // in class
3683 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3684 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3685 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3686 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3687 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3688 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3689
3690 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3691 String s = "A" + new String(Character.toChars(cp)) + "B";
3692 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3693 : String.format("\\u%04x\\u%04x",
3694 (int) Character.toChars(cp)[0],
3695 (int) Character.toChars(cp)[1]);
3696 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3697 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3698 failCount++;
3699 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3700 failCount++;
3701 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3702 failCount++;
3703 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3704 failCount++;
3705 }
3706 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003707 }
3708
3709 private static void unicodeClassesTest() throws Exception {
3710
3711 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3712 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3713 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3714 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3715 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3716 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3717 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3718 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3719 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3720 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3721 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3722 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3723 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3724 Matcher bound = Pattern.compile("\\b").matcher("");
3725 Matcher word = Pattern.compile("\\w++").matcher("");
3726 // UNICODE_CHARACTER_CLASS
3727 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3728 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3729 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3730 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3731 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3732 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3733 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3734 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3735 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3736 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3737 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3738 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3739 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3740 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3741 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3742 // embedded flag (?U)
3743 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3744 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3745 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3746
3747 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3748 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3749 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3750 // properties
3751 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3752 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3753 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3754 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3755 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3756 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3757 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3758 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3759 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3760 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3761
3762 // javaMethod
3763 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3764 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3765 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3766 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3767
3768 for (int cp = 1; cp < 0x30000; cp++) {
3769 String str = new String(Character.toChars(cp));
3770 int type = Character.getType(cp);
3771 if (// lower
3772 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3773 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3774 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3775 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3776 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3777 // upper
3778 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3779 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3780 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3781 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3782 // alpha
3783 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3784 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3785 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3786 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3787 // digit
3788 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3789 Character.isDigit(cp) != digitU.reset(str).matches() ||
3790 // alnum
3791 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3792 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3793 // punct
3794 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3795 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3796 // graph
3797 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3798 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3799 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3800 // blank
3801 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3802 != blank.reset(str).matches() ||
3803 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3804 // print
3805 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3806 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3807 // cntrl
3808 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3809 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3810 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3811 // hexdigit
3812 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3813 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3814 // space
3815 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3816 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3817 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3818 // word
3819 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3820 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3821 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3822 // bwordb
3823 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3824 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3825 // properties
3826 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3827 Character.isLetter(cp) != letterP.reset(str).matches()||
3828 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3829 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3830 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3831 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3832 failCount++;
3833 }
3834
3835 // bounds/word align
3836 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3837 if (!bwbU.reset("\u0180sherman\u0400").matches())
3838 failCount++;
3839 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3840 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3841 failCount++;
3842 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3843 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3844 failCount++;
3845 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3846 failCount++;
3847 report("unicodePredefinedClasses");
3848 }
shermanecb65472012-05-08 10:57:13 -07003849
3850 private static void horizontalAndVerticalWSTest() throws Exception {
3851 String hws = new String (new char[] {
3852 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3853 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3854 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3855 0x202f, 0x205f, 0x3000 });
3856 String vws = new String (new char[] {
3857 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3858 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3859 !Pattern.compile("[\\h]+").matcher(hws).matches())
3860 failCount++;
3861 if (Pattern.compile("\\H").matcher(hws).find() ||
3862 Pattern.compile("[\\H]").matcher(hws).find())
3863 failCount++;
3864 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3865 !Pattern.compile("[\\v]+").matcher(vws).matches())
3866 failCount++;
3867 if (Pattern.compile("\\V").matcher(vws).find() ||
3868 Pattern.compile("[\\V]").matcher(vws).find())
3869 failCount++;
3870 String prefix = "abcd";
3871 String suffix = "efgh";
3872 String ng = "A";
3873 for (int i = 0; i < hws.length(); i++) {
3874 String c = String.valueOf(hws.charAt(i));
3875 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3876 if (!m.find() || !c.equals(m.group()))
3877 failCount++;
3878 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3879 if (!m.find() || !c.equals(m.group()))
3880 failCount++;
3881
3882 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3883 if (!m.find() || !ng.equals(m.group()))
3884 failCount++;
3885 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3886 if (!m.find() || !ng.equals(m.group()))
3887 failCount++;
3888 }
3889 for (int i = 0; i < vws.length(); i++) {
3890 String c = String.valueOf(vws.charAt(i));
3891 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3892 if (!m.find() || !c.equals(m.group()))
3893 failCount++;
3894 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3895 if (!m.find() || !c.equals(m.group()))
3896 failCount++;
3897
3898 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3899 if (!m.find() || !ng.equals(m.group()))
3900 failCount++;
3901 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3902 if (!m.find() || !ng.equals(m.group()))
3903 failCount++;
3904 }
3905 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3906 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3907 failCount++;
3908 report("horizontalAndVerticalWSTest");
3909 }
3910
3911 private static void linebreakTest() throws Exception {
3912 String linebreaks = new String (new char[] {
3913 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3914 String crnl = "\r\n";
3915 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3916 !Pattern.compile("\\R").matcher(crnl).matches() ||
3917 Pattern.compile("\\R\\R").matcher(crnl).matches())
3918 failCount++;
3919 report("linebreakTest");
3920 }
3921
sherman36e2c8f2012-08-09 10:15:26 -07003922 // #7189363
3923 private static void branchTest() throws Exception {
3924 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
3925 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
3926 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
3927 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
3928 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
3929 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
3930 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
3931 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
3932 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
3933 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
3934 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
3935 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
3936 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
3937 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
3938 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
3939 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
3940 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
3941 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
3942 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
3943 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
3944 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
3945 !Pattern.compile("(a)??bc|de").matcher("de").matches())
3946 failCount++;
3947 report("branchTest");
3948 }
3949
sherman0b4d42d2009-02-23 21:06:15 -08003950}