blob: 159e9afddd92830132035c2ec03387269a4e5649 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
ohairbf91ea12011-04-06 22:06:11 -07002 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
43
44/**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080053 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080054
55 /**
56 * Main to interpret arguments and run several tests.
57 *
58 */
59 public static void main(String[] args) throws Exception {
60 // Most of the tests are in a file
61 processFile("TestCases.txt");
62 //processFile("PerlCases.txt");
63 processFile("BMPTestCases.txt");
64 processFile("SupplementaryTestCases.txt");
65
66 // These test many randomly generated char patterns
67 bm();
68 slice();
69
70 // These are hard to put into the file
71 escapes();
72 blankInput();
73
74 // Substitition tests on randomly generated sequences
75 globalSubstitute();
76 stringbufferSubstitute();
77 substitutionBasher();
78
79 // Canonical Equivalence
80 ceTest();
81
82 // Anchors
83 anchorTest();
84
85 // boolean match calls
86 matchesTest();
87 lookingAtTest();
88
89 // Pattern API
90 patternMatchesTest();
91
92 // Misc
93 lookbehindTest();
94 nullArgumentTest();
95 backRefTest();
96 groupCaptureTest();
97 caretTest();
98 charClassTest();
99 emptyPatternTest();
100 findIntTest();
101 group0Test();
102 longPatternTest();
103 octalTest();
104 ampersandTest();
105 negationTest();
106 splitTest();
107 appendTest();
108 caseFoldingTest();
109 commentsTest();
110 unixLinesTest();
111 replaceFirstTest();
112 gTest();
113 zTest();
114 serializeTest();
115 reluctantRepetitionTest();
116 multilineDollarTest();
117 dollarAtEndTest();
118 caretBetweenTerminatorsTest();
119 // This RFE rejected in Tiger numOccurrencesTest();
120 javaCharClassTest();
121 nonCaptureRepetitionTest();
122 notCapturedGroupCurlyMatchTest();
123 escapedSegmentTest();
124 literalPatternTest();
125 literalReplacementTest();
126 regionTest();
127 toStringTest();
128 negatedCharClassTest();
129 findFromTest();
130 boundsTest();
131 unicodeWordBoundsTest();
132 caretAtEndTest();
133 wordSearchTest();
134 hitEndTest();
135 toMatchResultTest();
136 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800137 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800138 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800139 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700140 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800141 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700142 unicodeClassesTest();
shermanb16229d2011-12-19 14:14:14 -0800143 if (failure) {
144 throw new
145 RuntimeException("RegExTest failed, 1st failure: " +
146 firstFailure);
147 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800148 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800149 }
sherman0b4d42d2009-02-23 21:06:15 -0800150 }
151
152 // Utility functions
153
154 private static String getRandomAlphaString(int length) {
155 StringBuffer buf = new StringBuffer(length);
156 for (int i=0; i<length; i++) {
157 char randChar = (char)(97 + generator.nextInt(26));
158 buf.append(randChar);
159 }
160 return buf.toString();
161 }
162
163 private static void check(Matcher m, String expected) {
164 m.find();
165 if (!m.group().equals(expected))
166 failCount++;
167 }
168
169 private static void check(Matcher m, String result, boolean expected) {
170 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800171 if (m.group().equals(result) != expected)
172 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800173 }
174
175 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800176 if (p.matcher(s).find() != expected)
177 failCount++;
178 }
179
180 private static void check(String p, String s, boolean expected) {
181 Matcher matcher = Pattern.compile(p).matcher(s);
182 if (matcher.find() != expected)
183 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800184 }
185
186 private static void check(String p, char c, boolean expected) {
187 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
188 Pattern pattern = Pattern.compile(propertyPattern);
189 char[] ca = new char[1]; ca[0] = c;
190 Matcher matcher = pattern.matcher(new String(ca));
191 if (!matcher.find())
192 failCount++;
193 }
194
195 private static void check(String p, int codePoint, boolean expected) {
196 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
197 Pattern pattern = Pattern.compile(propertyPattern);
198 char[] ca = Character.toChars(codePoint);
199 Matcher matcher = pattern.matcher(new String(ca));
200 if (!matcher.find())
201 failCount++;
202 }
203
204 private static void check(String p, int flag, String input, String s,
205 boolean expected)
206 {
207 Pattern pattern = Pattern.compile(p, flag);
208 Matcher matcher = pattern.matcher(input);
209 if (expected)
210 check(matcher, s, expected);
211 else
212 check(pattern, input, false);
213 }
214
215 private static void report(String testName) {
216 int spacesToAdd = 30 - testName.length();
217 StringBuffer paddedNameBuffer = new StringBuffer(testName);
218 for (int i=0; i<spacesToAdd; i++)
219 paddedNameBuffer.append(" ");
220 String paddedName = paddedNameBuffer.toString();
221 System.err.println(paddedName + ": " +
222 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800223 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800224 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800225
226 if (firstFailure == null) {
227 firstFailure = testName;
228 }
229 }
230
sherman0b4d42d2009-02-23 21:06:15 -0800231 failCount = 0;
232 }
233
234 /**
235 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
236 * supplementary characters. This method does NOT fully take care
237 * of the regex syntax.
238 */
239 private static String toSupplementaries(String s) {
240 int length = s.length();
241 StringBuffer sb = new StringBuffer(length * 2);
242
243 for (int i = 0; i < length; ) {
244 char c = s.charAt(i++);
245 if (c == '\\') {
246 sb.append(c);
247 if (i < length) {
248 c = s.charAt(i++);
249 sb.append(c);
250 if (c == 'u') {
251 // assume no syntax error
252 sb.append(s.charAt(i++));
253 sb.append(s.charAt(i++));
254 sb.append(s.charAt(i++));
255 sb.append(s.charAt(i++));
256 }
257 }
258 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
259 sb.append('\ud800').append((char)('\udc00'+c));
260 } else {
261 sb.append(c);
262 }
263 }
264 return sb.toString();
265 }
266
267 // Regular expression tests
268
269 // This is for bug 6178785
270 // Test if an expected NPE gets thrown when passing in a null argument
271 private static boolean check(Runnable test) {
272 try {
273 test.run();
274 failCount++;
275 return false;
276 } catch (NullPointerException npe) {
277 return true;
278 }
279 }
280
281 private static void nullArgumentTest() {
282 check(new Runnable() { public void run() { Pattern.compile(null); }});
283 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
284 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
285 check(new Runnable() { public void run() { Pattern.quote(null);}});
286 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
287 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
288
289 final Matcher m = Pattern.compile("xyz").matcher("xyz");
290 m.matches();
291 check(new Runnable() { public void run() { m.appendTail(null);}});
292 check(new Runnable() { public void run() { m.replaceAll(null);}});
293 check(new Runnable() { public void run() { m.replaceFirst(null);}});
294 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
295 check(new Runnable() { public void run() { m.reset(null);}});
296 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
297 //check(new Runnable() { public void run() { m.usePattern(null);}});
298
299 report("Null Argument");
300 }
301
302 // This is for bug6635133
303 // Test if surrogate pair in Unicode escapes can be handled correctly.
304 private static void surrogatesInClassTest() throws Exception {
305 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
306 Matcher matcher = pattern.matcher("\ud834\udd22");
307 if (!matcher.find())
308 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800309
310 report("Surrogate pair in Unicode escape");
311 }
312
313 // This is for bug6990617
314 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
315 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
316 // char is an octal digit.
317 private static void removeQEQuotingTest() throws Exception {
318 Pattern pattern =
319 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
320 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
321 if (!matcher.find())
322 failCount++;
323
324 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800325 }
326
327 // This is for bug 4988891
328 // Test toMatchResult to see that it is a copy of the Matcher
329 // that is not affected by subsequent operations on the original
330 private static void toMatchResultTest() throws Exception {
331 Pattern pattern = Pattern.compile("squid");
332 Matcher matcher = pattern.matcher(
333 "agiantsquidofdestinyasmallsquidoffate");
334 matcher.find();
335 int matcherStart1 = matcher.start();
336 MatchResult mr = matcher.toMatchResult();
337 if (mr == matcher)
338 failCount++;
339 int resultStart1 = mr.start();
340 if (matcherStart1 != resultStart1)
341 failCount++;
342 matcher.find();
343 int matcherStart2 = matcher.start();
344 int resultStart2 = mr.start();
345 if (matcherStart2 == resultStart2)
346 failCount++;
347 if (resultStart1 != resultStart2)
348 failCount++;
349 MatchResult mr2 = matcher.toMatchResult();
350 if (mr == mr2)
351 failCount++;
352 if (mr2.start() != matcherStart2)
353 failCount++;
354 report("toMatchResult is a copy");
355 }
356
357 // This is for bug 5013885
358 // Must test a slice to see if it reports hitEnd correctly
359 private static void hitEndTest() throws Exception {
360 // Basic test of Slice node
361 Pattern p = Pattern.compile("^squidattack");
362 Matcher m = p.matcher("squack");
363 m.find();
364 if (m.hitEnd())
365 failCount++;
366 m.reset("squid");
367 m.find();
368 if (!m.hitEnd())
369 failCount++;
370
371 // Test Slice, SliceA and SliceU nodes
372 for (int i=0; i<3; i++) {
373 int flags = 0;
374 if (i==1) flags = Pattern.CASE_INSENSITIVE;
375 if (i==2) flags = Pattern.UNICODE_CASE;
376 p = Pattern.compile("^abc", flags);
377 m = p.matcher("ad");
378 m.find();
379 if (m.hitEnd())
380 failCount++;
381 m.reset("ab");
382 m.find();
383 if (!m.hitEnd())
384 failCount++;
385 }
386
387 // Test Boyer-Moore node
388 p = Pattern.compile("catattack");
389 m = p.matcher("attack");
390 m.find();
391 if (!m.hitEnd())
392 failCount++;
393
394 p = Pattern.compile("catattack");
395 m = p.matcher("attackattackattackcatatta");
396 m.find();
397 if (!m.hitEnd())
398 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800399 report("hitEnd from a Slice");
400 }
401
402 // This is for bug 4997476
403 // It is weird code submitted by customer demonstrating a regression
404 private static void wordSearchTest() throws Exception {
405 String testString = new String("word1 word2 word3");
406 Pattern p = Pattern.compile("\\b");
407 Matcher m = p.matcher(testString);
408 int position = 0;
409 int start = 0;
410 while (m.find(position)) {
411 start = m.start();
412 if (start == testString.length())
413 break;
414 if (m.find(start+1)) {
415 position = m.start();
416 } else {
417 position = testString.length();
418 }
419 if (testString.substring(start, position).equals(" "))
420 continue;
421 if (!testString.substring(start, position-1).startsWith("word"))
422 failCount++;
423 }
424 report("Customer word search");
425 }
426
427 // This is for bug 4994840
428 private static void caretAtEndTest() throws Exception {
429 // Problem only occurs with multiline patterns
430 // containing a beginning-of-line caret "^" followed
431 // by an expression that also matches the empty string.
432 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
433 Matcher matcher = pattern.matcher("\r");
434 matcher.find();
435 matcher.find();
436 report("Caret at end");
437 }
438
439 // This test is for 4979006
440 // Check to see if word boundary construct properly handles unicode
441 // non spacing marks
442 private static void unicodeWordBoundsTest() throws Exception {
443 String spaces = " ";
444 String wordChar = "a";
445 String nsm = "\u030a";
446
447 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
448
449 Pattern pattern = Pattern.compile("\\b");
450 Matcher matcher = pattern.matcher("");
451 // S=other B=word character N=non spacing mark .=word boundary
452 // SS.BB.SS
453 String input = spaces + wordChar + wordChar + spaces;
454 twoFindIndexes(input, matcher, 2, 4);
455 // SS.BBN.SS
456 input = spaces + wordChar +wordChar + nsm + spaces;
457 twoFindIndexes(input, matcher, 2, 5);
458 // SS.BN.SS
459 input = spaces + wordChar + nsm + spaces;
460 twoFindIndexes(input, matcher, 2, 4);
461 // SS.BNN.SS
462 input = spaces + wordChar + nsm + nsm + spaces;
463 twoFindIndexes(input, matcher, 2, 5);
464 // SSN.BB.SS
465 input = spaces + nsm + wordChar + wordChar + spaces;
466 twoFindIndexes(input, matcher, 3, 5);
467 // SS.BNB.SS
468 input = spaces + wordChar + nsm + wordChar + spaces;
469 twoFindIndexes(input, matcher, 2, 5);
470 // SSNNSS
471 input = spaces + nsm + nsm + spaces;
472 matcher.reset(input);
473 if (matcher.find())
474 failCount++;
475 // SSN.BBN.SS
476 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
477 twoFindIndexes(input, matcher, 3, 6);
478
479 report("Unicode word boundary");
480 }
481
482 private static void twoFindIndexes(String input, Matcher matcher, int a,
483 int b) throws Exception
484 {
485 matcher.reset(input);
486 matcher.find();
487 if (matcher.start() != a)
488 failCount++;
489 matcher.find();
490 if (matcher.start() != b)
491 failCount++;
492 }
493
494 // This test is for 6284152
495 static void check(String regex, String input, String[] expected) {
496 List<String> result = new ArrayList<String>();
497 Pattern p = Pattern.compile(regex);
498 Matcher m = p.matcher(input);
499 while (m.find()) {
500 result.add(m.group());
501 }
502 if (!Arrays.asList(expected).equals(result))
503 failCount++;
504 }
505
506 private static void lookbehindTest() throws Exception {
507 //Positive
508 check("(?<=%.{0,5})foo\\d",
509 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
510 new String[]{"foo1", "foo2", "foo3"});
511
512 //boundary at end of the lookbehind sub-regex should work consistently
513 //with the boundary just after the lookbehind sub-regex
514 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
515 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
516 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
517 check("(?<!abc \\b)foo", "abc foo", new String[0]);
518
519 //Negative
520 check("(?<!%.{0,5})foo\\d",
521 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
522 new String[] {"foo4", "foo5"});
523
524 //Positive greedy
525 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
526
527 //Positive reluctant
528 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
529
530 //supplementary
531 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
532 new String[] {"fo\ud800\udc00o"});
533 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
534 new String[] {"fo\ud800\udc00o"});
535 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
536 new String[] {"fo\ud800\udc00o"});
537 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
538 new String[] {"fo\ud800\udc00o"});
539 report("Lookbehind");
540 }
541
542 // This test is for 4938995
543 // Check to see if weak region boundaries are transparent to
544 // lookahead and lookbehind constructs
545 private static void boundsTest() throws Exception {
546 String fullMessage = "catdogcat";
547 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
548 Matcher matcher = pattern.matcher("catdogca");
549 matcher.useTransparentBounds(true);
550 if (matcher.find())
551 failCount++;
552 matcher.reset("atdogcat");
553 if (matcher.find())
554 failCount++;
555 matcher.reset(fullMessage);
556 if (!matcher.find())
557 failCount++;
558 matcher.reset(fullMessage);
559 matcher.region(0,9);
560 if (!matcher.find())
561 failCount++;
562 matcher.reset(fullMessage);
563 matcher.region(0,6);
564 if (!matcher.find())
565 failCount++;
566 matcher.reset(fullMessage);
567 matcher.region(3,6);
568 if (!matcher.find())
569 failCount++;
570 matcher.useTransparentBounds(false);
571 if (matcher.find())
572 failCount++;
573
574 // Negative lookahead/lookbehind
575 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
576 matcher = pattern.matcher("dogcat");
577 matcher.useTransparentBounds(true);
578 matcher.region(0,3);
579 if (matcher.find())
580 failCount++;
581 matcher.reset("catdog");
582 matcher.region(3,6);
583 if (matcher.find())
584 failCount++;
585 matcher.useTransparentBounds(false);
586 matcher.reset("dogcat");
587 matcher.region(0,3);
588 if (!matcher.find())
589 failCount++;
590 matcher.reset("catdog");
591 matcher.region(3,6);
592 if (!matcher.find())
593 failCount++;
594
595 report("Region bounds transparency");
596 }
597
598 // This test is for 4945394
599 private static void findFromTest() throws Exception {
600 String message = "This is 40 $0 message.";
601 Pattern pat = Pattern.compile("\\$0");
602 Matcher match = pat.matcher(message);
603 if (!match.find())
604 failCount++;
605 if (match.find())
606 failCount++;
607 if (match.find())
608 failCount++;
609 report("Check for alternating find");
610 }
611
612 // This test is for 4872664 and 4892980
613 private static void negatedCharClassTest() throws Exception {
614 Pattern pattern = Pattern.compile("[^>]");
615 Matcher matcher = pattern.matcher("\u203A");
616 if (!matcher.matches())
617 failCount++;
618 pattern = Pattern.compile("[^fr]");
619 matcher = pattern.matcher("a");
620 if (!matcher.find())
621 failCount++;
622 matcher.reset("\u203A");
623 if (!matcher.find())
624 failCount++;
625 String s = "for";
626 String result[] = s.split("[^fr]");
627 if (!result[0].equals("f"))
628 failCount++;
629 if (!result[1].equals("r"))
630 failCount++;
631 s = "f\u203Ar";
632 result = s.split("[^fr]");
633 if (!result[0].equals("f"))
634 failCount++;
635 if (!result[1].equals("r"))
636 failCount++;
637
638 // Test adding to bits, subtracting a node, then adding to bits again
639 pattern = Pattern.compile("[^f\u203Ar]");
640 matcher = pattern.matcher("a");
641 if (!matcher.find())
642 failCount++;
643 matcher.reset("f");
644 if (matcher.find())
645 failCount++;
646 matcher.reset("\u203A");
647 if (matcher.find())
648 failCount++;
649 matcher.reset("r");
650 if (matcher.find())
651 failCount++;
652 matcher.reset("\u203B");
653 if (!matcher.find())
654 failCount++;
655
656 // Test subtracting a node, adding to bits, subtracting again
657 pattern = Pattern.compile("[^\u203Ar\u203B]");
658 matcher = pattern.matcher("a");
659 if (!matcher.find())
660 failCount++;
661 matcher.reset("\u203A");
662 if (matcher.find())
663 failCount++;
664 matcher.reset("r");
665 if (matcher.find())
666 failCount++;
667 matcher.reset("\u203B");
668 if (matcher.find())
669 failCount++;
670 matcher.reset("\u203C");
671 if (!matcher.find())
672 failCount++;
673
674 report("Negated Character Class");
675 }
676
677 // This test is for 4628291
678 private static void toStringTest() throws Exception {
679 Pattern pattern = Pattern.compile("b+");
680 if (pattern.toString() != "b+")
681 failCount++;
682 Matcher matcher = pattern.matcher("aaabbbccc");
683 String matcherString = matcher.toString(); // unspecified
684 matcher.find();
685 matcherString = matcher.toString(); // unspecified
686 matcher.region(0,3);
687 matcherString = matcher.toString(); // unspecified
688 matcher.reset();
689 matcherString = matcher.toString(); // unspecified
690 report("toString");
691 }
692
693 // This test is for 4808962
694 private static void literalPatternTest() throws Exception {
695 int flags = Pattern.LITERAL;
696
697 Pattern pattern = Pattern.compile("abc\\t$^", flags);
698 check(pattern, "abc\\t$^", true);
699
700 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
701 check(pattern, "abc\\t$^", true);
702
703 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
704 check(pattern, "\\Qa^$bcabc\\E", true);
705 check(pattern, "a^$bcabc", false);
706
707 pattern = Pattern.compile("\\\\Q\\\\E");
708 check(pattern, "\\Q\\E", true);
709
710 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
711 check(pattern, "abcefg\\Q\\Ehij", true);
712
713 pattern = Pattern.compile("\\\\\\Q\\\\E");
714 check(pattern, "\\\\\\\\", true);
715
716 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
717 check(pattern, "\\Qa^$bcabc\\E", true);
718 check(pattern, "a^$bcabc", false);
719
720 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
721 check(pattern, "\\Qabc\\Edef", true);
722 check(pattern, "abcdef", false);
723
724 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
725 check(pattern, "abc\\Edef", true);
726 check(pattern, "abcdef", false);
727
728 pattern = Pattern.compile(Pattern.quote("\\E"));
729 check(pattern, "\\E", true);
730
731 pattern = Pattern.compile("((((abc.+?:)", flags);
732 check(pattern, "((((abc.+?:)", true);
733
734 flags |= Pattern.MULTILINE;
735
736 pattern = Pattern.compile("^cat$", flags);
737 check(pattern, "abc^cat$def", true);
738 check(pattern, "cat", false);
739
740 flags |= Pattern.CASE_INSENSITIVE;
741
742 pattern = Pattern.compile("abcdef", flags);
743 check(pattern, "ABCDEF", true);
744 check(pattern, "AbCdEf", true);
745
746 flags |= Pattern.DOTALL;
747
748 pattern = Pattern.compile("a...b", flags);
749 check(pattern, "A...b", true);
750 check(pattern, "Axxxb", false);
751
752 flags |= Pattern.CANON_EQ;
753
754 Pattern p = Pattern.compile("testa\u030a", flags);
755 check(pattern, "testa\u030a", false);
756 check(pattern, "test\u00e5", false);
757
758 // Supplementary character test
759 flags = Pattern.LITERAL;
760
761 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
762 check(pattern, toSupplementaries("abc\\t$^"), true);
763
764 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
765 check(pattern, toSupplementaries("abc\\t$^"), true);
766
767 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
768 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
769 check(pattern, toSupplementaries("a^$bcabc"), false);
770
771 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
772 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
773 check(pattern, toSupplementaries("a^$bcabc"), false);
774
775 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
776 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
777 check(pattern, toSupplementaries("abcdef"), false);
778
779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
780 check(pattern, toSupplementaries("abc\\Edef"), true);
781 check(pattern, toSupplementaries("abcdef"), false);
782
783 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
784 check(pattern, toSupplementaries("((((abc.+?:)"), true);
785
786 flags |= Pattern.MULTILINE;
787
788 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
789 check(pattern, toSupplementaries("abc^cat$def"), true);
790 check(pattern, toSupplementaries("cat"), false);
791
792 flags |= Pattern.DOTALL;
793
794 // note: this is case-sensitive.
795 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
796 check(pattern, toSupplementaries("a...b"), true);
797 check(pattern, toSupplementaries("axxxb"), false);
798
799 flags |= Pattern.CANON_EQ;
800
801 String t = toSupplementaries("test");
802 p = Pattern.compile(t + "a\u030a", flags);
803 check(pattern, t + "a\u030a", false);
804 check(pattern, t + "\u00e5", false);
805
806 report("Literal pattern");
807 }
808
809 // This test is for 4803179
810 // This test is also for 4808962, replacement parts
811 private static void literalReplacementTest() throws Exception {
812 int flags = Pattern.LITERAL;
813
814 Pattern pattern = Pattern.compile("abc", flags);
815 Matcher matcher = pattern.matcher("zzzabczzz");
816 String replaceTest = "$0";
817 String result = matcher.replaceAll(replaceTest);
818 if (!result.equals("zzzabczzz"))
819 failCount++;
820
821 matcher.reset();
822 String literalReplacement = matcher.quoteReplacement(replaceTest);
823 result = matcher.replaceAll(literalReplacement);
824 if (!result.equals("zzz$0zzz"))
825 failCount++;
826
827 matcher.reset();
828 replaceTest = "\\t$\\$";
829 literalReplacement = matcher.quoteReplacement(replaceTest);
830 result = matcher.replaceAll(literalReplacement);
831 if (!result.equals("zzz\\t$\\$zzz"))
832 failCount++;
833
834 // Supplementary character test
835 pattern = Pattern.compile(toSupplementaries("abc"), flags);
836 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
837 replaceTest = "$0";
838 result = matcher.replaceAll(replaceTest);
839 if (!result.equals(toSupplementaries("zzzabczzz")))
840 failCount++;
841
842 matcher.reset();
843 literalReplacement = matcher.quoteReplacement(replaceTest);
844 result = matcher.replaceAll(literalReplacement);
845 if (!result.equals(toSupplementaries("zzz$0zzz")))
846 failCount++;
847
848 matcher.reset();
849 replaceTest = "\\t$\\$";
850 literalReplacement = matcher.quoteReplacement(replaceTest);
851 result = matcher.replaceAll(literalReplacement);
852 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
853 failCount++;
854
855 report("Literal replacement");
856 }
857
858 // This test is for 4757029
859 private static void regionTest() throws Exception {
860 Pattern pattern = Pattern.compile("abc");
861 Matcher matcher = pattern.matcher("abcdefabc");
862
863 matcher.region(0,9);
864 if (!matcher.find())
865 failCount++;
866 if (!matcher.find())
867 failCount++;
868 matcher.region(0,3);
869 if (!matcher.find())
870 failCount++;
871 matcher.region(3,6);
872 if (matcher.find())
873 failCount++;
874 matcher.region(0,2);
875 if (matcher.find())
876 failCount++;
877
878 expectRegionFail(matcher, 1, -1);
879 expectRegionFail(matcher, -1, -1);
880 expectRegionFail(matcher, -1, 1);
881 expectRegionFail(matcher, 5, 3);
882 expectRegionFail(matcher, 5, 12);
883 expectRegionFail(matcher, 12, 12);
884
885 pattern = Pattern.compile("^abc$");
886 matcher = pattern.matcher("zzzabczzz");
887 matcher.region(0,9);
888 if (matcher.find())
889 failCount++;
890 matcher.region(3,6);
891 if (!matcher.find())
892 failCount++;
893 matcher.region(3,6);
894 matcher.useAnchoringBounds(false);
895 if (matcher.find())
896 failCount++;
897
898 // Supplementary character test
899 pattern = Pattern.compile(toSupplementaries("abc"));
900 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
901 matcher.region(0,9*2);
902 if (!matcher.find())
903 failCount++;
904 if (!matcher.find())
905 failCount++;
906 matcher.region(0,3*2);
907 if (!matcher.find())
908 failCount++;
909 matcher.region(1,3*2);
910 if (matcher.find())
911 failCount++;
912 matcher.region(3*2,6*2);
913 if (matcher.find())
914 failCount++;
915 matcher.region(0,2*2);
916 if (matcher.find())
917 failCount++;
918 matcher.region(0,2*2+1);
919 if (matcher.find())
920 failCount++;
921
922 expectRegionFail(matcher, 1*2, -1);
923 expectRegionFail(matcher, -1, -1);
924 expectRegionFail(matcher, -1, 1*2);
925 expectRegionFail(matcher, 5*2, 3*2);
926 expectRegionFail(matcher, 5*2, 12*2);
927 expectRegionFail(matcher, 12*2, 12*2);
928
929 pattern = Pattern.compile(toSupplementaries("^abc$"));
930 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
931 matcher.region(0,9*2);
932 if (matcher.find())
933 failCount++;
934 matcher.region(3*2,6*2);
935 if (!matcher.find())
936 failCount++;
937 matcher.region(3*2+1,6*2);
938 if (matcher.find())
939 failCount++;
940 matcher.region(3*2,6*2-1);
941 if (matcher.find())
942 failCount++;
943 matcher.region(3*2,6*2);
944 matcher.useAnchoringBounds(false);
945 if (matcher.find())
946 failCount++;
947 report("Regions");
948 }
949
950 private static void expectRegionFail(Matcher matcher, int index1,
951 int index2)
952 {
953 try {
954 matcher.region(index1, index2);
955 failCount++;
956 } catch (IndexOutOfBoundsException ioobe) {
957 // Correct result
958 } catch (IllegalStateException ise) {
959 // Correct result
960 }
961 }
962
963 // This test is for 4803197
964 private static void escapedSegmentTest() throws Exception {
965
966 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
967 check(pattern, "dir1\\dir2", true);
968
969 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
970 check(pattern, "dir1\\dir2\\", true);
971
972 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
973 check(pattern, "dir1\\dir2\\", true);
974
975 // Supplementary character test
976 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
977 check(pattern, toSupplementaries("dir1\\dir2"), true);
978
979 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
980 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
981
982 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
983 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
984
985 report("Escaped segment");
986 }
987
988 // This test is for 4792284
989 private static void nonCaptureRepetitionTest() throws Exception {
990 String input = "abcdefgh;";
991
992 String[] patterns = new String[] {
993 "(?:\\w{4})+;",
994 "(?:\\w{8})*;",
995 "(?:\\w{2}){2,4};",
996 "(?:\\w{4}){2,};", // only matches the
997 ".*?(?:\\w{5})+;", // specified minimum
998 ".*?(?:\\w{9})*;", // number of reps - OK
999 "(?:\\w{4})+?;", // lazy repetition - OK
1000 "(?:\\w{4})++;", // possessive repetition - OK
1001 "(?:\\w{2,}?)+;", // non-deterministic - OK
1002 "(\\w{4})+;", // capturing group - OK
1003 };
1004
1005 for (int i = 0; i < patterns.length; i++) {
1006 // Check find()
1007 check(patterns[i], 0, input, input, true);
1008 // Check matches()
1009 Pattern p = Pattern.compile(patterns[i]);
1010 Matcher m = p.matcher(input);
1011
1012 if (m.matches()) {
1013 if (!m.group(0).equals(input))
1014 failCount++;
1015 } else {
1016 failCount++;
1017 }
1018 }
1019
1020 report("Non capturing repetition");
1021 }
1022
1023 // This test is for 6358731
1024 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1025 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1026 Matcher matcher = pattern.matcher("abcd");
1027 if (!matcher.matches() ||
1028 matcher.group(1) != null ||
1029 !matcher.group(2).equals("abcd")) {
1030 failCount++;
1031 }
1032 report("Not captured GroupCurly");
1033 }
1034
1035 // This test is for 4706545
1036 private static void javaCharClassTest() throws Exception {
1037 for (int i=0; i<1000; i++) {
1038 char c = (char)generator.nextInt();
1039 check("{javaLowerCase}", c, Character.isLowerCase(c));
1040 check("{javaUpperCase}", c, Character.isUpperCase(c));
1041 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1042 check("{javaTitleCase}", c, Character.isTitleCase(c));
1043 check("{javaDigit}", c, Character.isDigit(c));
1044 check("{javaDefined}", c, Character.isDefined(c));
1045 check("{javaLetter}", c, Character.isLetter(c));
1046 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1047 check("{javaJavaIdentifierStart}", c,
1048 Character.isJavaIdentifierStart(c));
1049 check("{javaJavaIdentifierPart}", c,
1050 Character.isJavaIdentifierPart(c));
1051 check("{javaUnicodeIdentifierStart}", c,
1052 Character.isUnicodeIdentifierStart(c));
1053 check("{javaUnicodeIdentifierPart}", c,
1054 Character.isUnicodeIdentifierPart(c));
1055 check("{javaIdentifierIgnorable}", c,
1056 Character.isIdentifierIgnorable(c));
1057 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1058 check("{javaWhitespace}", c, Character.isWhitespace(c));
1059 check("{javaISOControl}", c, Character.isISOControl(c));
1060 check("{javaMirrored}", c, Character.isMirrored(c));
1061
1062 }
1063
1064 // Supplementary character test
1065 for (int i=0; i<1000; i++) {
1066 int c = generator.nextInt(Character.MAX_CODE_POINT
1067 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1068 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1069 check("{javaLowerCase}", c, Character.isLowerCase(c));
1070 check("{javaUpperCase}", c, Character.isUpperCase(c));
1071 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1072 check("{javaTitleCase}", c, Character.isTitleCase(c));
1073 check("{javaDigit}", c, Character.isDigit(c));
1074 check("{javaDefined}", c, Character.isDefined(c));
1075 check("{javaLetter}", c, Character.isLetter(c));
1076 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1077 check("{javaJavaIdentifierStart}", c,
1078 Character.isJavaIdentifierStart(c));
1079 check("{javaJavaIdentifierPart}", c,
1080 Character.isJavaIdentifierPart(c));
1081 check("{javaUnicodeIdentifierStart}", c,
1082 Character.isUnicodeIdentifierStart(c));
1083 check("{javaUnicodeIdentifierPart}", c,
1084 Character.isUnicodeIdentifierPart(c));
1085 check("{javaIdentifierIgnorable}", c,
1086 Character.isIdentifierIgnorable(c));
1087 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1088 check("{javaWhitespace}", c, Character.isWhitespace(c));
1089 check("{javaISOControl}", c, Character.isISOControl(c));
1090 check("{javaMirrored}", c, Character.isMirrored(c));
1091 }
1092
1093 report("Java character classes");
1094 }
1095
1096 // This test is for 4523620
1097 /*
1098 private static void numOccurrencesTest() throws Exception {
1099 Pattern pattern = Pattern.compile("aaa");
1100
1101 if (pattern.numOccurrences("aaaaaa", false) != 2)
1102 failCount++;
1103 if (pattern.numOccurrences("aaaaaa", true) != 4)
1104 failCount++;
1105
1106 pattern = Pattern.compile("^");
1107 if (pattern.numOccurrences("aaaaaa", false) != 1)
1108 failCount++;
1109 if (pattern.numOccurrences("aaaaaa", true) != 1)
1110 failCount++;
1111
1112 report("Number of Occurrences");
1113 }
1114 */
1115
1116 // This test is for 4776374
1117 private static void caretBetweenTerminatorsTest() throws Exception {
1118 int flags1 = Pattern.DOTALL;
1119 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1120 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1121 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1122
1123 check("^....", flags1, "test\ntest", "test", true);
1124 check(".....^", flags1, "test\ntest", "test", false);
1125 check(".....^", flags1, "test\n", "test", false);
1126 check("....^", flags1, "test\r\n", "test", false);
1127
1128 check("^....", flags2, "test\ntest", "test", true);
1129 check("....^", flags2, "test\ntest", "test", false);
1130 check(".....^", flags2, "test\n", "test", false);
1131 check("....^", flags2, "test\r\n", "test", false);
1132
1133 check("^....", flags3, "test\ntest", "test", true);
1134 check(".....^", flags3, "test\ntest", "test\n", true);
1135 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1136 check(".....^", flags3, "test\n", "test", false);
1137 check(".....^", flags3, "test\r\n", "test", false);
1138 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1139
1140 check("^....", flags4, "test\ntest", "test", true);
1141 check(".....^", flags3, "test\ntest", "test\n", true);
1142 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1143 check(".....^", flags4, "test\n", "test\n", false);
1144 check(".....^", flags4, "test\r\n", "test\r", false);
1145
1146 // Supplementary character test
1147 String t = toSupplementaries("test");
1148 check("^....", flags1, t+"\n"+t, t, true);
1149 check(".....^", flags1, t+"\n"+t, t, false);
1150 check(".....^", flags1, t+"\n", t, false);
1151 check("....^", flags1, t+"\r\n", t, false);
1152
1153 check("^....", flags2, t+"\n"+t, t, true);
1154 check("....^", flags2, t+"\n"+t, t, false);
1155 check(".....^", flags2, t+"\n", t, false);
1156 check("....^", flags2, t+"\r\n", t, false);
1157
1158 check("^....", flags3, t+"\n"+t, t, true);
1159 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1160 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1161 check(".....^", flags3, t+"\n", t, false);
1162 check(".....^", flags3, t+"\r\n", t, false);
1163 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1164
1165 check("^....", flags4, t+"\n"+t, t, true);
1166 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1167 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1168 check(".....^", flags4, t+"\n", t+"\n", false);
1169 check(".....^", flags4, t+"\r\n", t+"\r", false);
1170
1171 report("Caret between terminators");
1172 }
1173
1174 // This test is for 4727935
1175 private static void dollarAtEndTest() throws Exception {
1176 int flags1 = Pattern.DOTALL;
1177 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1178 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1179
1180 check("....$", flags1, "test\n", "test", true);
1181 check("....$", flags1, "test\r\n", "test", true);
1182 check(".....$", flags1, "test\n", "test\n", true);
1183 check(".....$", flags1, "test\u0085", "test\u0085", true);
1184 check("....$", flags1, "test\u0085", "test", true);
1185
1186 check("....$", flags2, "test\n", "test", true);
1187 check(".....$", flags2, "test\n", "test\n", true);
1188 check(".....$", flags2, "test\u0085", "test\u0085", true);
1189 check("....$", flags2, "test\u0085", "est\u0085", true);
1190
1191 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1192 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1193 check("....$blah", flags3, "test\nblah", "!!!!", false);
1194 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1195
1196 // Supplementary character test
1197 String t = toSupplementaries("test");
1198 String b = toSupplementaries("blah");
1199 check("....$", flags1, t+"\n", t, true);
1200 check("....$", flags1, t+"\r\n", t, true);
1201 check(".....$", flags1, t+"\n", t+"\n", true);
1202 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1203 check("....$", flags1, t+"\u0085", t, true);
1204
1205 check("....$", flags2, t+"\n", t, true);
1206 check(".....$", flags2, t+"\n", t+"\n", true);
1207 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1208 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1209
1210 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1211 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1212 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1213 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1214
1215 report("Dollar at End");
1216 }
1217
1218 // This test is for 4711773
1219 private static void multilineDollarTest() throws Exception {
1220 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1221 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1222 matcher.find();
1223 if (matcher.start(0) != 9)
1224 failCount++;
1225 matcher.find();
1226 if (matcher.start(0) != 20)
1227 failCount++;
1228
1229 // Supplementary character test
1230 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1231 matcher.find();
1232 if (matcher.start(0) != 9*2)
1233 failCount++;
1234 matcher.find();
1235 if (matcher.start(0) != 20*2)
1236 failCount++;
1237
1238 report("Multiline Dollar");
1239 }
1240
1241 private static void reluctantRepetitionTest() throws Exception {
1242 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1243 check(p, "1 word word word 2", true);
1244 check(p, "1 wor wo w 2", true);
1245 check(p, "1 word word 2", true);
1246 check(p, "1 word 2", true);
1247 check(p, "1 wo w w 2", true);
1248 check(p, "1 wo w 2", true);
1249 check(p, "1 wor w 2", true);
1250
1251 p = Pattern.compile("([a-z])+?c");
1252 Matcher m = p.matcher("ababcdefdec");
1253 check(m, "ababc");
1254
1255 // Supplementary character test
1256 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1257 m = p.matcher(toSupplementaries("ababcdefdec"));
1258 check(m, toSupplementaries("ababc"));
1259
1260 report("Reluctant Repetition");
1261 }
1262
1263 private static void serializeTest() throws Exception {
1264 String patternStr = "(b)";
1265 String matchStr = "b";
1266 Pattern pattern = Pattern.compile(patternStr);
1267 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1268 ObjectOutputStream oos = new ObjectOutputStream(baos);
1269 oos.writeObject(pattern);
1270 oos.close();
1271 ObjectInputStream ois = new ObjectInputStream(
1272 new ByteArrayInputStream(baos.toByteArray()));
1273 Pattern serializedPattern = (Pattern)ois.readObject();
1274 ois.close();
1275 Matcher matcher = serializedPattern.matcher(matchStr);
1276 if (!matcher.matches())
1277 failCount++;
1278 if (matcher.groupCount() != 1)
1279 failCount++;
1280
1281 report("Serialization");
1282 }
1283
1284 private static void gTest() {
1285 Pattern pattern = Pattern.compile("\\G\\w");
1286 Matcher matcher = pattern.matcher("abc#x#x");
1287 matcher.find();
1288 matcher.find();
1289 matcher.find();
1290 if (matcher.find())
1291 failCount++;
1292
1293 pattern = Pattern.compile("\\GA*");
1294 matcher = pattern.matcher("1A2AA3");
1295 matcher.find();
1296 if (matcher.find())
1297 failCount++;
1298
1299 pattern = Pattern.compile("\\GA*");
1300 matcher = pattern.matcher("1A2AA3");
1301 if (!matcher.find(1))
1302 failCount++;
1303 matcher.find();
1304 if (matcher.find())
1305 failCount++;
1306
1307 report("\\G");
1308 }
1309
1310 private static void zTest() {
1311 Pattern pattern = Pattern.compile("foo\\Z");
1312 // Positives
1313 check(pattern, "foo\u0085", true);
1314 check(pattern, "foo\u2028", true);
1315 check(pattern, "foo\u2029", true);
1316 check(pattern, "foo\n", true);
1317 check(pattern, "foo\r", true);
1318 check(pattern, "foo\r\n", true);
1319 // Negatives
1320 check(pattern, "fooo", false);
1321 check(pattern, "foo\n\r", false);
1322
1323 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1324 // Positives
1325 check(pattern, "foo", true);
1326 check(pattern, "foo\n", true);
1327 // Negatives
1328 check(pattern, "foo\r", false);
1329 check(pattern, "foo\u0085", false);
1330 check(pattern, "foo\u2028", false);
1331 check(pattern, "foo\u2029", false);
1332
1333 report("\\Z");
1334 }
1335
1336 private static void replaceFirstTest() {
1337 Pattern pattern = Pattern.compile("(ab)(c*)");
1338 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1339 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1340 failCount++;
1341
1342 matcher.reset("zzzabccczzzabcczzzabccczzz");
1343 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1344 failCount++;
1345
1346 matcher.reset("zzzabccczzzabcczzzabccczzz");
1347 String result = matcher.replaceFirst("$1");
1348 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1349 failCount++;
1350
1351 matcher.reset("zzzabccczzzabcczzzabccczzz");
1352 result = matcher.replaceFirst("$2");
1353 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1354 failCount++;
1355
1356 pattern = Pattern.compile("a*");
1357 matcher = pattern.matcher("aaaaaaaaaa");
1358 if (!matcher.replaceFirst("test").equals("test"))
1359 failCount++;
1360
1361 pattern = Pattern.compile("a+");
1362 matcher = pattern.matcher("zzzaaaaaaaaaa");
1363 if (!matcher.replaceFirst("test").equals("zzztest"))
1364 failCount++;
1365
1366 // Supplementary character test
1367 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1368 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1369 if (!matcher.replaceFirst(toSupplementaries("test"))
1370 .equals(toSupplementaries("testzzzabcczzzabccc")))
1371 failCount++;
1372
1373 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1374 if (!matcher.replaceFirst(toSupplementaries("test")).
1375 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1376 failCount++;
1377
1378 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1379 result = matcher.replaceFirst("$1");
1380 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1381 failCount++;
1382
1383 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1384 result = matcher.replaceFirst("$2");
1385 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1386 failCount++;
1387
1388 pattern = Pattern.compile(toSupplementaries("a*"));
1389 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1390 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1391 failCount++;
1392
1393 pattern = Pattern.compile(toSupplementaries("a+"));
1394 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1395 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1396 failCount++;
1397
1398 report("Replace First");
1399 }
1400
1401 private static void unixLinesTest() {
1402 Pattern pattern = Pattern.compile(".*");
1403 Matcher matcher = pattern.matcher("aa\u2028blah");
1404 matcher.find();
1405 if (!matcher.group(0).equals("aa"))
1406 failCount++;
1407
1408 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1409 matcher = pattern.matcher("aa\u2028blah");
1410 matcher.find();
1411 if (!matcher.group(0).equals("aa\u2028blah"))
1412 failCount++;
1413
1414 pattern = Pattern.compile("[az]$",
1415 Pattern.MULTILINE | Pattern.UNIX_LINES);
1416 matcher = pattern.matcher("aa\u2028zz");
1417 check(matcher, "a\u2028", false);
1418
1419 // Supplementary character test
1420 pattern = Pattern.compile(".*");
1421 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1422 matcher.find();
1423 if (!matcher.group(0).equals(toSupplementaries("aa")))
1424 failCount++;
1425
1426 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1427 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1428 matcher.find();
1429 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1430 failCount++;
1431
1432 pattern = Pattern.compile(toSupplementaries("[az]$"),
1433 Pattern.MULTILINE | Pattern.UNIX_LINES);
1434 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1435 check(matcher, toSupplementaries("a\u2028"), false);
1436
1437 report("Unix Lines");
1438 }
1439
1440 private static void commentsTest() {
1441 int flags = Pattern.COMMENTS;
1442
1443 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1444 Matcher matcher = pattern.matcher("aa#aa");
1445 if (!matcher.matches())
1446 failCount++;
1447
1448 pattern = Pattern.compile("aa # blah", flags);
1449 matcher = pattern.matcher("aa");
1450 if (!matcher.matches())
1451 failCount++;
1452
1453 pattern = Pattern.compile("aa blah", flags);
1454 matcher = pattern.matcher("aablah");
1455 if (!matcher.matches())
1456 failCount++;
1457
1458 pattern = Pattern.compile("aa # blah blech ", flags);
1459 matcher = pattern.matcher("aa");
1460 if (!matcher.matches())
1461 failCount++;
1462
1463 pattern = Pattern.compile("aa # blah\n ", flags);
1464 matcher = pattern.matcher("aa");
1465 if (!matcher.matches())
1466 failCount++;
1467
1468 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1469 matcher = pattern.matcher("aabc");
1470 if (!matcher.matches())
1471 failCount++;
1472
1473 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1474 matcher = pattern.matcher("aabc");
1475 if (!matcher.matches())
1476 failCount++;
1477
1478 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1479 matcher = pattern.matcher("aabc#blech");
1480 if (!matcher.matches())
1481 failCount++;
1482
1483 // Supplementary character test
1484 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1485 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1486 if (!matcher.matches())
1487 failCount++;
1488
1489 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1490 matcher = pattern.matcher(toSupplementaries("aa"));
1491 if (!matcher.matches())
1492 failCount++;
1493
1494 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1495 matcher = pattern.matcher(toSupplementaries("aablah"));
1496 if (!matcher.matches())
1497 failCount++;
1498
1499 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1500 matcher = pattern.matcher(toSupplementaries("aa"));
1501 if (!matcher.matches())
1502 failCount++;
1503
1504 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1505 matcher = pattern.matcher(toSupplementaries("aa"));
1506 if (!matcher.matches())
1507 failCount++;
1508
1509 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1510 matcher = pattern.matcher(toSupplementaries("aabc"));
1511 if (!matcher.matches())
1512 failCount++;
1513
1514 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1515 matcher = pattern.matcher(toSupplementaries("aabc"));
1516 if (!matcher.matches())
1517 failCount++;
1518
1519 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1520 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1521 if (!matcher.matches())
1522 failCount++;
1523
1524 report("Comments");
1525 }
1526
1527 private static void caseFoldingTest() { // bug 4504687
1528 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1529 Pattern pattern = Pattern.compile("aa", flags);
1530 Matcher matcher = pattern.matcher("ab");
1531 if (matcher.matches())
1532 failCount++;
1533
1534 pattern = Pattern.compile("aA", flags);
1535 matcher = pattern.matcher("ab");
1536 if (matcher.matches())
1537 failCount++;
1538
1539 pattern = Pattern.compile("aa", flags);
1540 matcher = pattern.matcher("aB");
1541 if (matcher.matches())
1542 failCount++;
1543 matcher = pattern.matcher("Ab");
1544 if (matcher.matches())
1545 failCount++;
1546
1547 // ASCII "a"
1548 // Latin-1 Supplement "a" + grave
1549 // Cyrillic "a"
1550 String[] patterns = new String[] {
1551 //single
1552 "a", "\u00e0", "\u0430",
1553 //slice
1554 "ab", "\u00e0\u00e1", "\u0430\u0431",
1555 //class single
1556 "[a]", "[\u00e0]", "[\u0430]",
1557 //class range
1558 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1559 //back reference
1560 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1561 };
1562
1563 String[] texts = new String[] {
1564 "A", "\u00c0", "\u0410",
1565 "AB", "\u00c0\u00c1", "\u0410\u0411",
1566 "A", "\u00c0", "\u0410",
1567 "B", "\u00c2", "\u0411",
1568 "aA", "\u00e0\u00c0", "\u0430\u0410"
1569 };
1570
1571 boolean[] expected = new boolean[] {
1572 true, false, false,
1573 true, false, false,
1574 true, false, false,
1575 true, false, false,
1576 true, false, false
1577 };
1578
1579 flags = Pattern.CASE_INSENSITIVE;
1580 for (int i = 0; i < patterns.length; i++) {
1581 pattern = Pattern.compile(patterns[i], flags);
1582 matcher = pattern.matcher(texts[i]);
1583 if (matcher.matches() != expected[i]) {
1584 System.out.println("<1> Failed at " + i);
1585 failCount++;
1586 }
1587 }
1588
1589 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1590 for (int i = 0; i < patterns.length; i++) {
1591 pattern = Pattern.compile(patterns[i], flags);
1592 matcher = pattern.matcher(texts[i]);
1593 if (!matcher.matches()) {
1594 System.out.println("<2> Failed at " + i);
1595 failCount++;
1596 }
1597 }
1598 // flag unicode_case alone should do nothing
1599 flags = Pattern.UNICODE_CASE;
1600 for (int i = 0; i < patterns.length; i++) {
1601 pattern = Pattern.compile(patterns[i], flags);
1602 matcher = pattern.matcher(texts[i]);
1603 if (matcher.matches()) {
1604 System.out.println("<3> Failed at " + i);
1605 failCount++;
1606 }
1607 }
1608
1609 // Special cases: i, I, u+0131 and u+0130
1610 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1611 pattern = Pattern.compile("[h-j]+", flags);
1612 if (!pattern.matcher("\u0131\u0130").matches())
1613 failCount++;
1614 report("Case Folding");
1615 }
1616
1617 private static void appendTest() {
1618 Pattern pattern = Pattern.compile("(ab)(cd)");
1619 Matcher matcher = pattern.matcher("abcd");
1620 String result = matcher.replaceAll("$2$1");
1621 if (!result.equals("cdab"))
1622 failCount++;
1623
1624 String s1 = "Swap all: first = 123, second = 456";
1625 String s2 = "Swap one: first = 123, second = 456";
1626 String r = "$3$2$1";
1627 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1628 matcher = pattern.matcher(s1);
1629
1630 result = matcher.replaceAll(r);
1631 if (!result.equals("Swap all: 123 = first, 456 = second"))
1632 failCount++;
1633
1634 matcher = pattern.matcher(s2);
1635
1636 if (matcher.find()) {
1637 StringBuffer sb = new StringBuffer();
1638 matcher.appendReplacement(sb, r);
1639 matcher.appendTail(sb);
1640 result = sb.toString();
1641 if (!result.equals("Swap one: 123 = first, second = 456"))
1642 failCount++;
1643 }
1644
1645 // Supplementary character test
1646 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1647 matcher = pattern.matcher(toSupplementaries("abcd"));
1648 result = matcher.replaceAll("$2$1");
1649 if (!result.equals(toSupplementaries("cdab")))
1650 failCount++;
1651
1652 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1653 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1654 r = toSupplementaries("$3$2$1");
1655 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1656 matcher = pattern.matcher(s1);
1657
1658 result = matcher.replaceAll(r);
1659 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1660 failCount++;
1661
1662 matcher = pattern.matcher(s2);
1663
1664 if (matcher.find()) {
1665 StringBuffer sb = new StringBuffer();
1666 matcher.appendReplacement(sb, r);
1667 matcher.appendTail(sb);
1668 result = sb.toString();
1669 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1670 failCount++;
1671 }
1672 report("Append");
1673 }
1674
1675 private static void splitTest() {
1676 Pattern pattern = Pattern.compile(":");
1677 String[] result = pattern.split("foo:and:boo", 2);
1678 if (!result[0].equals("foo"))
1679 failCount++;
1680 if (!result[1].equals("and:boo"))
1681 failCount++;
1682 // Supplementary character test
1683 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1684 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1685 if (!result[0].equals(toSupplementaries("foo")))
1686 failCount++;
1687 if (!result[1].equals(toSupplementaries("andXboo")))
1688 failCount++;
1689
1690 CharBuffer cb = CharBuffer.allocate(100);
1691 cb.put("foo:and:boo");
1692 cb.flip();
1693 result = pattern.split(cb);
1694 if (!result[0].equals("foo"))
1695 failCount++;
1696 if (!result[1].equals("and"))
1697 failCount++;
1698 if (!result[2].equals("boo"))
1699 failCount++;
1700
1701 // Supplementary character test
1702 CharBuffer cbs = CharBuffer.allocate(100);
1703 cbs.put(toSupplementaries("fooXandXboo"));
1704 cbs.flip();
1705 result = patternX.split(cbs);
1706 if (!result[0].equals(toSupplementaries("foo")))
1707 failCount++;
1708 if (!result[1].equals(toSupplementaries("and")))
1709 failCount++;
1710 if (!result[2].equals(toSupplementaries("boo")))
1711 failCount++;
1712
1713 String source = "0123456789";
1714 for (int limit=-2; limit<3; limit++) {
1715 for (int x=0; x<10; x++) {
1716 result = source.split(Integer.toString(x), limit);
1717 int expectedLength = limit < 1 ? 2 : limit;
1718
1719 if ((limit == 0) && (x == 9)) {
1720 // expected dropping of ""
1721 if (result.length != 1)
1722 failCount++;
1723 if (!result[0].equals("012345678")) {
1724 failCount++;
1725 }
1726 } else {
1727 if (result.length != expectedLength) {
1728 failCount++;
1729 }
1730 if (!result[0].equals(source.substring(0,x))) {
1731 if (limit != 1) {
1732 failCount++;
1733 } else {
1734 if (!result[0].equals(source.substring(0,10))) {
1735 failCount++;
1736 }
1737 }
1738 }
1739 if (expectedLength > 1) { // Check segment 2
1740 if (!result[1].equals(source.substring(x+1,10)))
1741 failCount++;
1742 }
1743 }
1744 }
1745 }
1746 // Check the case for no match found
1747 for (int limit=-2; limit<3; limit++) {
1748 result = source.split("e", limit);
1749 if (result.length != 1)
1750 failCount++;
1751 if (!result[0].equals(source))
1752 failCount++;
1753 }
1754 // Check the case for limit == 0, source = "";
1755 source = "";
1756 result = source.split("e", 0);
1757 if (result.length != 1)
1758 failCount++;
1759 if (!result[0].equals(source))
1760 failCount++;
1761
1762 report("Split");
1763 }
1764
1765 private static void negationTest() {
1766 Pattern pattern = Pattern.compile("[\\[@^]+");
1767 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1768 if (!matcher.find())
1769 failCount++;
1770 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1771 failCount++;
1772 pattern = Pattern.compile("[@\\[^]+");
1773 matcher = pattern.matcher("@@@@[[[[^^^^");
1774 if (!matcher.find())
1775 failCount++;
1776 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1777 failCount++;
1778 pattern = Pattern.compile("[@\\[^@]+");
1779 matcher = pattern.matcher("@@@@[[[[^^^^");
1780 if (!matcher.find())
1781 failCount++;
1782 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1783 failCount++;
1784
1785 pattern = Pattern.compile("\\)");
1786 matcher = pattern.matcher("xxx)xxx");
1787 if (!matcher.find())
1788 failCount++;
1789
1790 report("Negation");
1791 }
1792
1793 private static void ampersandTest() {
1794 Pattern pattern = Pattern.compile("[&@]+");
1795 check(pattern, "@@@@&&&&", true);
1796
1797 pattern = Pattern.compile("[@&]+");
1798 check(pattern, "@@@@&&&&", true);
1799
1800 pattern = Pattern.compile("[@\\&]+");
1801 check(pattern, "@@@@&&&&", true);
1802
1803 report("Ampersand");
1804 }
1805
1806 private static void octalTest() throws Exception {
1807 Pattern pattern = Pattern.compile("\\u0007");
1808 Matcher matcher = pattern.matcher("\u0007");
1809 if (!matcher.matches())
1810 failCount++;
1811 pattern = Pattern.compile("\\07");
1812 matcher = pattern.matcher("\u0007");
1813 if (!matcher.matches())
1814 failCount++;
1815 pattern = Pattern.compile("\\007");
1816 matcher = pattern.matcher("\u0007");
1817 if (!matcher.matches())
1818 failCount++;
1819 pattern = Pattern.compile("\\0007");
1820 matcher = pattern.matcher("\u0007");
1821 if (!matcher.matches())
1822 failCount++;
1823 pattern = Pattern.compile("\\040");
1824 matcher = pattern.matcher("\u0020");
1825 if (!matcher.matches())
1826 failCount++;
1827 pattern = Pattern.compile("\\0403");
1828 matcher = pattern.matcher("\u00203");
1829 if (!matcher.matches())
1830 failCount++;
1831 pattern = Pattern.compile("\\0103");
1832 matcher = pattern.matcher("\u0043");
1833 if (!matcher.matches())
1834 failCount++;
1835
1836 report("Octal");
1837 }
1838
1839 private static void longPatternTest() throws Exception {
1840 try {
1841 Pattern pattern = Pattern.compile(
1842 "a 32-character-long pattern xxxx");
1843 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1844 pattern = Pattern.compile("a thirty four character long regex");
1845 StringBuffer patternToBe = new StringBuffer(101);
1846 for (int i=0; i<100; i++)
1847 patternToBe.append((char)(97 + i%26));
1848 pattern = Pattern.compile(patternToBe.toString());
1849 } catch (PatternSyntaxException e) {
1850 failCount++;
1851 }
1852
1853 // Supplementary character test
1854 try {
1855 Pattern pattern = Pattern.compile(
1856 toSupplementaries("a 32-character-long pattern xxxx"));
1857 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1858 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1859 StringBuffer patternToBe = new StringBuffer(101*2);
1860 for (int i=0; i<100; i++)
1861 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1862 + 97 + i%26));
1863 pattern = Pattern.compile(patternToBe.toString());
1864 } catch (PatternSyntaxException e) {
1865 failCount++;
1866 }
1867 report("LongPattern");
1868 }
1869
1870 private static void group0Test() throws Exception {
1871 Pattern pattern = Pattern.compile("(tes)ting");
1872 Matcher matcher = pattern.matcher("testing");
1873 check(matcher, "testing");
1874
1875 matcher.reset("testing");
1876 if (matcher.lookingAt()) {
1877 if (!matcher.group(0).equals("testing"))
1878 failCount++;
1879 } else {
1880 failCount++;
1881 }
1882
1883 matcher.reset("testing");
1884 if (matcher.matches()) {
1885 if (!matcher.group(0).equals("testing"))
1886 failCount++;
1887 } else {
1888 failCount++;
1889 }
1890
1891 pattern = Pattern.compile("(tes)ting");
1892 matcher = pattern.matcher("testing");
1893 if (matcher.lookingAt()) {
1894 if (!matcher.group(0).equals("testing"))
1895 failCount++;
1896 } else {
1897 failCount++;
1898 }
1899
1900 pattern = Pattern.compile("^(tes)ting");
1901 matcher = pattern.matcher("testing");
1902 if (matcher.matches()) {
1903 if (!matcher.group(0).equals("testing"))
1904 failCount++;
1905 } else {
1906 failCount++;
1907 }
1908
1909 // Supplementary character test
1910 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1911 matcher = pattern.matcher(toSupplementaries("testing"));
1912 check(matcher, toSupplementaries("testing"));
1913
1914 matcher.reset(toSupplementaries("testing"));
1915 if (matcher.lookingAt()) {
1916 if (!matcher.group(0).equals(toSupplementaries("testing")))
1917 failCount++;
1918 } else {
1919 failCount++;
1920 }
1921
1922 matcher.reset(toSupplementaries("testing"));
1923 if (matcher.matches()) {
1924 if (!matcher.group(0).equals(toSupplementaries("testing")))
1925 failCount++;
1926 } else {
1927 failCount++;
1928 }
1929
1930 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1931 matcher = pattern.matcher(toSupplementaries("testing"));
1932 if (matcher.lookingAt()) {
1933 if (!matcher.group(0).equals(toSupplementaries("testing")))
1934 failCount++;
1935 } else {
1936 failCount++;
1937 }
1938
1939 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1940 matcher = pattern.matcher(toSupplementaries("testing"));
1941 if (matcher.matches()) {
1942 if (!matcher.group(0).equals(toSupplementaries("testing")))
1943 failCount++;
1944 } else {
1945 failCount++;
1946 }
1947
1948 report("Group0");
1949 }
1950
1951 private static void findIntTest() throws Exception {
1952 Pattern p = Pattern.compile("blah");
1953 Matcher m = p.matcher("zzzzblahzzzzzblah");
1954 boolean result = m.find(2);
1955 if (!result)
1956 failCount++;
1957
1958 p = Pattern.compile("$");
1959 m = p.matcher("1234567890");
1960 result = m.find(10);
1961 if (!result)
1962 failCount++;
1963 try {
1964 result = m.find(11);
1965 failCount++;
1966 } catch (IndexOutOfBoundsException e) {
1967 // correct result
1968 }
1969
1970 // Supplementary character test
1971 p = Pattern.compile(toSupplementaries("blah"));
1972 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1973 result = m.find(2);
1974 if (!result)
1975 failCount++;
1976
1977 report("FindInt");
1978 }
1979
1980 private static void emptyPatternTest() throws Exception {
1981 Pattern p = Pattern.compile("");
1982 Matcher m = p.matcher("foo");
1983
1984 // Should find empty pattern at beginning of input
1985 boolean result = m.find();
1986 if (result != true)
1987 failCount++;
1988 if (m.start() != 0)
1989 failCount++;
1990
1991 // Should not match entire input if input is not empty
1992 m.reset();
1993 result = m.matches();
1994 if (result == true)
1995 failCount++;
1996
1997 try {
1998 m.start(0);
1999 failCount++;
2000 } catch (IllegalStateException e) {
2001 // Correct result
2002 }
2003
2004 // Should match entire input if input is empty
2005 m.reset("");
2006 result = m.matches();
2007 if (result != true)
2008 failCount++;
2009
2010 result = Pattern.matches("", "");
2011 if (result != true)
2012 failCount++;
2013
2014 result = Pattern.matches("", "foo");
2015 if (result == true)
2016 failCount++;
2017 report("EmptyPattern");
2018 }
2019
2020 private static void charClassTest() throws Exception {
2021 Pattern pattern = Pattern.compile("blah[ab]]blech");
2022 check(pattern, "blahb]blech", true);
2023
2024 pattern = Pattern.compile("[abc[def]]");
2025 check(pattern, "b", true);
2026
2027 // Supplementary character tests
2028 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2029 check(pattern, toSupplementaries("blahb]blech"), true);
2030
2031 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2032 check(pattern, toSupplementaries("b"), true);
2033
2034 try {
2035 // u00ff when UNICODE_CASE
2036 pattern = Pattern.compile("[ab\u00ffcd]",
2037 Pattern.CASE_INSENSITIVE|
2038 Pattern.UNICODE_CASE);
2039 check(pattern, "ab\u00ffcd", true);
2040 check(pattern, "Ab\u0178Cd", true);
2041
2042 // u00b5 when UNICODE_CASE
2043 pattern = Pattern.compile("[ab\u00b5cd]",
2044 Pattern.CASE_INSENSITIVE|
2045 Pattern.UNICODE_CASE);
2046 check(pattern, "ab\u00b5cd", true);
2047 check(pattern, "Ab\u039cCd", true);
2048 } catch (Exception e) { failCount++; }
2049
2050 /* Special cases
2051 (1)LatinSmallLetterLongS u+017f
2052 (2)LatinSmallLetterDotlessI u+0131
2053 (3)LatineCapitalLetterIWithDotAbove u+0130
2054 (4)KelvinSign u+212a
2055 (5)AngstromSign u+212b
2056 */
2057 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2058 pattern = Pattern.compile("[sik\u00c5]+", flags);
2059 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2060 failCount++;
2061
2062 report("CharClass");
2063 }
2064
2065 private static void caretTest() throws Exception {
2066 Pattern pattern = Pattern.compile("\\w*");
2067 Matcher matcher = pattern.matcher("a#bc#def##g");
2068 check(matcher, "a");
2069 check(matcher, "");
2070 check(matcher, "bc");
2071 check(matcher, "");
2072 check(matcher, "def");
2073 check(matcher, "");
2074 check(matcher, "");
2075 check(matcher, "g");
2076 check(matcher, "");
2077 if (matcher.find())
2078 failCount++;
2079
2080 pattern = Pattern.compile("^\\w*");
2081 matcher = pattern.matcher("a#bc#def##g");
2082 check(matcher, "a");
2083 if (matcher.find())
2084 failCount++;
2085
2086 pattern = Pattern.compile("\\w");
2087 matcher = pattern.matcher("abc##x");
2088 check(matcher, "a");
2089 check(matcher, "b");
2090 check(matcher, "c");
2091 check(matcher, "x");
2092 if (matcher.find())
2093 failCount++;
2094
2095 pattern = Pattern.compile("^\\w");
2096 matcher = pattern.matcher("abc##x");
2097 check(matcher, "a");
2098 if (matcher.find())
2099 failCount++;
2100
2101 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2102 matcher = pattern.matcher("abcdef-ghi\njklmno");
2103 check(matcher, "abc");
2104 if (matcher.find())
2105 failCount++;
2106
2107 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2108 matcher = pattern.matcher("abcdef-ghi\njklmno");
2109 check(matcher, "abc");
2110 check(matcher, "jkl");
2111 if (matcher.find())
2112 failCount++;
2113
2114 pattern = Pattern.compile("^", Pattern.MULTILINE);
2115 matcher = pattern.matcher("this is some text");
2116 String result = matcher.replaceAll("X");
2117 if (!result.equals("Xthis is some text"))
2118 failCount++;
2119
2120 pattern = Pattern.compile("^");
2121 matcher = pattern.matcher("this is some text");
2122 result = matcher.replaceAll("X");
2123 if (!result.equals("Xthis is some text"))
2124 failCount++;
2125
2126 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2127 matcher = pattern.matcher("this is some text\n");
2128 result = matcher.replaceAll("X");
2129 if (!result.equals("Xthis is some text\n"))
2130 failCount++;
2131
2132 report("Caret");
2133 }
2134
2135 private static void groupCaptureTest() throws Exception {
2136 // Independent group
2137 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2138 Matcher matcher = pattern.matcher("xxxyyyzzz");
2139 matcher.find();
2140 try {
2141 String blah = matcher.group(1);
2142 failCount++;
2143 } catch (IndexOutOfBoundsException ioobe) {
2144 // Good result
2145 }
2146 // Pure group
2147 pattern = Pattern.compile("x+(?:y+)z+");
2148 matcher = pattern.matcher("xxxyyyzzz");
2149 matcher.find();
2150 try {
2151 String blah = matcher.group(1);
2152 failCount++;
2153 } catch (IndexOutOfBoundsException ioobe) {
2154 // Good result
2155 }
2156
2157 // Supplementary character tests
2158 // Independent group
2159 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2160 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2161 matcher.find();
2162 try {
2163 String blah = matcher.group(1);
2164 failCount++;
2165 } catch (IndexOutOfBoundsException ioobe) {
2166 // Good result
2167 }
2168 // Pure group
2169 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2170 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2171 matcher.find();
2172 try {
2173 String blah = matcher.group(1);
2174 failCount++;
2175 } catch (IndexOutOfBoundsException ioobe) {
2176 // Good result
2177 }
2178
2179 report("GroupCapture");
2180 }
2181
2182 private static void backRefTest() throws Exception {
2183 Pattern pattern = Pattern.compile("(a*)bc\\1");
2184 check(pattern, "zzzaabcazzz", true);
2185
2186 pattern = Pattern.compile("(a*)bc\\1");
2187 check(pattern, "zzzaabcaazzz", true);
2188
2189 pattern = Pattern.compile("(abc)(def)\\1");
2190 check(pattern, "abcdefabc", true);
2191
2192 pattern = Pattern.compile("(abc)(def)\\3");
2193 check(pattern, "abcdefabc", false);
2194
2195 try {
2196 for (int i = 1; i < 10; i++) {
2197 // Make sure backref 1-9 are always accepted
2198 pattern = Pattern.compile("abcdef\\" + i);
2199 // and fail to match if the target group does not exit
2200 check(pattern, "abcdef", false);
2201 }
2202 } catch(PatternSyntaxException e) {
2203 failCount++;
2204 }
2205
2206 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2207 check(pattern, "abcdefghija", false);
2208 check(pattern, "abcdefghija1", true);
2209
2210 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2211 check(pattern, "abcdefghijkk", true);
2212
2213 pattern = Pattern.compile("(a)bcdefghij\\11");
2214 check(pattern, "abcdefghija1", true);
2215
2216 // Supplementary character tests
2217 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2218 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2219
2220 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2221 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2222
2223 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2224 check(pattern, toSupplementaries("abcdefabc"), true);
2225
2226 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2227 check(pattern, toSupplementaries("abcdefabc"), false);
2228
2229 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2230 check(pattern, toSupplementaries("abcdefghija"), false);
2231 check(pattern, toSupplementaries("abcdefghija1"), true);
2232
2233 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2234 check(pattern, toSupplementaries("abcdefghijkk"), true);
2235
2236 report("BackRef");
2237 }
2238
2239 /**
2240 * Unicode Technical Report #18, section 2.6 End of Line
2241 * There is no empty line to be matched in the sequence \u000D\u000A
2242 * but there is an empty line in the sequence \u000A\u000D.
2243 */
2244 private static void anchorTest() throws Exception {
2245 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2246 Matcher m = p.matcher("blah1\r\nblah2");
2247 m.find();
2248 m.find();
2249 if (!m.group().equals("blah2"))
2250 failCount++;
2251
2252 m.reset("blah1\n\rblah2");
2253 m.find();
2254 m.find();
2255 m.find();
2256 if (!m.group().equals("blah2"))
2257 failCount++;
2258
2259 // Test behavior of $ with \r\n at end of input
2260 p = Pattern.compile(".+$");
2261 m = p.matcher("blah1\r\n");
2262 if (!m.find())
2263 failCount++;
2264 if (!m.group().equals("blah1"))
2265 failCount++;
2266 if (m.find())
2267 failCount++;
2268
2269 // Test behavior of $ with \r\n at end of input in multiline
2270 p = Pattern.compile(".+$", Pattern.MULTILINE);
2271 m = p.matcher("blah1\r\n");
2272 if (!m.find())
2273 failCount++;
2274 if (m.find())
2275 failCount++;
2276
2277 // Test for $ recognition of \u0085 for bug 4527731
2278 p = Pattern.compile(".+$", Pattern.MULTILINE);
2279 m = p.matcher("blah1\u0085");
2280 if (!m.find())
2281 failCount++;
2282
2283 // Supplementary character test
2284 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2285 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2286 m.find();
2287 m.find();
2288 if (!m.group().equals(toSupplementaries("blah2")))
2289 failCount++;
2290
2291 m.reset(toSupplementaries("blah1\n\rblah2"));
2292 m.find();
2293 m.find();
2294 m.find();
2295 if (!m.group().equals(toSupplementaries("blah2")))
2296 failCount++;
2297
2298 // Test behavior of $ with \r\n at end of input
2299 p = Pattern.compile(".+$");
2300 m = p.matcher(toSupplementaries("blah1\r\n"));
2301 if (!m.find())
2302 failCount++;
2303 if (!m.group().equals(toSupplementaries("blah1")))
2304 failCount++;
2305 if (m.find())
2306 failCount++;
2307
2308 // Test behavior of $ with \r\n at end of input in multiline
2309 p = Pattern.compile(".+$", Pattern.MULTILINE);
2310 m = p.matcher(toSupplementaries("blah1\r\n"));
2311 if (!m.find())
2312 failCount++;
2313 if (m.find())
2314 failCount++;
2315
2316 // Test for $ recognition of \u0085 for bug 4527731
2317 p = Pattern.compile(".+$", Pattern.MULTILINE);
2318 m = p.matcher(toSupplementaries("blah1\u0085"));
2319 if (!m.find())
2320 failCount++;
2321
2322 report("Anchors");
2323 }
2324
2325 /**
2326 * A basic sanity test of Matcher.lookingAt().
2327 */
2328 private static void lookingAtTest() throws Exception {
2329 Pattern p = Pattern.compile("(ab)(c*)");
2330 Matcher m = p.matcher("abccczzzabcczzzabccc");
2331
2332 if (!m.lookingAt())
2333 failCount++;
2334
2335 if (!m.group().equals(m.group(0)))
2336 failCount++;
2337
2338 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2339 if (m.lookingAt())
2340 failCount++;
2341
2342 // Supplementary character test
2343 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2344 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2345
2346 if (!m.lookingAt())
2347 failCount++;
2348
2349 if (!m.group().equals(m.group(0)))
2350 failCount++;
2351
2352 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2353 if (m.lookingAt())
2354 failCount++;
2355
2356 report("Looking At");
2357 }
2358
2359 /**
2360 * A basic sanity test of Matcher.matches().
2361 */
2362 private static void matchesTest() throws Exception {
2363 // matches()
2364 Pattern p = Pattern.compile("ulb(c*)");
2365 Matcher m = p.matcher("ulbcccccc");
2366 if (!m.matches())
2367 failCount++;
2368
2369 // find() but not matches()
2370 m.reset("zzzulbcccccc");
2371 if (m.matches())
2372 failCount++;
2373
2374 // lookingAt() but not matches()
2375 m.reset("ulbccccccdef");
2376 if (m.matches())
2377 failCount++;
2378
2379 // matches()
2380 p = Pattern.compile("a|ad");
2381 m = p.matcher("ad");
2382 if (!m.matches())
2383 failCount++;
2384
2385 // Supplementary character test
2386 // matches()
2387 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2388 m = p.matcher(toSupplementaries("ulbcccccc"));
2389 if (!m.matches())
2390 failCount++;
2391
2392 // find() but not matches()
2393 m.reset(toSupplementaries("zzzulbcccccc"));
2394 if (m.matches())
2395 failCount++;
2396
2397 // lookingAt() but not matches()
2398 m.reset(toSupplementaries("ulbccccccdef"));
2399 if (m.matches())
2400 failCount++;
2401
2402 // matches()
2403 p = Pattern.compile(toSupplementaries("a|ad"));
2404 m = p.matcher(toSupplementaries("ad"));
2405 if (!m.matches())
2406 failCount++;
2407
2408 report("Matches");
2409 }
2410
2411 /**
2412 * A basic sanity test of Pattern.matches().
2413 */
2414 private static void patternMatchesTest() throws Exception {
2415 // matches()
2416 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2417 toSupplementaries("ulbcccccc")))
2418 failCount++;
2419
2420 // find() but not matches()
2421 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2422 toSupplementaries("zzzulbcccccc")))
2423 failCount++;
2424
2425 // lookingAt() but not matches()
2426 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2427 toSupplementaries("ulbccccccdef")))
2428 failCount++;
2429
2430 // Supplementary character test
2431 // matches()
2432 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2433 toSupplementaries("ulbcccccc")))
2434 failCount++;
2435
2436 // find() but not matches()
2437 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2438 toSupplementaries("zzzulbcccccc")))
2439 failCount++;
2440
2441 // lookingAt() but not matches()
2442 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2443 toSupplementaries("ulbccccccdef")))
2444 failCount++;
2445
2446 report("Pattern Matches");
2447 }
2448
2449 /**
2450 * Canonical equivalence testing. Tests the ability of the engine
2451 * to match sequences that are not explicitly specified in the
2452 * pattern when they are considered equivalent by the Unicode Standard.
2453 */
2454 private static void ceTest() throws Exception {
2455 // Decomposed char outside char classes
2456 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2457 Matcher m = p.matcher("test\u00e5");
2458 if (!m.matches())
2459 failCount++;
2460
2461 m.reset("testa\u030a");
2462 if (!m.matches())
2463 failCount++;
2464
2465 // Composed char outside char classes
2466 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2467 m = p.matcher("test\u00e5");
2468 if (!m.matches())
2469 failCount++;
2470
2471 m.reset("testa\u030a");
2472 if (!m.find())
2473 failCount++;
2474
2475 // Decomposed char inside a char class
2476 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2477 m = p.matcher("test\u00e5");
2478 if (!m.find())
2479 failCount++;
2480
2481 m.reset("testa\u030a");
2482 if (!m.find())
2483 failCount++;
2484
2485 // Composed char inside a char class
2486 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2487 m = p.matcher("test\u00e5");
2488 if (!m.find())
2489 failCount++;
2490
2491 m.reset("testa\u0300");
2492 if (!m.find())
2493 failCount++;
2494
2495 m.reset("testa\u030a");
2496 if (!m.find())
2497 failCount++;
2498
2499 // Marks that cannot legally change order and be equivalent
2500 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2501 check(p, "testa\u0308\u0300", true);
2502 check(p, "testa\u0300\u0308", false);
2503
2504 // Marks that can legally change order and be equivalent
2505 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2506 check(p, "testa\u0308\u0323", true);
2507 check(p, "testa\u0323\u0308", true);
2508
2509 // Test all equivalences of the sequence a\u0308\u0323\u0300
2510 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2511 check(p, "testa\u0308\u0323\u0300", true);
2512 check(p, "testa\u0323\u0308\u0300", true);
2513 check(p, "testa\u0308\u0300\u0323", true);
2514 check(p, "test\u00e4\u0323\u0300", true);
2515 check(p, "test\u00e4\u0300\u0323", true);
2516
2517 /*
2518 * The following canonical equivalence tests don't work. Bug id: 4916384.
2519 *
2520 // Decomposed hangul (jamos)
2521 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2522 m = p.matcher("\u1100\u1161");
2523 if (!m.matches())
2524 failCount++;
2525
2526 m.reset("\uac00");
2527 if (!m.matches())
2528 failCount++;
2529
2530 // Composed hangul
2531 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2532 m = p.matcher("\u1100\u1161");
2533 if (!m.matches())
2534 failCount++;
2535
2536 m.reset("\uac00");
2537 if (!m.matches())
2538 failCount++;
2539
2540 // Decomposed supplementary outside char classes
2541 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2542 m = p.matcher("test\ud834\uddc0");
2543 if (!m.matches())
2544 failCount++;
2545
2546 m.reset("test\ud834\uddbc\ud834\udd6f");
2547 if (!m.matches())
2548 failCount++;
2549
2550 // Composed supplementary outside char classes
2551 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2552 m.reset("test\ud834\uddbc\ud834\udd6f");
2553 if (!m.matches())
2554 failCount++;
2555
2556 m = p.matcher("test\ud834\uddc0");
2557 if (!m.matches())
2558 failCount++;
2559
2560 */
2561
2562 report("Canonical Equivalence");
2563 }
2564
2565 /**
2566 * A basic sanity test of Matcher.replaceAll().
2567 */
2568 private static void globalSubstitute() throws Exception {
2569 // Global substitution with a literal
2570 Pattern p = Pattern.compile("(ab)(c*)");
2571 Matcher m = p.matcher("abccczzzabcczzzabccc");
2572 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2573 failCount++;
2574
2575 m.reset("zzzabccczzzabcczzzabccczzz");
2576 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2577 failCount++;
2578
2579 // Global substitution with groups
2580 m.reset("zzzabccczzzabcczzzabccczzz");
2581 String result = m.replaceAll("$1");
2582 if (!result.equals("zzzabzzzabzzzabzzz"))
2583 failCount++;
2584
2585 // Supplementary character test
2586 // Global substitution with a literal
2587 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2588 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2589 if (!m.replaceAll(toSupplementaries("test")).
2590 equals(toSupplementaries("testzzztestzzztest")))
2591 failCount++;
2592
2593 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2594 if (!m.replaceAll(toSupplementaries("test")).
2595 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2596 failCount++;
2597
2598 // Global substitution with groups
2599 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2600 result = m.replaceAll("$1");
2601 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2602 failCount++;
2603
2604 report("Global Substitution");
2605 }
2606
2607 /**
2608 * Tests the usage of Matcher.appendReplacement() with literal
2609 * and group substitutions.
2610 */
2611 private static void stringbufferSubstitute() throws Exception {
2612 // SB substitution with literal
2613 String blah = "zzzblahzzz";
2614 Pattern p = Pattern.compile("blah");
2615 Matcher m = p.matcher(blah);
2616 StringBuffer result = new StringBuffer();
2617 try {
2618 m.appendReplacement(result, "blech");
2619 failCount++;
2620 } catch (IllegalStateException e) {
2621 }
2622 m.find();
2623 m.appendReplacement(result, "blech");
2624 if (!result.toString().equals("zzzblech"))
2625 failCount++;
2626
2627 m.appendTail(result);
2628 if (!result.toString().equals("zzzblechzzz"))
2629 failCount++;
2630
2631 // SB substitution with groups
2632 blah = "zzzabcdzzz";
2633 p = Pattern.compile("(ab)(cd)*");
2634 m = p.matcher(blah);
2635 result = new StringBuffer();
2636 try {
2637 m.appendReplacement(result, "$1");
2638 failCount++;
2639 } catch (IllegalStateException e) {
2640 }
2641 m.find();
2642 m.appendReplacement(result, "$1");
2643 if (!result.toString().equals("zzzab"))
2644 failCount++;
2645
2646 m.appendTail(result);
2647 if (!result.toString().equals("zzzabzzz"))
2648 failCount++;
2649
2650 // SB substitution with 3 groups
2651 blah = "zzzabcdcdefzzz";
2652 p = Pattern.compile("(ab)(cd)*(ef)");
2653 m = p.matcher(blah);
2654 result = new StringBuffer();
2655 try {
2656 m.appendReplacement(result, "$1w$2w$3");
2657 failCount++;
2658 } catch (IllegalStateException e) {
2659 }
2660 m.find();
2661 m.appendReplacement(result, "$1w$2w$3");
2662 if (!result.toString().equals("zzzabwcdwef"))
2663 failCount++;
2664
2665 m.appendTail(result);
2666 if (!result.toString().equals("zzzabwcdwefzzz"))
2667 failCount++;
2668
2669 // SB substitution with groups and three matches
2670 // skipping middle match
2671 blah = "zzzabcdzzzabcddzzzabcdzzz";
2672 p = Pattern.compile("(ab)(cd*)");
2673 m = p.matcher(blah);
2674 result = new StringBuffer();
2675 try {
2676 m.appendReplacement(result, "$1");
2677 failCount++;
2678 } catch (IllegalStateException e) {
2679 }
2680 m.find();
2681 m.appendReplacement(result, "$1");
2682 if (!result.toString().equals("zzzab"))
2683 failCount++;
2684
2685 m.find();
2686 m.find();
2687 m.appendReplacement(result, "$2");
2688 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2689 failCount++;
2690
2691 m.appendTail(result);
2692 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2693 failCount++;
2694
2695 // Check to make sure escaped $ is ignored
2696 blah = "zzzabcdcdefzzz";
2697 p = Pattern.compile("(ab)(cd)*(ef)");
2698 m = p.matcher(blah);
2699 result = new StringBuffer();
2700 m.find();
2701 m.appendReplacement(result, "$1w\\$2w$3");
2702 if (!result.toString().equals("zzzabw$2wef"))
2703 failCount++;
2704
2705 m.appendTail(result);
2706 if (!result.toString().equals("zzzabw$2wefzzz"))
2707 failCount++;
2708
2709 // Check to make sure a reference to nonexistent group causes error
2710 blah = "zzzabcdcdefzzz";
2711 p = Pattern.compile("(ab)(cd)*(ef)");
2712 m = p.matcher(blah);
2713 result = new StringBuffer();
2714 m.find();
2715 try {
2716 m.appendReplacement(result, "$1w$5w$3");
2717 failCount++;
2718 } catch (IndexOutOfBoundsException ioobe) {
2719 // Correct result
2720 }
2721
2722 // Check double digit group references
2723 blah = "zzz123456789101112zzz";
2724 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2725 m = p.matcher(blah);
2726 result = new StringBuffer();
2727 m.find();
2728 m.appendReplacement(result, "$1w$11w$3");
2729 if (!result.toString().equals("zzz1w11w3"))
2730 failCount++;
2731
2732 // Check to make sure it backs off $15 to $1 if only three groups
2733 blah = "zzzabcdcdefzzz";
2734 p = Pattern.compile("(ab)(cd)*(ef)");
2735 m = p.matcher(blah);
2736 result = new StringBuffer();
2737 m.find();
2738 m.appendReplacement(result, "$1w$15w$3");
2739 if (!result.toString().equals("zzzabwab5wef"))
2740 failCount++;
2741
2742
2743 // Supplementary character test
2744 // SB substitution with literal
2745 blah = toSupplementaries("zzzblahzzz");
2746 p = Pattern.compile(toSupplementaries("blah"));
2747 m = p.matcher(blah);
2748 result = new StringBuffer();
2749 try {
2750 m.appendReplacement(result, toSupplementaries("blech"));
2751 failCount++;
2752 } catch (IllegalStateException e) {
2753 }
2754 m.find();
2755 m.appendReplacement(result, toSupplementaries("blech"));
2756 if (!result.toString().equals(toSupplementaries("zzzblech")))
2757 failCount++;
2758
2759 m.appendTail(result);
2760 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2761 failCount++;
2762
2763 // SB substitution with groups
2764 blah = toSupplementaries("zzzabcdzzz");
2765 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2766 m = p.matcher(blah);
2767 result = new StringBuffer();
2768 try {
2769 m.appendReplacement(result, "$1");
2770 failCount++;
2771 } catch (IllegalStateException e) {
2772 }
2773 m.find();
2774 m.appendReplacement(result, "$1");
2775 if (!result.toString().equals(toSupplementaries("zzzab")))
2776 failCount++;
2777
2778 m.appendTail(result);
2779 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2780 failCount++;
2781
2782 // SB substitution with 3 groups
2783 blah = toSupplementaries("zzzabcdcdefzzz");
2784 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2785 m = p.matcher(blah);
2786 result = new StringBuffer();
2787 try {
2788 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2789 failCount++;
2790 } catch (IllegalStateException e) {
2791 }
2792 m.find();
2793 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2794 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2795 failCount++;
2796
2797 m.appendTail(result);
2798 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2799 failCount++;
2800
2801 // SB substitution with groups and three matches
2802 // skipping middle match
2803 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2804 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2805 m = p.matcher(blah);
2806 result = new StringBuffer();
2807 try {
2808 m.appendReplacement(result, "$1");
2809 failCount++;
2810 } catch (IllegalStateException e) {
2811 }
2812 m.find();
2813 m.appendReplacement(result, "$1");
2814 if (!result.toString().equals(toSupplementaries("zzzab")))
2815 failCount++;
2816
2817 m.find();
2818 m.find();
2819 m.appendReplacement(result, "$2");
2820 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2821 failCount++;
2822
2823 m.appendTail(result);
2824 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2825 failCount++;
2826
2827 // Check to make sure escaped $ is ignored
2828 blah = toSupplementaries("zzzabcdcdefzzz");
2829 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2830 m = p.matcher(blah);
2831 result = new StringBuffer();
2832 m.find();
2833 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2834 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2835 failCount++;
2836
2837 m.appendTail(result);
2838 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2839 failCount++;
2840
2841 // Check to make sure a reference to nonexistent group causes error
2842 blah = toSupplementaries("zzzabcdcdefzzz");
2843 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2844 m = p.matcher(blah);
2845 result = new StringBuffer();
2846 m.find();
2847 try {
2848 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2849 failCount++;
2850 } catch (IndexOutOfBoundsException ioobe) {
2851 // Correct result
2852 }
2853
2854 // Check double digit group references
2855 blah = toSupplementaries("zzz123456789101112zzz");
2856 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2857 m = p.matcher(blah);
2858 result = new StringBuffer();
2859 m.find();
2860 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2861 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2862 failCount++;
2863
2864 // Check to make sure it backs off $15 to $1 if only three groups
2865 blah = toSupplementaries("zzzabcdcdefzzz");
2866 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2867 m = p.matcher(blah);
2868 result = new StringBuffer();
2869 m.find();
2870 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2871 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2872 failCount++;
2873
2874 // Check nothing has been appended into the output buffer if
2875 // the replacement string triggers IllegalArgumentException.
2876 p = Pattern.compile("(abc)");
2877 m = p.matcher("abcd");
2878 result = new StringBuffer();
2879 m.find();
2880 try {
2881 m.appendReplacement(result, ("xyz$g"));
2882 failCount++;
2883 } catch (IllegalArgumentException iae) {
2884 if (result.length() != 0)
2885 failCount++;
2886 }
2887
2888 report("SB Substitution");
2889 }
2890
2891 /*
2892 * 5 groups of characters are created to make a substitution string.
2893 * A base string will be created including random lead chars, the
2894 * substitution string, and random trailing chars.
2895 * A pattern containing the 5 groups is searched for and replaced with:
2896 * random group + random string + random group.
2897 * The results are checked for correctness.
2898 */
2899 private static void substitutionBasher() {
2900 for (int runs = 0; runs<1000; runs++) {
2901 // Create a base string to work in
2902 int leadingChars = generator.nextInt(10);
2903 StringBuffer baseBuffer = new StringBuffer(100);
2904 String leadingString = getRandomAlphaString(leadingChars);
2905 baseBuffer.append(leadingString);
2906
2907 // Create 5 groups of random number of random chars
2908 // Create the string to substitute
2909 // Create the pattern string to search for
2910 StringBuffer bufferToSub = new StringBuffer(25);
2911 StringBuffer bufferToPat = new StringBuffer(50);
2912 String[] groups = new String[5];
2913 for(int i=0; i<5; i++) {
2914 int aGroupSize = generator.nextInt(5)+1;
2915 groups[i] = getRandomAlphaString(aGroupSize);
2916 bufferToSub.append(groups[i]);
2917 bufferToPat.append('(');
2918 bufferToPat.append(groups[i]);
2919 bufferToPat.append(')');
2920 }
2921 String stringToSub = bufferToSub.toString();
2922 String pattern = bufferToPat.toString();
2923
2924 // Place sub string into working string at random index
2925 baseBuffer.append(stringToSub);
2926
2927 // Append random chars to end
2928 int trailingChars = generator.nextInt(10);
2929 String trailingString = getRandomAlphaString(trailingChars);
2930 baseBuffer.append(trailingString);
2931 String baseString = baseBuffer.toString();
2932
2933 // Create test pattern and matcher
2934 Pattern p = Pattern.compile(pattern);
2935 Matcher m = p.matcher(baseString);
2936
2937 // Reject candidate if pattern happens to start early
2938 m.find();
2939 if (m.start() < leadingChars)
2940 continue;
2941
2942 // Reject candidate if more than one match
2943 if (m.find())
2944 continue;
2945
2946 // Construct a replacement string with :
2947 // random group + random string + random group
2948 StringBuffer bufferToRep = new StringBuffer();
2949 int groupIndex1 = generator.nextInt(5);
2950 bufferToRep.append("$" + (groupIndex1 + 1));
2951 String randomMidString = getRandomAlphaString(5);
2952 bufferToRep.append(randomMidString);
2953 int groupIndex2 = generator.nextInt(5);
2954 bufferToRep.append("$" + (groupIndex2 + 1));
2955 String replacement = bufferToRep.toString();
2956
2957 // Do the replacement
2958 String result = m.replaceAll(replacement);
2959
2960 // Construct expected result
2961 StringBuffer bufferToRes = new StringBuffer();
2962 bufferToRes.append(leadingString);
2963 bufferToRes.append(groups[groupIndex1]);
2964 bufferToRes.append(randomMidString);
2965 bufferToRes.append(groups[groupIndex2]);
2966 bufferToRes.append(trailingString);
2967 String expectedResult = bufferToRes.toString();
2968
2969 // Check results
2970 if (!result.equals(expectedResult))
2971 failCount++;
2972 }
2973
2974 report("Substitution Basher");
2975 }
2976
/**
 * Checks the handling of some escape sequences that the Pattern
 * class should process instead of the java compiler. These are
 * not in the file because the escapes should be processed
 * by the Pattern class when the regex is compiled.
 */
2983 private static void escapes() throws Exception {
2984 Pattern p = Pattern.compile("\\043");
2985 Matcher m = p.matcher("#");
2986 if (!m.find())
2987 failCount++;
2988
2989 p = Pattern.compile("\\x23");
2990 m = p.matcher("#");
2991 if (!m.find())
2992 failCount++;
2993
2994 p = Pattern.compile("\\u0023");
2995 m = p.matcher("#");
2996 if (!m.find())
2997 failCount++;
2998
2999 report("Escape sequences");
3000 }
3001
3002 /**
3003 * Checks the handling of blank input situations. These
3004 * tests are incompatible with my test file format.
3005 */
3006 private static void blankInput() throws Exception {
3007 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3008 Matcher m = p.matcher("");
3009 if (m.find())
3010 failCount++;
3011
3012 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3013 m = p.matcher("");
3014 if (!m.find())
3015 failCount++;
3016
3017 p = Pattern.compile("abc");
3018 m = p.matcher("");
3019 if (m.find())
3020 failCount++;
3021
3022 p = Pattern.compile("a*");
3023 m = p.matcher("");
3024 if (!m.find())
3025 failCount++;
3026
3027 report("Blank input");
3028 }
3029
3030 /**
3031 * Tests the Boyer-Moore pattern matching of a character sequence
3032 * on randomly generated patterns.
3033 */
3034 private static void bm() throws Exception {
3035 doBnM('a');
3036 report("Boyer Moore (ASCII)");
3037
3038 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3039 report("Boyer Moore (Supplementary)");
3040 }
3041
3042 private static void doBnM(int baseCharacter) throws Exception {
3043 int achar=0;
3044
3045 for (int i=0; i<100; i++) {
3046 // Create a short pattern to search for
3047 int patternLength = generator.nextInt(7) + 4;
3048 StringBuffer patternBuffer = new StringBuffer(patternLength);
3049 for (int x=0; x<patternLength; x++) {
3050 int ch = baseCharacter + generator.nextInt(26);
3051 if (Character.isSupplementaryCodePoint(ch)) {
3052 patternBuffer.append(Character.toChars(ch));
3053 } else {
3054 patternBuffer.append((char)ch);
3055 }
3056 }
3057 String pattern = patternBuffer.toString();
3058 Pattern p = Pattern.compile(pattern);
3059
3060 // Create a buffer with random ASCII chars that does
3061 // not match the sample
3062 String toSearch = null;
3063 StringBuffer s = null;
3064 Matcher m = p.matcher("");
3065 do {
3066 s = new StringBuffer(100);
3067 for (int x=0; x<100; x++) {
3068 int ch = baseCharacter + generator.nextInt(26);
3069 if (Character.isSupplementaryCodePoint(ch)) {
3070 s.append(Character.toChars(ch));
3071 } else {
3072 s.append((char)ch);
3073 }
3074 }
3075 toSearch = s.toString();
3076 m.reset(toSearch);
3077 } while (m.find());
3078
3079 // Insert the pattern at a random spot
3080 int insertIndex = generator.nextInt(99);
3081 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3082 insertIndex++;
3083 s = s.insert(insertIndex, pattern);
3084 toSearch = s.toString();
3085
3086 // Make sure that the pattern is found
3087 m.reset(toSearch);
3088 if (!m.find())
3089 failCount++;
3090
3091 // Make sure that the match text is the pattern
3092 if (!m.group().equals(pattern))
3093 failCount++;
3094
3095 // Make sure match occured at insertion point
3096 if (m.start() != insertIndex)
3097 failCount++;
3098 }
3099 }
3100
3101 /**
3102 * Tests the matching of slices on randomly generated patterns.
3103 * The Boyer-Moore optimization is not done on these patterns
3104 * because it uses unicode case folding.
3105 */
3106 private static void slice() throws Exception {
3107 doSlice(Character.MAX_VALUE);
3108 report("Slice");
3109
3110 doSlice(Character.MAX_CODE_POINT);
3111 report("Slice (Supplementary)");
3112 }
3113
3114 private static void doSlice(int maxCharacter) throws Exception {
3115 Random generator = new Random();
3116 int achar=0;
3117
3118 for (int i=0; i<100; i++) {
3119 // Create a short pattern to search for
3120 int patternLength = generator.nextInt(7) + 4;
3121 StringBuffer patternBuffer = new StringBuffer(patternLength);
3122 for (int x=0; x<patternLength; x++) {
3123 int randomChar = 0;
3124 while (!Character.isLetterOrDigit(randomChar))
3125 randomChar = generator.nextInt(maxCharacter);
3126 if (Character.isSupplementaryCodePoint(randomChar)) {
3127 patternBuffer.append(Character.toChars(randomChar));
3128 } else {
3129 patternBuffer.append((char) randomChar);
3130 }
3131 }
3132 String pattern = patternBuffer.toString();
3133 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3134
3135 // Create a buffer with random chars that does not match the sample
3136 String toSearch = null;
3137 StringBuffer s = null;
3138 Matcher m = p.matcher("");
3139 do {
3140 s = new StringBuffer(100);
3141 for (int x=0; x<100; x++) {
3142 int randomChar = 0;
3143 while (!Character.isLetterOrDigit(randomChar))
3144 randomChar = generator.nextInt(maxCharacter);
3145 if (Character.isSupplementaryCodePoint(randomChar)) {
3146 s.append(Character.toChars(randomChar));
3147 } else {
3148 s.append((char) randomChar);
3149 }
3150 }
3151 toSearch = s.toString();
3152 m.reset(toSearch);
3153 } while (m.find());
3154
3155 // Insert the pattern at a random spot
3156 int insertIndex = generator.nextInt(99);
3157 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3158 insertIndex++;
3159 s = s.insert(insertIndex, pattern);
3160 toSearch = s.toString();
3161
3162 // Make sure that the pattern is found
3163 m.reset(toSearch);
3164 if (!m.find())
3165 failCount++;
3166
3167 // Make sure that the match text is the pattern
3168 if (!m.group().equals(pattern))
3169 failCount++;
3170
3171 // Make sure match occured at insertion point
3172 if (m.start() != insertIndex)
3173 failCount++;
3174 }
3175 }
3176
3177 private static void explainFailure(String pattern, String data,
3178 String expected, String actual) {
3179 System.err.println("----------------------------------------");
3180 System.err.println("Pattern = "+pattern);
3181 System.err.println("Data = "+data);
3182 System.err.println("Expected = " + expected);
3183 System.err.println("Actual = " + actual);
3184 }
3185
3186 private static void explainFailure(String pattern, String data,
3187 Throwable t) {
3188 System.err.println("----------------------------------------");
3189 System.err.println("Pattern = "+pattern);
3190 System.err.println("Data = "+data);
3191 t.printStackTrace(System.err);
3192 }
3193
3194 // Testing examples from a file
3195
3196 /**
3197 * Goes through the file "TestCases.txt" and creates many patterns
3198 * described in the file, matching the patterns against input lines in
3199 * the file, and comparing the results against the correct results
3200 * also found in the file. The file format is described in comments
3201 * at the head of the file.
3202 */
3203 private static void processFile(String fileName) throws Exception {
3204 File testCases = new File(System.getProperty("test.src", "."),
3205 fileName);
3206 FileInputStream in = new FileInputStream(testCases);
3207 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3208
3209 // Process next test case.
3210 String aLine;
3211 while((aLine = r.readLine()) != null) {
3212 // Read a line for pattern
3213 String patternString = grabLine(r);
3214 Pattern p = null;
3215 try {
3216 p = compileTestPattern(patternString);
3217 } catch (PatternSyntaxException e) {
3218 String dataString = grabLine(r);
3219 String expectedResult = grabLine(r);
3220 if (expectedResult.startsWith("error"))
3221 continue;
3222 explainFailure(patternString, dataString, e);
3223 failCount++;
3224 continue;
3225 }
3226
3227 // Read a line for input string
3228 String dataString = grabLine(r);
3229 Matcher m = p.matcher(dataString);
3230 StringBuffer result = new StringBuffer();
3231
3232 // Check for IllegalStateExceptions before a match
3233 failCount += preMatchInvariants(m);
3234
3235 boolean found = m.find();
3236
3237 if (found)
3238 failCount += postTrueMatchInvariants(m);
3239 else
3240 failCount += postFalseMatchInvariants(m);
3241
3242 if (found) {
3243 result.append("true ");
3244 result.append(m.group(0) + " ");
3245 } else {
3246 result.append("false ");
3247 }
3248
3249 result.append(m.groupCount());
3250
3251 if (found) {
3252 for (int i=1; i<m.groupCount()+1; i++)
3253 if (m.group(i) != null)
3254 result.append(" " +m.group(i));
3255 }
3256
3257 // Read a line for the expected result
3258 String expectedResult = grabLine(r);
3259
3260 if (!result.toString().equals(expectedResult)) {
3261 explainFailure(patternString, dataString, expectedResult, result.toString());
3262 failCount++;
3263 }
3264 }
3265
3266 report(fileName);
3267 }
3268
3269 private static int preMatchInvariants(Matcher m) {
3270 int failCount = 0;
3271 try {
3272 m.start();
3273 failCount++;
3274 } catch (IllegalStateException ise) {}
3275 try {
3276 m.end();
3277 failCount++;
3278 } catch (IllegalStateException ise) {}
3279 try {
3280 m.group();
3281 failCount++;
3282 } catch (IllegalStateException ise) {}
3283 return failCount;
3284 }
3285
3286 private static int postFalseMatchInvariants(Matcher m) {
3287 int failCount = 0;
3288 try {
3289 m.group();
3290 failCount++;
3291 } catch (IllegalStateException ise) {}
3292 try {
3293 m.start();
3294 failCount++;
3295 } catch (IllegalStateException ise) {}
3296 try {
3297 m.end();
3298 failCount++;
3299 } catch (IllegalStateException ise) {}
3300 return failCount;
3301 }
3302
3303 private static int postTrueMatchInvariants(Matcher m) {
3304 int failCount = 0;
3305 //assert(m.start() = m.start(0);
3306 if (m.start() != m.start(0))
3307 failCount++;
3308 //assert(m.end() = m.end(0);
3309 if (m.start() != m.start(0))
3310 failCount++;
3311 //assert(m.group() = m.group(0);
3312 if (!m.group().equals(m.group(0)))
3313 failCount++;
3314 try {
3315 m.group(50);
3316 failCount++;
3317 } catch (IndexOutOfBoundsException ise) {}
3318
3319 return failCount;
3320 }
3321
3322 private static Pattern compileTestPattern(String patternString) {
3323 if (!patternString.startsWith("'")) {
3324 return Pattern.compile(patternString);
3325 }
3326
3327 int break1 = patternString.lastIndexOf("'");
3328 String flagString = patternString.substring(
3329 break1+1, patternString.length());
3330 patternString = patternString.substring(1, break1);
3331
3332 if (flagString.equals("i"))
3333 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3334
3335 if (flagString.equals("m"))
3336 return Pattern.compile(patternString, Pattern.MULTILINE);
3337
3338 return Pattern.compile(patternString);
3339 }
3340
3341 /**
3342 * Reads a line from the input file. Keeps reading lines until a non
3343 * empty non comment line is read. If the line contains a \n then
3344 * these two characters are replaced by a newline char. If a \\uxxxx
3345 * sequence is read then the sequence is replaced by the unicode char.
3346 */
3347 private static String grabLine(BufferedReader r) throws Exception {
3348 int index = 0;
3349 String line = r.readLine();
3350 while (line.startsWith("//") || line.length() < 1)
3351 line = r.readLine();
3352 while ((index = line.indexOf("\\n")) != -1) {
3353 StringBuffer temp = new StringBuffer(line);
3354 temp.replace(index, index+2, "\n");
3355 line = temp.toString();
3356 }
3357 while ((index = line.indexOf("\\u")) != -1) {
3358 StringBuffer temp = new StringBuffer(line);
3359 String value = temp.substring(index+2, index+6);
3360 char aChar = (char)Integer.parseInt(value, 16);
3361 String unicodeChar = "" + aChar;
3362 temp.replace(index, index+6, unicodeChar);
3363 line = temp.toString();
3364 }
3365
3366 return line;
3367 }
3368
3369 private static void check(Pattern p, String s, String g, String expected) {
3370 Matcher m = p.matcher(s);
3371 m.find();
3372 if (!m.group(g).equals(expected))
3373 failCount++;
3374 }
3375
3376 private static void checkReplaceFirst(String p, String s, String r, String expected)
3377 {
3378 if (!expected.equals(Pattern.compile(p)
3379 .matcher(s)
3380 .replaceFirst(r)))
3381 failCount++;
3382 }
3383
3384 private static void checkReplaceAll(String p, String s, String r, String expected)
3385 {
3386 if (!expected.equals(Pattern.compile(p)
3387 .matcher(s)
3388 .replaceAll(r)))
3389 failCount++;
3390 }
3391
3392 private static void checkExpectedFail(String p) {
3393 try {
3394 Pattern.compile(p);
3395 } catch (PatternSyntaxException pse) {
3396 //pse.printStackTrace();
3397 return;
3398 }
3399 failCount++;
3400 }
3401
3402 private static void checkExpectedFail(Matcher m, String g) {
3403 m.find();
3404 try {
3405 m.group(g);
3406 } catch (IllegalArgumentException iae) {
3407 //iae.printStackTrace();
3408 return;
3409 } catch (NullPointerException npe) {
3410 return;
3411 }
3412 failCount++;
3413 }
3414
3415
3416 private static void namedGroupCaptureTest() throws Exception {
3417 check(Pattern.compile("x+(?<gname>y+)z+"),
3418 "xxxyyyzzz",
3419 "gname",
3420 "yyy");
3421
shermand9337e02009-10-21 11:40:40 -07003422 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003423 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003424 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003425 "yyy");
3426
sherman0b4d42d2009-02-23 21:06:15 -08003427 //backref
3428 Pattern pattern = Pattern.compile("(a*)bc\\1");
3429 check(pattern, "zzzaabcazzz", true); // found "abca"
3430
3431 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3432 "zzzaabcaazzz", true);
3433
3434 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3435 "abcdefabc", true);
3436
3437 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3438 "abcdefghijkk", true);
3439
3440 // Supplementary character tests
3441 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3442 toSupplementaries("zzzaabcazzz"), true);
3443
3444 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3445 toSupplementaries("zzzaabcaazzz"), true);
3446
3447 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3448 toSupplementaries("abcdefabc"), true);
3449
3450 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3451 "(?<gname>" +
3452 toSupplementaries("k)") + "\\k<gname>"),
3453 toSupplementaries("abcdefghijkk"), true);
3454
3455 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3456 "xxxyyyzzzyyy",
3457 "gname",
3458 "yyy");
3459
3460 //replaceFirst/All
3461 checkReplaceFirst("(?<gn>ab)(c*)",
3462 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003463 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003464 "abzzzabcczzzabccc");
3465
3466 checkReplaceAll("(?<gn>ab)(c*)",
3467 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003468 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003469 "abzzzabzzzab");
3470
3471
3472 checkReplaceFirst("(?<gn>ab)(c*)",
3473 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003474 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003475 "zzzabzzzabcczzzabccczzz");
3476
3477 checkReplaceAll("(?<gn>ab)(c*)",
3478 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003479 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003480 "zzzabzzzabzzzabzzz");
3481
3482 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3483 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003484 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003485 "zzzccczzzabcczzzabccczzz");
3486
3487 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3488 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003489 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003490 "zzzccczzzcczzzccczzz");
3491
3492 //toSupplementaries("(ab)(c*)"));
3493 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3494 ")(?<gn2>" + toSupplementaries("c") + "*)",
3495 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003496 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003497 toSupplementaries("abzzzabcczzzabccc"));
3498
3499
3500 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3501 ")(?<gn2>" + toSupplementaries("c") + "*)",
3502 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003503 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003504 toSupplementaries("abzzzabzzzab"));
3505
3506 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3507 ")(?<gn2>" + toSupplementaries("c") + "*)",
3508 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003509 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003510 toSupplementaries("ccczzzabcczzzabccc"));
3511
3512
3513 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3514 ")(?<gn2>" + toSupplementaries("c") + "*)",
3515 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003516 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003517 toSupplementaries("ccczzzcczzzccc"));
3518
3519 checkReplaceFirst("(?<dog>Dog)AndCat",
3520 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003521 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003522 "zzzDogzzzDogAndCatzzz");
3523
3524
3525 checkReplaceAll("(?<dog>Dog)AndCat",
3526 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003527 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003528 "zzzDogzzzDogzzz");
3529
3530 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003531 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3532 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003533 failCount++;
3534
3535 // negative
3536 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3537 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003538 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003539 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3540 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3541 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3542 "gnameX");
3543 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3544 null);
3545 report("NamedGroupCapture");
3546 }
sherman6782c962010-02-05 00:10:42 -08003547
shermancc01ef52010-05-18 15:36:47 -07003548 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003549 private static void nonBmpClassComplementTest() throws Exception {
3550 Pattern p = Pattern.compile("\\P{Lu}");
3551 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3552 if (m.find() && m.start() == 1)
3553 failCount++;
3554
3555 // from a unicode category
3556 p = Pattern.compile("\\P{Lu}");
3557 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3558 if (m.find())
3559 failCount++;
3560 if (!m.hitEnd())
3561 failCount++;
3562
3563 // block
3564 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3565 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3566 if (m.find() && m.start() == 1)
3567 failCount++;
3568
3569 report("NonBmpClassComplement");
3570 }
3571
shermancc01ef52010-05-18 15:36:47 -07003572 private static void unicodePropertiesTest() throws Exception {
3573 // different forms
3574 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3575 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3576 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3577 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3578 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3579 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3580 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3581 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3582 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3583 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3584 failCount++;
3585
3586 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3587 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3588 Matcher lastSM = common;
3589 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3590
3591 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3592 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3593 Matcher lastBM = latin;
3594 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3595
3596 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3597 if (cp >= 0x30000 && (cp & 0x70) == 0){
3598 continue; // only pick couple code points, they are the same
3599 }
3600
3601 // Unicode Script
3602 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3603 Matcher m;
3604 String str = new String(Character.toChars(cp));
3605 if (script == lastScript) {
3606 m = lastSM;
3607 m.reset(str);
3608 } else {
3609 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3610 }
3611 if (!m.matches()) {
3612 failCount++;
3613 }
3614 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3615 other.reset(str);
3616 if (other.matches()) {
3617 failCount++;
3618 }
3619 lastSM = m;
3620 lastScript = script;
3621
3622 // Unicode Block
3623 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3624 if (block == null) {
3625 //System.out.printf("Not a Block: cp=%x%n", cp);
3626 continue;
3627 }
3628 if (block == lastBlock) {
3629 m = lastBM;
3630 m.reset(str);
3631 } else {
3632 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3633 }
3634 if (!m.matches()) {
3635 failCount++;
3636 }
3637 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3638 other.reset(str);
3639 if (other.matches()) {
3640 failCount++;
3641 }
3642 lastBM = m;
3643 lastBlock = block;
3644 }
3645 report("unicodeProperties");
3646 }
shermanf03c78b2011-02-03 13:49:25 -08003647
3648 private static void unicodeHexNotationTest() throws Exception {
3649
3650 // negative
3651 checkExpectedFail("\\x{-23}");
3652 checkExpectedFail("\\x{110000}");
3653 checkExpectedFail("\\x{}");
3654 checkExpectedFail("\\x{AB[ef]");
3655
3656 // codepoint
3657 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3658 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3659 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3660 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3661
3662 // in class
3663 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3664 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3665 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3666 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3667 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3668 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3669
3670 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3671 String s = "A" + new String(Character.toChars(cp)) + "B";
3672 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3673 : String.format("\\u%04x\\u%04x",
3674 (int) Character.toChars(cp)[0],
3675 (int) Character.toChars(cp)[1]);
3676 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3677 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3678 failCount++;
3679 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3680 failCount++;
3681 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3682 failCount++;
3683 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3684 failCount++;
3685 }
3686 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003687 }
3688
3689 private static void unicodeClassesTest() throws Exception {
3690
3691 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3692 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3693 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3694 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3695 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3696 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3697 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3698 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3699 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3700 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3701 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3702 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3703 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3704 Matcher bound = Pattern.compile("\\b").matcher("");
3705 Matcher word = Pattern.compile("\\w++").matcher("");
3706 // UNICODE_CHARACTER_CLASS
3707 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3708 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3709 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3710 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3711 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3712 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3713 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3714 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3715 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3716 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3717 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3718 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3719 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3720 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3721 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3722 // embedded flag (?U)
3723 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3724 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3725 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3726
3727 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3728 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3729 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3730 // properties
3731 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3732 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3733 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3734 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3735 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3736 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3737 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3738 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3739 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3740 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3741
3742 // javaMethod
3743 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3744 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3745 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3746 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3747
3748 for (int cp = 1; cp < 0x30000; cp++) {
3749 String str = new String(Character.toChars(cp));
3750 int type = Character.getType(cp);
3751 if (// lower
3752 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3753 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3754 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3755 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3756 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3757 // upper
3758 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3759 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3760 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3761 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3762 // alpha
3763 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3764 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3765 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3766 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3767 // digit
3768 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3769 Character.isDigit(cp) != digitU.reset(str).matches() ||
3770 // alnum
3771 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3772 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3773 // punct
3774 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3775 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3776 // graph
3777 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3778 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3779 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3780 // blank
3781 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3782 != blank.reset(str).matches() ||
3783 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3784 // print
3785 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3786 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3787 // cntrl
3788 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3789 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3790 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3791 // hexdigit
3792 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3793 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3794 // space
3795 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3796 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3797 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3798 // word
3799 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3800 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3801 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3802 // bwordb
3803 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3804 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3805 // properties
3806 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3807 Character.isLetter(cp) != letterP.reset(str).matches()||
3808 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3809 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3810 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3811 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3812 failCount++;
3813 }
3814
3815 // bounds/word align
3816 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3817 if (!bwbU.reset("\u0180sherman\u0400").matches())
3818 failCount++;
3819 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3820 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3821 failCount++;
3822 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3823 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3824 failCount++;
3825 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3826 failCount++;
3827 report("unicodePredefinedClasses");
3828 }
sherman0b4d42d2009-02-23 21:06:15 -08003829}