blob: 10644e491aa11bf89aaebe71bb0642fb7cfa2edf [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
ohairbf91ea12011-04-06 22:06:11 -07002 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
sherman85bbd8b2011-04-28 20:48:36 -070035 * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
sherman0b4d42d2009-02-23 21:06:15 -080036 */
37
38import java.util.regex.*;
39import java.util.Random;
40import java.io.*;
41import java.util.*;
42import java.nio.CharBuffer;
43
44/**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
53
54 /**
55 * Main to interpret arguments and run several tests.
56 *
57 */
58 public static void main(String[] args) throws Exception {
59 // Most of the tests are in a file
60 processFile("TestCases.txt");
61 //processFile("PerlCases.txt");
62 processFile("BMPTestCases.txt");
63 processFile("SupplementaryTestCases.txt");
64
65 // These test many randomly generated char patterns
66 bm();
67 slice();
68
69 // These are hard to put into the file
70 escapes();
71 blankInput();
72
73 // Substitition tests on randomly generated sequences
74 globalSubstitute();
75 stringbufferSubstitute();
76 substitutionBasher();
77
78 // Canonical Equivalence
79 ceTest();
80
81 // Anchors
82 anchorTest();
83
84 // boolean match calls
85 matchesTest();
86 lookingAtTest();
87
88 // Pattern API
89 patternMatchesTest();
90
91 // Misc
92 lookbehindTest();
93 nullArgumentTest();
94 backRefTest();
95 groupCaptureTest();
96 caretTest();
97 charClassTest();
98 emptyPatternTest();
99 findIntTest();
100 group0Test();
101 longPatternTest();
102 octalTest();
103 ampersandTest();
104 negationTest();
105 splitTest();
106 appendTest();
107 caseFoldingTest();
108 commentsTest();
109 unixLinesTest();
110 replaceFirstTest();
111 gTest();
112 zTest();
113 serializeTest();
114 reluctantRepetitionTest();
115 multilineDollarTest();
116 dollarAtEndTest();
117 caretBetweenTerminatorsTest();
118 // This RFE rejected in Tiger numOccurrencesTest();
119 javaCharClassTest();
120 nonCaptureRepetitionTest();
121 notCapturedGroupCurlyMatchTest();
122 escapedSegmentTest();
123 literalPatternTest();
124 literalReplacementTest();
125 regionTest();
126 toStringTest();
127 negatedCharClassTest();
128 findFromTest();
129 boundsTest();
130 unicodeWordBoundsTest();
131 caretAtEndTest();
132 wordSearchTest();
133 hitEndTest();
134 toMatchResultTest();
135 surrogatesInClassTest();
136 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800137 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700138 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800139 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700140 unicodeClassesTest();
sherman0b4d42d2009-02-23 21:06:15 -0800141 if (failure)
142 throw new RuntimeException("Failure in the RE handling.");
143 else
144 System.err.println("OKAY: All tests passed.");
145 }
146
147 // Utility functions
148
149 private static String getRandomAlphaString(int length) {
150 StringBuffer buf = new StringBuffer(length);
151 for (int i=0; i<length; i++) {
152 char randChar = (char)(97 + generator.nextInt(26));
153 buf.append(randChar);
154 }
155 return buf.toString();
156 }
157
158 private static void check(Matcher m, String expected) {
159 m.find();
160 if (!m.group().equals(expected))
161 failCount++;
162 }
163
164 private static void check(Matcher m, String result, boolean expected) {
165 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800166 if (m.group().equals(result) != expected)
167 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800168 }
169
170 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800171 if (p.matcher(s).find() != expected)
172 failCount++;
173 }
174
175 private static void check(String p, String s, boolean expected) {
176 Matcher matcher = Pattern.compile(p).matcher(s);
177 if (matcher.find() != expected)
178 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800179 }
180
181 private static void check(String p, char c, boolean expected) {
182 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
183 Pattern pattern = Pattern.compile(propertyPattern);
184 char[] ca = new char[1]; ca[0] = c;
185 Matcher matcher = pattern.matcher(new String(ca));
186 if (!matcher.find())
187 failCount++;
188 }
189
190 private static void check(String p, int codePoint, boolean expected) {
191 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
192 Pattern pattern = Pattern.compile(propertyPattern);
193 char[] ca = Character.toChars(codePoint);
194 Matcher matcher = pattern.matcher(new String(ca));
195 if (!matcher.find())
196 failCount++;
197 }
198
199 private static void check(String p, int flag, String input, String s,
200 boolean expected)
201 {
202 Pattern pattern = Pattern.compile(p, flag);
203 Matcher matcher = pattern.matcher(input);
204 if (expected)
205 check(matcher, s, expected);
206 else
207 check(pattern, input, false);
208 }
209
210 private static void report(String testName) {
211 int spacesToAdd = 30 - testName.length();
212 StringBuffer paddedNameBuffer = new StringBuffer(testName);
213 for (int i=0; i<spacesToAdd; i++)
214 paddedNameBuffer.append(" ");
215 String paddedName = paddedNameBuffer.toString();
216 System.err.println(paddedName + ": " +
217 (failCount==0 ? "Passed":"Failed("+failCount+")"));
218 if (failCount > 0)
219 failure = true;
220 failCount = 0;
221 }
222
223 /**
224 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
225 * supplementary characters. This method does NOT fully take care
226 * of the regex syntax.
227 */
228 private static String toSupplementaries(String s) {
229 int length = s.length();
230 StringBuffer sb = new StringBuffer(length * 2);
231
232 for (int i = 0; i < length; ) {
233 char c = s.charAt(i++);
234 if (c == '\\') {
235 sb.append(c);
236 if (i < length) {
237 c = s.charAt(i++);
238 sb.append(c);
239 if (c == 'u') {
240 // assume no syntax error
241 sb.append(s.charAt(i++));
242 sb.append(s.charAt(i++));
243 sb.append(s.charAt(i++));
244 sb.append(s.charAt(i++));
245 }
246 }
247 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
248 sb.append('\ud800').append((char)('\udc00'+c));
249 } else {
250 sb.append(c);
251 }
252 }
253 return sb.toString();
254 }
255
256 // Regular expression tests
257
258 // This is for bug 6178785
259 // Test if an expected NPE gets thrown when passing in a null argument
260 private static boolean check(Runnable test) {
261 try {
262 test.run();
263 failCount++;
264 return false;
265 } catch (NullPointerException npe) {
266 return true;
267 }
268 }
269
270 private static void nullArgumentTest() {
271 check(new Runnable() { public void run() { Pattern.compile(null); }});
272 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
273 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
274 check(new Runnable() { public void run() { Pattern.quote(null);}});
275 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
276 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
277
278 final Matcher m = Pattern.compile("xyz").matcher("xyz");
279 m.matches();
280 check(new Runnable() { public void run() { m.appendTail(null);}});
281 check(new Runnable() { public void run() { m.replaceAll(null);}});
282 check(new Runnable() { public void run() { m.replaceFirst(null);}});
283 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
284 check(new Runnable() { public void run() { m.reset(null);}});
285 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
286 //check(new Runnable() { public void run() { m.usePattern(null);}});
287
288 report("Null Argument");
289 }
290
291 // This is for bug6635133
292 // Test if surrogate pair in Unicode escapes can be handled correctly.
293 private static void surrogatesInClassTest() throws Exception {
294 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
295 Matcher matcher = pattern.matcher("\ud834\udd22");
296 if (!matcher.find())
297 failCount++;
298 }
299
300 // This is for bug 4988891
301 // Test toMatchResult to see that it is a copy of the Matcher
302 // that is not affected by subsequent operations on the original
303 private static void toMatchResultTest() throws Exception {
304 Pattern pattern = Pattern.compile("squid");
305 Matcher matcher = pattern.matcher(
306 "agiantsquidofdestinyasmallsquidoffate");
307 matcher.find();
308 int matcherStart1 = matcher.start();
309 MatchResult mr = matcher.toMatchResult();
310 if (mr == matcher)
311 failCount++;
312 int resultStart1 = mr.start();
313 if (matcherStart1 != resultStart1)
314 failCount++;
315 matcher.find();
316 int matcherStart2 = matcher.start();
317 int resultStart2 = mr.start();
318 if (matcherStart2 == resultStart2)
319 failCount++;
320 if (resultStart1 != resultStart2)
321 failCount++;
322 MatchResult mr2 = matcher.toMatchResult();
323 if (mr == mr2)
324 failCount++;
325 if (mr2.start() != matcherStart2)
326 failCount++;
327 report("toMatchResult is a copy");
328 }
329
330 // This is for bug 5013885
331 // Must test a slice to see if it reports hitEnd correctly
332 private static void hitEndTest() throws Exception {
333 // Basic test of Slice node
334 Pattern p = Pattern.compile("^squidattack");
335 Matcher m = p.matcher("squack");
336 m.find();
337 if (m.hitEnd())
338 failCount++;
339 m.reset("squid");
340 m.find();
341 if (!m.hitEnd())
342 failCount++;
343
344 // Test Slice, SliceA and SliceU nodes
345 for (int i=0; i<3; i++) {
346 int flags = 0;
347 if (i==1) flags = Pattern.CASE_INSENSITIVE;
348 if (i==2) flags = Pattern.UNICODE_CASE;
349 p = Pattern.compile("^abc", flags);
350 m = p.matcher("ad");
351 m.find();
352 if (m.hitEnd())
353 failCount++;
354 m.reset("ab");
355 m.find();
356 if (!m.hitEnd())
357 failCount++;
358 }
359
360 // Test Boyer-Moore node
361 p = Pattern.compile("catattack");
362 m = p.matcher("attack");
363 m.find();
364 if (!m.hitEnd())
365 failCount++;
366
367 p = Pattern.compile("catattack");
368 m = p.matcher("attackattackattackcatatta");
369 m.find();
370 if (!m.hitEnd())
371 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800372 report("hitEnd from a Slice");
373 }
374
375 // This is for bug 4997476
376 // It is weird code submitted by customer demonstrating a regression
377 private static void wordSearchTest() throws Exception {
378 String testString = new String("word1 word2 word3");
379 Pattern p = Pattern.compile("\\b");
380 Matcher m = p.matcher(testString);
381 int position = 0;
382 int start = 0;
383 while (m.find(position)) {
384 start = m.start();
385 if (start == testString.length())
386 break;
387 if (m.find(start+1)) {
388 position = m.start();
389 } else {
390 position = testString.length();
391 }
392 if (testString.substring(start, position).equals(" "))
393 continue;
394 if (!testString.substring(start, position-1).startsWith("word"))
395 failCount++;
396 }
397 report("Customer word search");
398 }
399
400 // This is for bug 4994840
401 private static void caretAtEndTest() throws Exception {
402 // Problem only occurs with multiline patterns
403 // containing a beginning-of-line caret "^" followed
404 // by an expression that also matches the empty string.
405 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
406 Matcher matcher = pattern.matcher("\r");
407 matcher.find();
408 matcher.find();
409 report("Caret at end");
410 }
411
412 // This test is for 4979006
413 // Check to see if word boundary construct properly handles unicode
414 // non spacing marks
415 private static void unicodeWordBoundsTest() throws Exception {
416 String spaces = " ";
417 String wordChar = "a";
418 String nsm = "\u030a";
419
420 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
421
422 Pattern pattern = Pattern.compile("\\b");
423 Matcher matcher = pattern.matcher("");
424 // S=other B=word character N=non spacing mark .=word boundary
425 // SS.BB.SS
426 String input = spaces + wordChar + wordChar + spaces;
427 twoFindIndexes(input, matcher, 2, 4);
428 // SS.BBN.SS
429 input = spaces + wordChar +wordChar + nsm + spaces;
430 twoFindIndexes(input, matcher, 2, 5);
431 // SS.BN.SS
432 input = spaces + wordChar + nsm + spaces;
433 twoFindIndexes(input, matcher, 2, 4);
434 // SS.BNN.SS
435 input = spaces + wordChar + nsm + nsm + spaces;
436 twoFindIndexes(input, matcher, 2, 5);
437 // SSN.BB.SS
438 input = spaces + nsm + wordChar + wordChar + spaces;
439 twoFindIndexes(input, matcher, 3, 5);
440 // SS.BNB.SS
441 input = spaces + wordChar + nsm + wordChar + spaces;
442 twoFindIndexes(input, matcher, 2, 5);
443 // SSNNSS
444 input = spaces + nsm + nsm + spaces;
445 matcher.reset(input);
446 if (matcher.find())
447 failCount++;
448 // SSN.BBN.SS
449 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
450 twoFindIndexes(input, matcher, 3, 6);
451
452 report("Unicode word boundary");
453 }
454
455 private static void twoFindIndexes(String input, Matcher matcher, int a,
456 int b) throws Exception
457 {
458 matcher.reset(input);
459 matcher.find();
460 if (matcher.start() != a)
461 failCount++;
462 matcher.find();
463 if (matcher.start() != b)
464 failCount++;
465 }
466
467 // This test is for 6284152
468 static void check(String regex, String input, String[] expected) {
469 List<String> result = new ArrayList<String>();
470 Pattern p = Pattern.compile(regex);
471 Matcher m = p.matcher(input);
472 while (m.find()) {
473 result.add(m.group());
474 }
475 if (!Arrays.asList(expected).equals(result))
476 failCount++;
477 }
478
479 private static void lookbehindTest() throws Exception {
480 //Positive
481 check("(?<=%.{0,5})foo\\d",
482 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
483 new String[]{"foo1", "foo2", "foo3"});
484
485 //boundary at end of the lookbehind sub-regex should work consistently
486 //with the boundary just after the lookbehind sub-regex
487 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
488 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
489 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
490 check("(?<!abc \\b)foo", "abc foo", new String[0]);
491
492 //Negative
493 check("(?<!%.{0,5})foo\\d",
494 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
495 new String[] {"foo4", "foo5"});
496
497 //Positive greedy
498 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
499
500 //Positive reluctant
501 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
502
503 //supplementary
504 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
505 new String[] {"fo\ud800\udc00o"});
506 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
507 new String[] {"fo\ud800\udc00o"});
508 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
509 new String[] {"fo\ud800\udc00o"});
510 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
511 new String[] {"fo\ud800\udc00o"});
512 report("Lookbehind");
513 }
514
515 // This test is for 4938995
516 // Check to see if weak region boundaries are transparent to
517 // lookahead and lookbehind constructs
518 private static void boundsTest() throws Exception {
519 String fullMessage = "catdogcat";
520 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
521 Matcher matcher = pattern.matcher("catdogca");
522 matcher.useTransparentBounds(true);
523 if (matcher.find())
524 failCount++;
525 matcher.reset("atdogcat");
526 if (matcher.find())
527 failCount++;
528 matcher.reset(fullMessage);
529 if (!matcher.find())
530 failCount++;
531 matcher.reset(fullMessage);
532 matcher.region(0,9);
533 if (!matcher.find())
534 failCount++;
535 matcher.reset(fullMessage);
536 matcher.region(0,6);
537 if (!matcher.find())
538 failCount++;
539 matcher.reset(fullMessage);
540 matcher.region(3,6);
541 if (!matcher.find())
542 failCount++;
543 matcher.useTransparentBounds(false);
544 if (matcher.find())
545 failCount++;
546
547 // Negative lookahead/lookbehind
548 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
549 matcher = pattern.matcher("dogcat");
550 matcher.useTransparentBounds(true);
551 matcher.region(0,3);
552 if (matcher.find())
553 failCount++;
554 matcher.reset("catdog");
555 matcher.region(3,6);
556 if (matcher.find())
557 failCount++;
558 matcher.useTransparentBounds(false);
559 matcher.reset("dogcat");
560 matcher.region(0,3);
561 if (!matcher.find())
562 failCount++;
563 matcher.reset("catdog");
564 matcher.region(3,6);
565 if (!matcher.find())
566 failCount++;
567
568 report("Region bounds transparency");
569 }
570
571 // This test is for 4945394
572 private static void findFromTest() throws Exception {
573 String message = "This is 40 $0 message.";
574 Pattern pat = Pattern.compile("\\$0");
575 Matcher match = pat.matcher(message);
576 if (!match.find())
577 failCount++;
578 if (match.find())
579 failCount++;
580 if (match.find())
581 failCount++;
582 report("Check for alternating find");
583 }
584
585 // This test is for 4872664 and 4892980
586 private static void negatedCharClassTest() throws Exception {
587 Pattern pattern = Pattern.compile("[^>]");
588 Matcher matcher = pattern.matcher("\u203A");
589 if (!matcher.matches())
590 failCount++;
591 pattern = Pattern.compile("[^fr]");
592 matcher = pattern.matcher("a");
593 if (!matcher.find())
594 failCount++;
595 matcher.reset("\u203A");
596 if (!matcher.find())
597 failCount++;
598 String s = "for";
599 String result[] = s.split("[^fr]");
600 if (!result[0].equals("f"))
601 failCount++;
602 if (!result[1].equals("r"))
603 failCount++;
604 s = "f\u203Ar";
605 result = s.split("[^fr]");
606 if (!result[0].equals("f"))
607 failCount++;
608 if (!result[1].equals("r"))
609 failCount++;
610
611 // Test adding to bits, subtracting a node, then adding to bits again
612 pattern = Pattern.compile("[^f\u203Ar]");
613 matcher = pattern.matcher("a");
614 if (!matcher.find())
615 failCount++;
616 matcher.reset("f");
617 if (matcher.find())
618 failCount++;
619 matcher.reset("\u203A");
620 if (matcher.find())
621 failCount++;
622 matcher.reset("r");
623 if (matcher.find())
624 failCount++;
625 matcher.reset("\u203B");
626 if (!matcher.find())
627 failCount++;
628
629 // Test subtracting a node, adding to bits, subtracting again
630 pattern = Pattern.compile("[^\u203Ar\u203B]");
631 matcher = pattern.matcher("a");
632 if (!matcher.find())
633 failCount++;
634 matcher.reset("\u203A");
635 if (matcher.find())
636 failCount++;
637 matcher.reset("r");
638 if (matcher.find())
639 failCount++;
640 matcher.reset("\u203B");
641 if (matcher.find())
642 failCount++;
643 matcher.reset("\u203C");
644 if (!matcher.find())
645 failCount++;
646
647 report("Negated Character Class");
648 }
649
650 // This test is for 4628291
651 private static void toStringTest() throws Exception {
652 Pattern pattern = Pattern.compile("b+");
653 if (pattern.toString() != "b+")
654 failCount++;
655 Matcher matcher = pattern.matcher("aaabbbccc");
656 String matcherString = matcher.toString(); // unspecified
657 matcher.find();
658 matcherString = matcher.toString(); // unspecified
659 matcher.region(0,3);
660 matcherString = matcher.toString(); // unspecified
661 matcher.reset();
662 matcherString = matcher.toString(); // unspecified
663 report("toString");
664 }
665
666 // This test is for 4808962
667 private static void literalPatternTest() throws Exception {
668 int flags = Pattern.LITERAL;
669
670 Pattern pattern = Pattern.compile("abc\\t$^", flags);
671 check(pattern, "abc\\t$^", true);
672
673 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
674 check(pattern, "abc\\t$^", true);
675
676 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
677 check(pattern, "\\Qa^$bcabc\\E", true);
678 check(pattern, "a^$bcabc", false);
679
680 pattern = Pattern.compile("\\\\Q\\\\E");
681 check(pattern, "\\Q\\E", true);
682
683 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
684 check(pattern, "abcefg\\Q\\Ehij", true);
685
686 pattern = Pattern.compile("\\\\\\Q\\\\E");
687 check(pattern, "\\\\\\\\", true);
688
689 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
690 check(pattern, "\\Qa^$bcabc\\E", true);
691 check(pattern, "a^$bcabc", false);
692
693 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
694 check(pattern, "\\Qabc\\Edef", true);
695 check(pattern, "abcdef", false);
696
697 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
698 check(pattern, "abc\\Edef", true);
699 check(pattern, "abcdef", false);
700
701 pattern = Pattern.compile(Pattern.quote("\\E"));
702 check(pattern, "\\E", true);
703
704 pattern = Pattern.compile("((((abc.+?:)", flags);
705 check(pattern, "((((abc.+?:)", true);
706
707 flags |= Pattern.MULTILINE;
708
709 pattern = Pattern.compile("^cat$", flags);
710 check(pattern, "abc^cat$def", true);
711 check(pattern, "cat", false);
712
713 flags |= Pattern.CASE_INSENSITIVE;
714
715 pattern = Pattern.compile("abcdef", flags);
716 check(pattern, "ABCDEF", true);
717 check(pattern, "AbCdEf", true);
718
719 flags |= Pattern.DOTALL;
720
721 pattern = Pattern.compile("a...b", flags);
722 check(pattern, "A...b", true);
723 check(pattern, "Axxxb", false);
724
725 flags |= Pattern.CANON_EQ;
726
727 Pattern p = Pattern.compile("testa\u030a", flags);
728 check(pattern, "testa\u030a", false);
729 check(pattern, "test\u00e5", false);
730
731 // Supplementary character test
732 flags = Pattern.LITERAL;
733
734 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
735 check(pattern, toSupplementaries("abc\\t$^"), true);
736
737 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
738 check(pattern, toSupplementaries("abc\\t$^"), true);
739
740 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
741 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
742 check(pattern, toSupplementaries("a^$bcabc"), false);
743
744 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
745 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
746 check(pattern, toSupplementaries("a^$bcabc"), false);
747
748 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
749 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
750 check(pattern, toSupplementaries("abcdef"), false);
751
752 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
753 check(pattern, toSupplementaries("abc\\Edef"), true);
754 check(pattern, toSupplementaries("abcdef"), false);
755
756 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
757 check(pattern, toSupplementaries("((((abc.+?:)"), true);
758
759 flags |= Pattern.MULTILINE;
760
761 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
762 check(pattern, toSupplementaries("abc^cat$def"), true);
763 check(pattern, toSupplementaries("cat"), false);
764
765 flags |= Pattern.DOTALL;
766
767 // note: this is case-sensitive.
768 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
769 check(pattern, toSupplementaries("a...b"), true);
770 check(pattern, toSupplementaries("axxxb"), false);
771
772 flags |= Pattern.CANON_EQ;
773
774 String t = toSupplementaries("test");
775 p = Pattern.compile(t + "a\u030a", flags);
776 check(pattern, t + "a\u030a", false);
777 check(pattern, t + "\u00e5", false);
778
779 report("Literal pattern");
780 }
781
782 // This test is for 4803179
783 // This test is also for 4808962, replacement parts
784 private static void literalReplacementTest() throws Exception {
785 int flags = Pattern.LITERAL;
786
787 Pattern pattern = Pattern.compile("abc", flags);
788 Matcher matcher = pattern.matcher("zzzabczzz");
789 String replaceTest = "$0";
790 String result = matcher.replaceAll(replaceTest);
791 if (!result.equals("zzzabczzz"))
792 failCount++;
793
794 matcher.reset();
795 String literalReplacement = matcher.quoteReplacement(replaceTest);
796 result = matcher.replaceAll(literalReplacement);
797 if (!result.equals("zzz$0zzz"))
798 failCount++;
799
800 matcher.reset();
801 replaceTest = "\\t$\\$";
802 literalReplacement = matcher.quoteReplacement(replaceTest);
803 result = matcher.replaceAll(literalReplacement);
804 if (!result.equals("zzz\\t$\\$zzz"))
805 failCount++;
806
807 // Supplementary character test
808 pattern = Pattern.compile(toSupplementaries("abc"), flags);
809 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
810 replaceTest = "$0";
811 result = matcher.replaceAll(replaceTest);
812 if (!result.equals(toSupplementaries("zzzabczzz")))
813 failCount++;
814
815 matcher.reset();
816 literalReplacement = matcher.quoteReplacement(replaceTest);
817 result = matcher.replaceAll(literalReplacement);
818 if (!result.equals(toSupplementaries("zzz$0zzz")))
819 failCount++;
820
821 matcher.reset();
822 replaceTest = "\\t$\\$";
823 literalReplacement = matcher.quoteReplacement(replaceTest);
824 result = matcher.replaceAll(literalReplacement);
825 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
826 failCount++;
827
828 report("Literal replacement");
829 }
830
831 // This test is for 4757029
832 private static void regionTest() throws Exception {
833 Pattern pattern = Pattern.compile("abc");
834 Matcher matcher = pattern.matcher("abcdefabc");
835
836 matcher.region(0,9);
837 if (!matcher.find())
838 failCount++;
839 if (!matcher.find())
840 failCount++;
841 matcher.region(0,3);
842 if (!matcher.find())
843 failCount++;
844 matcher.region(3,6);
845 if (matcher.find())
846 failCount++;
847 matcher.region(0,2);
848 if (matcher.find())
849 failCount++;
850
851 expectRegionFail(matcher, 1, -1);
852 expectRegionFail(matcher, -1, -1);
853 expectRegionFail(matcher, -1, 1);
854 expectRegionFail(matcher, 5, 3);
855 expectRegionFail(matcher, 5, 12);
856 expectRegionFail(matcher, 12, 12);
857
858 pattern = Pattern.compile("^abc$");
859 matcher = pattern.matcher("zzzabczzz");
860 matcher.region(0,9);
861 if (matcher.find())
862 failCount++;
863 matcher.region(3,6);
864 if (!matcher.find())
865 failCount++;
866 matcher.region(3,6);
867 matcher.useAnchoringBounds(false);
868 if (matcher.find())
869 failCount++;
870
871 // Supplementary character test
872 pattern = Pattern.compile(toSupplementaries("abc"));
873 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
874 matcher.region(0,9*2);
875 if (!matcher.find())
876 failCount++;
877 if (!matcher.find())
878 failCount++;
879 matcher.region(0,3*2);
880 if (!matcher.find())
881 failCount++;
882 matcher.region(1,3*2);
883 if (matcher.find())
884 failCount++;
885 matcher.region(3*2,6*2);
886 if (matcher.find())
887 failCount++;
888 matcher.region(0,2*2);
889 if (matcher.find())
890 failCount++;
891 matcher.region(0,2*2+1);
892 if (matcher.find())
893 failCount++;
894
895 expectRegionFail(matcher, 1*2, -1);
896 expectRegionFail(matcher, -1, -1);
897 expectRegionFail(matcher, -1, 1*2);
898 expectRegionFail(matcher, 5*2, 3*2);
899 expectRegionFail(matcher, 5*2, 12*2);
900 expectRegionFail(matcher, 12*2, 12*2);
901
902 pattern = Pattern.compile(toSupplementaries("^abc$"));
903 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
904 matcher.region(0,9*2);
905 if (matcher.find())
906 failCount++;
907 matcher.region(3*2,6*2);
908 if (!matcher.find())
909 failCount++;
910 matcher.region(3*2+1,6*2);
911 if (matcher.find())
912 failCount++;
913 matcher.region(3*2,6*2-1);
914 if (matcher.find())
915 failCount++;
916 matcher.region(3*2,6*2);
917 matcher.useAnchoringBounds(false);
918 if (matcher.find())
919 failCount++;
920 report("Regions");
921 }
922
923 private static void expectRegionFail(Matcher matcher, int index1,
924 int index2)
925 {
926 try {
927 matcher.region(index1, index2);
928 failCount++;
929 } catch (IndexOutOfBoundsException ioobe) {
930 // Correct result
931 } catch (IllegalStateException ise) {
932 // Correct result
933 }
934 }
935
936 // This test is for 4803197
937 private static void escapedSegmentTest() throws Exception {
938
939 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
940 check(pattern, "dir1\\dir2", true);
941
942 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
943 check(pattern, "dir1\\dir2\\", true);
944
945 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
946 check(pattern, "dir1\\dir2\\", true);
947
948 // Supplementary character test
949 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
950 check(pattern, toSupplementaries("dir1\\dir2"), true);
951
952 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
953 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
954
955 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
956 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
957
958 report("Escaped segment");
959 }
960
961 // This test is for 4792284
962 private static void nonCaptureRepetitionTest() throws Exception {
963 String input = "abcdefgh;";
964
965 String[] patterns = new String[] {
966 "(?:\\w{4})+;",
967 "(?:\\w{8})*;",
968 "(?:\\w{2}){2,4};",
969 "(?:\\w{4}){2,};", // only matches the
970 ".*?(?:\\w{5})+;", // specified minimum
971 ".*?(?:\\w{9})*;", // number of reps - OK
972 "(?:\\w{4})+?;", // lazy repetition - OK
973 "(?:\\w{4})++;", // possessive repetition - OK
974 "(?:\\w{2,}?)+;", // non-deterministic - OK
975 "(\\w{4})+;", // capturing group - OK
976 };
977
978 for (int i = 0; i < patterns.length; i++) {
979 // Check find()
980 check(patterns[i], 0, input, input, true);
981 // Check matches()
982 Pattern p = Pattern.compile(patterns[i]);
983 Matcher m = p.matcher(input);
984
985 if (m.matches()) {
986 if (!m.group(0).equals(input))
987 failCount++;
988 } else {
989 failCount++;
990 }
991 }
992
993 report("Non capturing repetition");
994 }
995
996 // This test is for 6358731
997 private static void notCapturedGroupCurlyMatchTest() throws Exception {
998 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
999 Matcher matcher = pattern.matcher("abcd");
1000 if (!matcher.matches() ||
1001 matcher.group(1) != null ||
1002 !matcher.group(2).equals("abcd")) {
1003 failCount++;
1004 }
1005 report("Not captured GroupCurly");
1006 }
1007
1008 // This test is for 4706545
1009 private static void javaCharClassTest() throws Exception {
1010 for (int i=0; i<1000; i++) {
1011 char c = (char)generator.nextInt();
1012 check("{javaLowerCase}", c, Character.isLowerCase(c));
1013 check("{javaUpperCase}", c, Character.isUpperCase(c));
1014 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1015 check("{javaTitleCase}", c, Character.isTitleCase(c));
1016 check("{javaDigit}", c, Character.isDigit(c));
1017 check("{javaDefined}", c, Character.isDefined(c));
1018 check("{javaLetter}", c, Character.isLetter(c));
1019 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1020 check("{javaJavaIdentifierStart}", c,
1021 Character.isJavaIdentifierStart(c));
1022 check("{javaJavaIdentifierPart}", c,
1023 Character.isJavaIdentifierPart(c));
1024 check("{javaUnicodeIdentifierStart}", c,
1025 Character.isUnicodeIdentifierStart(c));
1026 check("{javaUnicodeIdentifierPart}", c,
1027 Character.isUnicodeIdentifierPart(c));
1028 check("{javaIdentifierIgnorable}", c,
1029 Character.isIdentifierIgnorable(c));
1030 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1031 check("{javaWhitespace}", c, Character.isWhitespace(c));
1032 check("{javaISOControl}", c, Character.isISOControl(c));
1033 check("{javaMirrored}", c, Character.isMirrored(c));
1034
1035 }
1036
1037 // Supplementary character test
1038 for (int i=0; i<1000; i++) {
1039 int c = generator.nextInt(Character.MAX_CODE_POINT
1040 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1041 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1042 check("{javaLowerCase}", c, Character.isLowerCase(c));
1043 check("{javaUpperCase}", c, Character.isUpperCase(c));
1044 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1045 check("{javaTitleCase}", c, Character.isTitleCase(c));
1046 check("{javaDigit}", c, Character.isDigit(c));
1047 check("{javaDefined}", c, Character.isDefined(c));
1048 check("{javaLetter}", c, Character.isLetter(c));
1049 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1050 check("{javaJavaIdentifierStart}", c,
1051 Character.isJavaIdentifierStart(c));
1052 check("{javaJavaIdentifierPart}", c,
1053 Character.isJavaIdentifierPart(c));
1054 check("{javaUnicodeIdentifierStart}", c,
1055 Character.isUnicodeIdentifierStart(c));
1056 check("{javaUnicodeIdentifierPart}", c,
1057 Character.isUnicodeIdentifierPart(c));
1058 check("{javaIdentifierIgnorable}", c,
1059 Character.isIdentifierIgnorable(c));
1060 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1061 check("{javaWhitespace}", c, Character.isWhitespace(c));
1062 check("{javaISOControl}", c, Character.isISOControl(c));
1063 check("{javaMirrored}", c, Character.isMirrored(c));
1064 }
1065
1066 report("Java character classes");
1067 }
1068
1069 // This test is for 4523620
1070 /*
1071 private static void numOccurrencesTest() throws Exception {
1072 Pattern pattern = Pattern.compile("aaa");
1073
1074 if (pattern.numOccurrences("aaaaaa", false) != 2)
1075 failCount++;
1076 if (pattern.numOccurrences("aaaaaa", true) != 4)
1077 failCount++;
1078
1079 pattern = Pattern.compile("^");
1080 if (pattern.numOccurrences("aaaaaa", false) != 1)
1081 failCount++;
1082 if (pattern.numOccurrences("aaaaaa", true) != 1)
1083 failCount++;
1084
1085 report("Number of Occurrences");
1086 }
1087 */
1088
1089 // This test is for 4776374
1090 private static void caretBetweenTerminatorsTest() throws Exception {
1091 int flags1 = Pattern.DOTALL;
1092 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1093 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1094 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1095
1096 check("^....", flags1, "test\ntest", "test", true);
1097 check(".....^", flags1, "test\ntest", "test", false);
1098 check(".....^", flags1, "test\n", "test", false);
1099 check("....^", flags1, "test\r\n", "test", false);
1100
1101 check("^....", flags2, "test\ntest", "test", true);
1102 check("....^", flags2, "test\ntest", "test", false);
1103 check(".....^", flags2, "test\n", "test", false);
1104 check("....^", flags2, "test\r\n", "test", false);
1105
1106 check("^....", flags3, "test\ntest", "test", true);
1107 check(".....^", flags3, "test\ntest", "test\n", true);
1108 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1109 check(".....^", flags3, "test\n", "test", false);
1110 check(".....^", flags3, "test\r\n", "test", false);
1111 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1112
1113 check("^....", flags4, "test\ntest", "test", true);
1114 check(".....^", flags3, "test\ntest", "test\n", true);
1115 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1116 check(".....^", flags4, "test\n", "test\n", false);
1117 check(".....^", flags4, "test\r\n", "test\r", false);
1118
1119 // Supplementary character test
1120 String t = toSupplementaries("test");
1121 check("^....", flags1, t+"\n"+t, t, true);
1122 check(".....^", flags1, t+"\n"+t, t, false);
1123 check(".....^", flags1, t+"\n", t, false);
1124 check("....^", flags1, t+"\r\n", t, false);
1125
1126 check("^....", flags2, t+"\n"+t, t, true);
1127 check("....^", flags2, t+"\n"+t, t, false);
1128 check(".....^", flags2, t+"\n", t, false);
1129 check("....^", flags2, t+"\r\n", t, false);
1130
1131 check("^....", flags3, t+"\n"+t, t, true);
1132 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1133 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1134 check(".....^", flags3, t+"\n", t, false);
1135 check(".....^", flags3, t+"\r\n", t, false);
1136 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1137
1138 check("^....", flags4, t+"\n"+t, t, true);
1139 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1140 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1141 check(".....^", flags4, t+"\n", t+"\n", false);
1142 check(".....^", flags4, t+"\r\n", t+"\r", false);
1143
1144 report("Caret between terminators");
1145 }
1146
1147 // This test is for 4727935
1148 private static void dollarAtEndTest() throws Exception {
1149 int flags1 = Pattern.DOTALL;
1150 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1151 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1152
1153 check("....$", flags1, "test\n", "test", true);
1154 check("....$", flags1, "test\r\n", "test", true);
1155 check(".....$", flags1, "test\n", "test\n", true);
1156 check(".....$", flags1, "test\u0085", "test\u0085", true);
1157 check("....$", flags1, "test\u0085", "test", true);
1158
1159 check("....$", flags2, "test\n", "test", true);
1160 check(".....$", flags2, "test\n", "test\n", true);
1161 check(".....$", flags2, "test\u0085", "test\u0085", true);
1162 check("....$", flags2, "test\u0085", "est\u0085", true);
1163
1164 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1165 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1166 check("....$blah", flags3, "test\nblah", "!!!!", false);
1167 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1168
1169 // Supplementary character test
1170 String t = toSupplementaries("test");
1171 String b = toSupplementaries("blah");
1172 check("....$", flags1, t+"\n", t, true);
1173 check("....$", flags1, t+"\r\n", t, true);
1174 check(".....$", flags1, t+"\n", t+"\n", true);
1175 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1176 check("....$", flags1, t+"\u0085", t, true);
1177
1178 check("....$", flags2, t+"\n", t, true);
1179 check(".....$", flags2, t+"\n", t+"\n", true);
1180 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1181 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1182
1183 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1184 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1185 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1186 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1187
1188 report("Dollar at End");
1189 }
1190
1191 // This test is for 4711773
1192 private static void multilineDollarTest() throws Exception {
1193 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1194 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1195 matcher.find();
1196 if (matcher.start(0) != 9)
1197 failCount++;
1198 matcher.find();
1199 if (matcher.start(0) != 20)
1200 failCount++;
1201
1202 // Supplementary character test
1203 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1204 matcher.find();
1205 if (matcher.start(0) != 9*2)
1206 failCount++;
1207 matcher.find();
1208 if (matcher.start(0) != 20*2)
1209 failCount++;
1210
1211 report("Multiline Dollar");
1212 }
1213
1214 private static void reluctantRepetitionTest() throws Exception {
1215 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1216 check(p, "1 word word word 2", true);
1217 check(p, "1 wor wo w 2", true);
1218 check(p, "1 word word 2", true);
1219 check(p, "1 word 2", true);
1220 check(p, "1 wo w w 2", true);
1221 check(p, "1 wo w 2", true);
1222 check(p, "1 wor w 2", true);
1223
1224 p = Pattern.compile("([a-z])+?c");
1225 Matcher m = p.matcher("ababcdefdec");
1226 check(m, "ababc");
1227
1228 // Supplementary character test
1229 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1230 m = p.matcher(toSupplementaries("ababcdefdec"));
1231 check(m, toSupplementaries("ababc"));
1232
1233 report("Reluctant Repetition");
1234 }
1235
1236 private static void serializeTest() throws Exception {
1237 String patternStr = "(b)";
1238 String matchStr = "b";
1239 Pattern pattern = Pattern.compile(patternStr);
1240 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1241 ObjectOutputStream oos = new ObjectOutputStream(baos);
1242 oos.writeObject(pattern);
1243 oos.close();
1244 ObjectInputStream ois = new ObjectInputStream(
1245 new ByteArrayInputStream(baos.toByteArray()));
1246 Pattern serializedPattern = (Pattern)ois.readObject();
1247 ois.close();
1248 Matcher matcher = serializedPattern.matcher(matchStr);
1249 if (!matcher.matches())
1250 failCount++;
1251 if (matcher.groupCount() != 1)
1252 failCount++;
1253
1254 report("Serialization");
1255 }
1256
1257 private static void gTest() {
1258 Pattern pattern = Pattern.compile("\\G\\w");
1259 Matcher matcher = pattern.matcher("abc#x#x");
1260 matcher.find();
1261 matcher.find();
1262 matcher.find();
1263 if (matcher.find())
1264 failCount++;
1265
1266 pattern = Pattern.compile("\\GA*");
1267 matcher = pattern.matcher("1A2AA3");
1268 matcher.find();
1269 if (matcher.find())
1270 failCount++;
1271
1272 pattern = Pattern.compile("\\GA*");
1273 matcher = pattern.matcher("1A2AA3");
1274 if (!matcher.find(1))
1275 failCount++;
1276 matcher.find();
1277 if (matcher.find())
1278 failCount++;
1279
1280 report("\\G");
1281 }
1282
1283 private static void zTest() {
1284 Pattern pattern = Pattern.compile("foo\\Z");
1285 // Positives
1286 check(pattern, "foo\u0085", true);
1287 check(pattern, "foo\u2028", true);
1288 check(pattern, "foo\u2029", true);
1289 check(pattern, "foo\n", true);
1290 check(pattern, "foo\r", true);
1291 check(pattern, "foo\r\n", true);
1292 // Negatives
1293 check(pattern, "fooo", false);
1294 check(pattern, "foo\n\r", false);
1295
1296 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1297 // Positives
1298 check(pattern, "foo", true);
1299 check(pattern, "foo\n", true);
1300 // Negatives
1301 check(pattern, "foo\r", false);
1302 check(pattern, "foo\u0085", false);
1303 check(pattern, "foo\u2028", false);
1304 check(pattern, "foo\u2029", false);
1305
1306 report("\\Z");
1307 }
1308
1309 private static void replaceFirstTest() {
1310 Pattern pattern = Pattern.compile("(ab)(c*)");
1311 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1312 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1313 failCount++;
1314
1315 matcher.reset("zzzabccczzzabcczzzabccczzz");
1316 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1317 failCount++;
1318
1319 matcher.reset("zzzabccczzzabcczzzabccczzz");
1320 String result = matcher.replaceFirst("$1");
1321 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1322 failCount++;
1323
1324 matcher.reset("zzzabccczzzabcczzzabccczzz");
1325 result = matcher.replaceFirst("$2");
1326 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1327 failCount++;
1328
1329 pattern = Pattern.compile("a*");
1330 matcher = pattern.matcher("aaaaaaaaaa");
1331 if (!matcher.replaceFirst("test").equals("test"))
1332 failCount++;
1333
1334 pattern = Pattern.compile("a+");
1335 matcher = pattern.matcher("zzzaaaaaaaaaa");
1336 if (!matcher.replaceFirst("test").equals("zzztest"))
1337 failCount++;
1338
1339 // Supplementary character test
1340 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1341 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1342 if (!matcher.replaceFirst(toSupplementaries("test"))
1343 .equals(toSupplementaries("testzzzabcczzzabccc")))
1344 failCount++;
1345
1346 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1347 if (!matcher.replaceFirst(toSupplementaries("test")).
1348 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1349 failCount++;
1350
1351 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1352 result = matcher.replaceFirst("$1");
1353 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1354 failCount++;
1355
1356 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1357 result = matcher.replaceFirst("$2");
1358 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1359 failCount++;
1360
1361 pattern = Pattern.compile(toSupplementaries("a*"));
1362 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1363 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1364 failCount++;
1365
1366 pattern = Pattern.compile(toSupplementaries("a+"));
1367 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1368 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1369 failCount++;
1370
1371 report("Replace First");
1372 }
1373
1374 private static void unixLinesTest() {
1375 Pattern pattern = Pattern.compile(".*");
1376 Matcher matcher = pattern.matcher("aa\u2028blah");
1377 matcher.find();
1378 if (!matcher.group(0).equals("aa"))
1379 failCount++;
1380
1381 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1382 matcher = pattern.matcher("aa\u2028blah");
1383 matcher.find();
1384 if (!matcher.group(0).equals("aa\u2028blah"))
1385 failCount++;
1386
1387 pattern = Pattern.compile("[az]$",
1388 Pattern.MULTILINE | Pattern.UNIX_LINES);
1389 matcher = pattern.matcher("aa\u2028zz");
1390 check(matcher, "a\u2028", false);
1391
1392 // Supplementary character test
1393 pattern = Pattern.compile(".*");
1394 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1395 matcher.find();
1396 if (!matcher.group(0).equals(toSupplementaries("aa")))
1397 failCount++;
1398
1399 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1400 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1401 matcher.find();
1402 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1403 failCount++;
1404
1405 pattern = Pattern.compile(toSupplementaries("[az]$"),
1406 Pattern.MULTILINE | Pattern.UNIX_LINES);
1407 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1408 check(matcher, toSupplementaries("a\u2028"), false);
1409
1410 report("Unix Lines");
1411 }
1412
1413 private static void commentsTest() {
1414 int flags = Pattern.COMMENTS;
1415
1416 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1417 Matcher matcher = pattern.matcher("aa#aa");
1418 if (!matcher.matches())
1419 failCount++;
1420
1421 pattern = Pattern.compile("aa # blah", flags);
1422 matcher = pattern.matcher("aa");
1423 if (!matcher.matches())
1424 failCount++;
1425
1426 pattern = Pattern.compile("aa blah", flags);
1427 matcher = pattern.matcher("aablah");
1428 if (!matcher.matches())
1429 failCount++;
1430
1431 pattern = Pattern.compile("aa # blah blech ", flags);
1432 matcher = pattern.matcher("aa");
1433 if (!matcher.matches())
1434 failCount++;
1435
1436 pattern = Pattern.compile("aa # blah\n ", flags);
1437 matcher = pattern.matcher("aa");
1438 if (!matcher.matches())
1439 failCount++;
1440
1441 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1442 matcher = pattern.matcher("aabc");
1443 if (!matcher.matches())
1444 failCount++;
1445
1446 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1447 matcher = pattern.matcher("aabc");
1448 if (!matcher.matches())
1449 failCount++;
1450
1451 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1452 matcher = pattern.matcher("aabc#blech");
1453 if (!matcher.matches())
1454 failCount++;
1455
1456 // Supplementary character test
1457 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1458 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1459 if (!matcher.matches())
1460 failCount++;
1461
1462 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1463 matcher = pattern.matcher(toSupplementaries("aa"));
1464 if (!matcher.matches())
1465 failCount++;
1466
1467 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1468 matcher = pattern.matcher(toSupplementaries("aablah"));
1469 if (!matcher.matches())
1470 failCount++;
1471
1472 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1473 matcher = pattern.matcher(toSupplementaries("aa"));
1474 if (!matcher.matches())
1475 failCount++;
1476
1477 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1478 matcher = pattern.matcher(toSupplementaries("aa"));
1479 if (!matcher.matches())
1480 failCount++;
1481
1482 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1483 matcher = pattern.matcher(toSupplementaries("aabc"));
1484 if (!matcher.matches())
1485 failCount++;
1486
1487 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1488 matcher = pattern.matcher(toSupplementaries("aabc"));
1489 if (!matcher.matches())
1490 failCount++;
1491
1492 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1493 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1494 if (!matcher.matches())
1495 failCount++;
1496
1497 report("Comments");
1498 }
1499
1500 private static void caseFoldingTest() { // bug 4504687
1501 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1502 Pattern pattern = Pattern.compile("aa", flags);
1503 Matcher matcher = pattern.matcher("ab");
1504 if (matcher.matches())
1505 failCount++;
1506
1507 pattern = Pattern.compile("aA", flags);
1508 matcher = pattern.matcher("ab");
1509 if (matcher.matches())
1510 failCount++;
1511
1512 pattern = Pattern.compile("aa", flags);
1513 matcher = pattern.matcher("aB");
1514 if (matcher.matches())
1515 failCount++;
1516 matcher = pattern.matcher("Ab");
1517 if (matcher.matches())
1518 failCount++;
1519
1520 // ASCII "a"
1521 // Latin-1 Supplement "a" + grave
1522 // Cyrillic "a"
1523 String[] patterns = new String[] {
1524 //single
1525 "a", "\u00e0", "\u0430",
1526 //slice
1527 "ab", "\u00e0\u00e1", "\u0430\u0431",
1528 //class single
1529 "[a]", "[\u00e0]", "[\u0430]",
1530 //class range
1531 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1532 //back reference
1533 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1534 };
1535
1536 String[] texts = new String[] {
1537 "A", "\u00c0", "\u0410",
1538 "AB", "\u00c0\u00c1", "\u0410\u0411",
1539 "A", "\u00c0", "\u0410",
1540 "B", "\u00c2", "\u0411",
1541 "aA", "\u00e0\u00c0", "\u0430\u0410"
1542 };
1543
1544 boolean[] expected = new boolean[] {
1545 true, false, false,
1546 true, false, false,
1547 true, false, false,
1548 true, false, false,
1549 true, false, false
1550 };
1551
1552 flags = Pattern.CASE_INSENSITIVE;
1553 for (int i = 0; i < patterns.length; i++) {
1554 pattern = Pattern.compile(patterns[i], flags);
1555 matcher = pattern.matcher(texts[i]);
1556 if (matcher.matches() != expected[i]) {
1557 System.out.println("<1> Failed at " + i);
1558 failCount++;
1559 }
1560 }
1561
1562 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1563 for (int i = 0; i < patterns.length; i++) {
1564 pattern = Pattern.compile(patterns[i], flags);
1565 matcher = pattern.matcher(texts[i]);
1566 if (!matcher.matches()) {
1567 System.out.println("<2> Failed at " + i);
1568 failCount++;
1569 }
1570 }
1571 // flag unicode_case alone should do nothing
1572 flags = Pattern.UNICODE_CASE;
1573 for (int i = 0; i < patterns.length; i++) {
1574 pattern = Pattern.compile(patterns[i], flags);
1575 matcher = pattern.matcher(texts[i]);
1576 if (matcher.matches()) {
1577 System.out.println("<3> Failed at " + i);
1578 failCount++;
1579 }
1580 }
1581
1582 // Special cases: i, I, u+0131 and u+0130
1583 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1584 pattern = Pattern.compile("[h-j]+", flags);
1585 if (!pattern.matcher("\u0131\u0130").matches())
1586 failCount++;
1587 report("Case Folding");
1588 }
1589
1590 private static void appendTest() {
1591 Pattern pattern = Pattern.compile("(ab)(cd)");
1592 Matcher matcher = pattern.matcher("abcd");
1593 String result = matcher.replaceAll("$2$1");
1594 if (!result.equals("cdab"))
1595 failCount++;
1596
1597 String s1 = "Swap all: first = 123, second = 456";
1598 String s2 = "Swap one: first = 123, second = 456";
1599 String r = "$3$2$1";
1600 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1601 matcher = pattern.matcher(s1);
1602
1603 result = matcher.replaceAll(r);
1604 if (!result.equals("Swap all: 123 = first, 456 = second"))
1605 failCount++;
1606
1607 matcher = pattern.matcher(s2);
1608
1609 if (matcher.find()) {
1610 StringBuffer sb = new StringBuffer();
1611 matcher.appendReplacement(sb, r);
1612 matcher.appendTail(sb);
1613 result = sb.toString();
1614 if (!result.equals("Swap one: 123 = first, second = 456"))
1615 failCount++;
1616 }
1617
1618 // Supplementary character test
1619 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1620 matcher = pattern.matcher(toSupplementaries("abcd"));
1621 result = matcher.replaceAll("$2$1");
1622 if (!result.equals(toSupplementaries("cdab")))
1623 failCount++;
1624
1625 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1626 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1627 r = toSupplementaries("$3$2$1");
1628 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1629 matcher = pattern.matcher(s1);
1630
1631 result = matcher.replaceAll(r);
1632 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1633 failCount++;
1634
1635 matcher = pattern.matcher(s2);
1636
1637 if (matcher.find()) {
1638 StringBuffer sb = new StringBuffer();
1639 matcher.appendReplacement(sb, r);
1640 matcher.appendTail(sb);
1641 result = sb.toString();
1642 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1643 failCount++;
1644 }
1645 report("Append");
1646 }
1647
1648 private static void splitTest() {
1649 Pattern pattern = Pattern.compile(":");
1650 String[] result = pattern.split("foo:and:boo", 2);
1651 if (!result[0].equals("foo"))
1652 failCount++;
1653 if (!result[1].equals("and:boo"))
1654 failCount++;
1655 // Supplementary character test
1656 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1657 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1658 if (!result[0].equals(toSupplementaries("foo")))
1659 failCount++;
1660 if (!result[1].equals(toSupplementaries("andXboo")))
1661 failCount++;
1662
1663 CharBuffer cb = CharBuffer.allocate(100);
1664 cb.put("foo:and:boo");
1665 cb.flip();
1666 result = pattern.split(cb);
1667 if (!result[0].equals("foo"))
1668 failCount++;
1669 if (!result[1].equals("and"))
1670 failCount++;
1671 if (!result[2].equals("boo"))
1672 failCount++;
1673
1674 // Supplementary character test
1675 CharBuffer cbs = CharBuffer.allocate(100);
1676 cbs.put(toSupplementaries("fooXandXboo"));
1677 cbs.flip();
1678 result = patternX.split(cbs);
1679 if (!result[0].equals(toSupplementaries("foo")))
1680 failCount++;
1681 if (!result[1].equals(toSupplementaries("and")))
1682 failCount++;
1683 if (!result[2].equals(toSupplementaries("boo")))
1684 failCount++;
1685
1686 String source = "0123456789";
1687 for (int limit=-2; limit<3; limit++) {
1688 for (int x=0; x<10; x++) {
1689 result = source.split(Integer.toString(x), limit);
1690 int expectedLength = limit < 1 ? 2 : limit;
1691
1692 if ((limit == 0) && (x == 9)) {
1693 // expected dropping of ""
1694 if (result.length != 1)
1695 failCount++;
1696 if (!result[0].equals("012345678")) {
1697 failCount++;
1698 }
1699 } else {
1700 if (result.length != expectedLength) {
1701 failCount++;
1702 }
1703 if (!result[0].equals(source.substring(0,x))) {
1704 if (limit != 1) {
1705 failCount++;
1706 } else {
1707 if (!result[0].equals(source.substring(0,10))) {
1708 failCount++;
1709 }
1710 }
1711 }
1712 if (expectedLength > 1) { // Check segment 2
1713 if (!result[1].equals(source.substring(x+1,10)))
1714 failCount++;
1715 }
1716 }
1717 }
1718 }
1719 // Check the case for no match found
1720 for (int limit=-2; limit<3; limit++) {
1721 result = source.split("e", limit);
1722 if (result.length != 1)
1723 failCount++;
1724 if (!result[0].equals(source))
1725 failCount++;
1726 }
1727 // Check the case for limit == 0, source = "";
1728 source = "";
1729 result = source.split("e", 0);
1730 if (result.length != 1)
1731 failCount++;
1732 if (!result[0].equals(source))
1733 failCount++;
1734
1735 report("Split");
1736 }
1737
1738 private static void negationTest() {
1739 Pattern pattern = Pattern.compile("[\\[@^]+");
1740 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1741 if (!matcher.find())
1742 failCount++;
1743 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1744 failCount++;
1745 pattern = Pattern.compile("[@\\[^]+");
1746 matcher = pattern.matcher("@@@@[[[[^^^^");
1747 if (!matcher.find())
1748 failCount++;
1749 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1750 failCount++;
1751 pattern = Pattern.compile("[@\\[^@]+");
1752 matcher = pattern.matcher("@@@@[[[[^^^^");
1753 if (!matcher.find())
1754 failCount++;
1755 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1756 failCount++;
1757
1758 pattern = Pattern.compile("\\)");
1759 matcher = pattern.matcher("xxx)xxx");
1760 if (!matcher.find())
1761 failCount++;
1762
1763 report("Negation");
1764 }
1765
1766 private static void ampersandTest() {
1767 Pattern pattern = Pattern.compile("[&@]+");
1768 check(pattern, "@@@@&&&&", true);
1769
1770 pattern = Pattern.compile("[@&]+");
1771 check(pattern, "@@@@&&&&", true);
1772
1773 pattern = Pattern.compile("[@\\&]+");
1774 check(pattern, "@@@@&&&&", true);
1775
1776 report("Ampersand");
1777 }
1778
1779 private static void octalTest() throws Exception {
1780 Pattern pattern = Pattern.compile("\\u0007");
1781 Matcher matcher = pattern.matcher("\u0007");
1782 if (!matcher.matches())
1783 failCount++;
1784 pattern = Pattern.compile("\\07");
1785 matcher = pattern.matcher("\u0007");
1786 if (!matcher.matches())
1787 failCount++;
1788 pattern = Pattern.compile("\\007");
1789 matcher = pattern.matcher("\u0007");
1790 if (!matcher.matches())
1791 failCount++;
1792 pattern = Pattern.compile("\\0007");
1793 matcher = pattern.matcher("\u0007");
1794 if (!matcher.matches())
1795 failCount++;
1796 pattern = Pattern.compile("\\040");
1797 matcher = pattern.matcher("\u0020");
1798 if (!matcher.matches())
1799 failCount++;
1800 pattern = Pattern.compile("\\0403");
1801 matcher = pattern.matcher("\u00203");
1802 if (!matcher.matches())
1803 failCount++;
1804 pattern = Pattern.compile("\\0103");
1805 matcher = pattern.matcher("\u0043");
1806 if (!matcher.matches())
1807 failCount++;
1808
1809 report("Octal");
1810 }
1811
1812 private static void longPatternTest() throws Exception {
1813 try {
1814 Pattern pattern = Pattern.compile(
1815 "a 32-character-long pattern xxxx");
1816 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1817 pattern = Pattern.compile("a thirty four character long regex");
1818 StringBuffer patternToBe = new StringBuffer(101);
1819 for (int i=0; i<100; i++)
1820 patternToBe.append((char)(97 + i%26));
1821 pattern = Pattern.compile(patternToBe.toString());
1822 } catch (PatternSyntaxException e) {
1823 failCount++;
1824 }
1825
1826 // Supplementary character test
1827 try {
1828 Pattern pattern = Pattern.compile(
1829 toSupplementaries("a 32-character-long pattern xxxx"));
1830 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1831 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1832 StringBuffer patternToBe = new StringBuffer(101*2);
1833 for (int i=0; i<100; i++)
1834 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1835 + 97 + i%26));
1836 pattern = Pattern.compile(patternToBe.toString());
1837 } catch (PatternSyntaxException e) {
1838 failCount++;
1839 }
1840 report("LongPattern");
1841 }
1842
1843 private static void group0Test() throws Exception {
1844 Pattern pattern = Pattern.compile("(tes)ting");
1845 Matcher matcher = pattern.matcher("testing");
1846 check(matcher, "testing");
1847
1848 matcher.reset("testing");
1849 if (matcher.lookingAt()) {
1850 if (!matcher.group(0).equals("testing"))
1851 failCount++;
1852 } else {
1853 failCount++;
1854 }
1855
1856 matcher.reset("testing");
1857 if (matcher.matches()) {
1858 if (!matcher.group(0).equals("testing"))
1859 failCount++;
1860 } else {
1861 failCount++;
1862 }
1863
1864 pattern = Pattern.compile("(tes)ting");
1865 matcher = pattern.matcher("testing");
1866 if (matcher.lookingAt()) {
1867 if (!matcher.group(0).equals("testing"))
1868 failCount++;
1869 } else {
1870 failCount++;
1871 }
1872
1873 pattern = Pattern.compile("^(tes)ting");
1874 matcher = pattern.matcher("testing");
1875 if (matcher.matches()) {
1876 if (!matcher.group(0).equals("testing"))
1877 failCount++;
1878 } else {
1879 failCount++;
1880 }
1881
1882 // Supplementary character test
1883 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1884 matcher = pattern.matcher(toSupplementaries("testing"));
1885 check(matcher, toSupplementaries("testing"));
1886
1887 matcher.reset(toSupplementaries("testing"));
1888 if (matcher.lookingAt()) {
1889 if (!matcher.group(0).equals(toSupplementaries("testing")))
1890 failCount++;
1891 } else {
1892 failCount++;
1893 }
1894
1895 matcher.reset(toSupplementaries("testing"));
1896 if (matcher.matches()) {
1897 if (!matcher.group(0).equals(toSupplementaries("testing")))
1898 failCount++;
1899 } else {
1900 failCount++;
1901 }
1902
1903 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1904 matcher = pattern.matcher(toSupplementaries("testing"));
1905 if (matcher.lookingAt()) {
1906 if (!matcher.group(0).equals(toSupplementaries("testing")))
1907 failCount++;
1908 } else {
1909 failCount++;
1910 }
1911
1912 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1913 matcher = pattern.matcher(toSupplementaries("testing"));
1914 if (matcher.matches()) {
1915 if (!matcher.group(0).equals(toSupplementaries("testing")))
1916 failCount++;
1917 } else {
1918 failCount++;
1919 }
1920
1921 report("Group0");
1922 }
1923
1924 private static void findIntTest() throws Exception {
1925 Pattern p = Pattern.compile("blah");
1926 Matcher m = p.matcher("zzzzblahzzzzzblah");
1927 boolean result = m.find(2);
1928 if (!result)
1929 failCount++;
1930
1931 p = Pattern.compile("$");
1932 m = p.matcher("1234567890");
1933 result = m.find(10);
1934 if (!result)
1935 failCount++;
1936 try {
1937 result = m.find(11);
1938 failCount++;
1939 } catch (IndexOutOfBoundsException e) {
1940 // correct result
1941 }
1942
1943 // Supplementary character test
1944 p = Pattern.compile(toSupplementaries("blah"));
1945 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1946 result = m.find(2);
1947 if (!result)
1948 failCount++;
1949
1950 report("FindInt");
1951 }
1952
1953 private static void emptyPatternTest() throws Exception {
1954 Pattern p = Pattern.compile("");
1955 Matcher m = p.matcher("foo");
1956
1957 // Should find empty pattern at beginning of input
1958 boolean result = m.find();
1959 if (result != true)
1960 failCount++;
1961 if (m.start() != 0)
1962 failCount++;
1963
1964 // Should not match entire input if input is not empty
1965 m.reset();
1966 result = m.matches();
1967 if (result == true)
1968 failCount++;
1969
1970 try {
1971 m.start(0);
1972 failCount++;
1973 } catch (IllegalStateException e) {
1974 // Correct result
1975 }
1976
1977 // Should match entire input if input is empty
1978 m.reset("");
1979 result = m.matches();
1980 if (result != true)
1981 failCount++;
1982
1983 result = Pattern.matches("", "");
1984 if (result != true)
1985 failCount++;
1986
1987 result = Pattern.matches("", "foo");
1988 if (result == true)
1989 failCount++;
1990 report("EmptyPattern");
1991 }
1992
1993 private static void charClassTest() throws Exception {
1994 Pattern pattern = Pattern.compile("blah[ab]]blech");
1995 check(pattern, "blahb]blech", true);
1996
1997 pattern = Pattern.compile("[abc[def]]");
1998 check(pattern, "b", true);
1999
2000 // Supplementary character tests
2001 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2002 check(pattern, toSupplementaries("blahb]blech"), true);
2003
2004 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2005 check(pattern, toSupplementaries("b"), true);
2006
2007 try {
2008 // u00ff when UNICODE_CASE
2009 pattern = Pattern.compile("[ab\u00ffcd]",
2010 Pattern.CASE_INSENSITIVE|
2011 Pattern.UNICODE_CASE);
2012 check(pattern, "ab\u00ffcd", true);
2013 check(pattern, "Ab\u0178Cd", true);
2014
2015 // u00b5 when UNICODE_CASE
2016 pattern = Pattern.compile("[ab\u00b5cd]",
2017 Pattern.CASE_INSENSITIVE|
2018 Pattern.UNICODE_CASE);
2019 check(pattern, "ab\u00b5cd", true);
2020 check(pattern, "Ab\u039cCd", true);
2021 } catch (Exception e) { failCount++; }
2022
2023 /* Special cases
2024 (1)LatinSmallLetterLongS u+017f
2025 (2)LatinSmallLetterDotlessI u+0131
2026 (3)LatineCapitalLetterIWithDotAbove u+0130
2027 (4)KelvinSign u+212a
2028 (5)AngstromSign u+212b
2029 */
2030 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2031 pattern = Pattern.compile("[sik\u00c5]+", flags);
2032 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2033 failCount++;
2034
2035 report("CharClass");
2036 }
2037
2038 private static void caretTest() throws Exception {
2039 Pattern pattern = Pattern.compile("\\w*");
2040 Matcher matcher = pattern.matcher("a#bc#def##g");
2041 check(matcher, "a");
2042 check(matcher, "");
2043 check(matcher, "bc");
2044 check(matcher, "");
2045 check(matcher, "def");
2046 check(matcher, "");
2047 check(matcher, "");
2048 check(matcher, "g");
2049 check(matcher, "");
2050 if (matcher.find())
2051 failCount++;
2052
2053 pattern = Pattern.compile("^\\w*");
2054 matcher = pattern.matcher("a#bc#def##g");
2055 check(matcher, "a");
2056 if (matcher.find())
2057 failCount++;
2058
2059 pattern = Pattern.compile("\\w");
2060 matcher = pattern.matcher("abc##x");
2061 check(matcher, "a");
2062 check(matcher, "b");
2063 check(matcher, "c");
2064 check(matcher, "x");
2065 if (matcher.find())
2066 failCount++;
2067
2068 pattern = Pattern.compile("^\\w");
2069 matcher = pattern.matcher("abc##x");
2070 check(matcher, "a");
2071 if (matcher.find())
2072 failCount++;
2073
2074 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2075 matcher = pattern.matcher("abcdef-ghi\njklmno");
2076 check(matcher, "abc");
2077 if (matcher.find())
2078 failCount++;
2079
2080 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2081 matcher = pattern.matcher("abcdef-ghi\njklmno");
2082 check(matcher, "abc");
2083 check(matcher, "jkl");
2084 if (matcher.find())
2085 failCount++;
2086
2087 pattern = Pattern.compile("^", Pattern.MULTILINE);
2088 matcher = pattern.matcher("this is some text");
2089 String result = matcher.replaceAll("X");
2090 if (!result.equals("Xthis is some text"))
2091 failCount++;
2092
2093 pattern = Pattern.compile("^");
2094 matcher = pattern.matcher("this is some text");
2095 result = matcher.replaceAll("X");
2096 if (!result.equals("Xthis is some text"))
2097 failCount++;
2098
2099 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2100 matcher = pattern.matcher("this is some text\n");
2101 result = matcher.replaceAll("X");
2102 if (!result.equals("Xthis is some text\n"))
2103 failCount++;
2104
2105 report("Caret");
2106 }
2107
2108 private static void groupCaptureTest() throws Exception {
2109 // Independent group
2110 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2111 Matcher matcher = pattern.matcher("xxxyyyzzz");
2112 matcher.find();
2113 try {
2114 String blah = matcher.group(1);
2115 failCount++;
2116 } catch (IndexOutOfBoundsException ioobe) {
2117 // Good result
2118 }
2119 // Pure group
2120 pattern = Pattern.compile("x+(?:y+)z+");
2121 matcher = pattern.matcher("xxxyyyzzz");
2122 matcher.find();
2123 try {
2124 String blah = matcher.group(1);
2125 failCount++;
2126 } catch (IndexOutOfBoundsException ioobe) {
2127 // Good result
2128 }
2129
2130 // Supplementary character tests
2131 // Independent group
2132 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2133 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2134 matcher.find();
2135 try {
2136 String blah = matcher.group(1);
2137 failCount++;
2138 } catch (IndexOutOfBoundsException ioobe) {
2139 // Good result
2140 }
2141 // Pure group
2142 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2143 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2144 matcher.find();
2145 try {
2146 String blah = matcher.group(1);
2147 failCount++;
2148 } catch (IndexOutOfBoundsException ioobe) {
2149 // Good result
2150 }
2151
2152 report("GroupCapture");
2153 }
2154
2155 private static void backRefTest() throws Exception {
2156 Pattern pattern = Pattern.compile("(a*)bc\\1");
2157 check(pattern, "zzzaabcazzz", true);
2158
2159 pattern = Pattern.compile("(a*)bc\\1");
2160 check(pattern, "zzzaabcaazzz", true);
2161
2162 pattern = Pattern.compile("(abc)(def)\\1");
2163 check(pattern, "abcdefabc", true);
2164
2165 pattern = Pattern.compile("(abc)(def)\\3");
2166 check(pattern, "abcdefabc", false);
2167
2168 try {
2169 for (int i = 1; i < 10; i++) {
2170 // Make sure backref 1-9 are always accepted
2171 pattern = Pattern.compile("abcdef\\" + i);
2172 // and fail to match if the target group does not exit
2173 check(pattern, "abcdef", false);
2174 }
2175 } catch(PatternSyntaxException e) {
2176 failCount++;
2177 }
2178
2179 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2180 check(pattern, "abcdefghija", false);
2181 check(pattern, "abcdefghija1", true);
2182
2183 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2184 check(pattern, "abcdefghijkk", true);
2185
2186 pattern = Pattern.compile("(a)bcdefghij\\11");
2187 check(pattern, "abcdefghija1", true);
2188
2189 // Supplementary character tests
2190 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2191 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2192
2193 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2194 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2195
2196 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2197 check(pattern, toSupplementaries("abcdefabc"), true);
2198
2199 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2200 check(pattern, toSupplementaries("abcdefabc"), false);
2201
2202 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2203 check(pattern, toSupplementaries("abcdefghija"), false);
2204 check(pattern, toSupplementaries("abcdefghija1"), true);
2205
2206 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2207 check(pattern, toSupplementaries("abcdefghijkk"), true);
2208
2209 report("BackRef");
2210 }
2211
2212 /**
2213 * Unicode Technical Report #18, section 2.6 End of Line
2214 * There is no empty line to be matched in the sequence \u000D\u000A
2215 * but there is an empty line in the sequence \u000A\u000D.
2216 */
2217 private static void anchorTest() throws Exception {
2218 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2219 Matcher m = p.matcher("blah1\r\nblah2");
2220 m.find();
2221 m.find();
2222 if (!m.group().equals("blah2"))
2223 failCount++;
2224
2225 m.reset("blah1\n\rblah2");
2226 m.find();
2227 m.find();
2228 m.find();
2229 if (!m.group().equals("blah2"))
2230 failCount++;
2231
2232 // Test behavior of $ with \r\n at end of input
2233 p = Pattern.compile(".+$");
2234 m = p.matcher("blah1\r\n");
2235 if (!m.find())
2236 failCount++;
2237 if (!m.group().equals("blah1"))
2238 failCount++;
2239 if (m.find())
2240 failCount++;
2241
2242 // Test behavior of $ with \r\n at end of input in multiline
2243 p = Pattern.compile(".+$", Pattern.MULTILINE);
2244 m = p.matcher("blah1\r\n");
2245 if (!m.find())
2246 failCount++;
2247 if (m.find())
2248 failCount++;
2249
2250 // Test for $ recognition of \u0085 for bug 4527731
2251 p = Pattern.compile(".+$", Pattern.MULTILINE);
2252 m = p.matcher("blah1\u0085");
2253 if (!m.find())
2254 failCount++;
2255
2256 // Supplementary character test
2257 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2258 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2259 m.find();
2260 m.find();
2261 if (!m.group().equals(toSupplementaries("blah2")))
2262 failCount++;
2263
2264 m.reset(toSupplementaries("blah1\n\rblah2"));
2265 m.find();
2266 m.find();
2267 m.find();
2268 if (!m.group().equals(toSupplementaries("blah2")))
2269 failCount++;
2270
2271 // Test behavior of $ with \r\n at end of input
2272 p = Pattern.compile(".+$");
2273 m = p.matcher(toSupplementaries("blah1\r\n"));
2274 if (!m.find())
2275 failCount++;
2276 if (!m.group().equals(toSupplementaries("blah1")))
2277 failCount++;
2278 if (m.find())
2279 failCount++;
2280
2281 // Test behavior of $ with \r\n at end of input in multiline
2282 p = Pattern.compile(".+$", Pattern.MULTILINE);
2283 m = p.matcher(toSupplementaries("blah1\r\n"));
2284 if (!m.find())
2285 failCount++;
2286 if (m.find())
2287 failCount++;
2288
2289 // Test for $ recognition of \u0085 for bug 4527731
2290 p = Pattern.compile(".+$", Pattern.MULTILINE);
2291 m = p.matcher(toSupplementaries("blah1\u0085"));
2292 if (!m.find())
2293 failCount++;
2294
2295 report("Anchors");
2296 }
2297
2298 /**
2299 * A basic sanity test of Matcher.lookingAt().
2300 */
2301 private static void lookingAtTest() throws Exception {
2302 Pattern p = Pattern.compile("(ab)(c*)");
2303 Matcher m = p.matcher("abccczzzabcczzzabccc");
2304
2305 if (!m.lookingAt())
2306 failCount++;
2307
2308 if (!m.group().equals(m.group(0)))
2309 failCount++;
2310
2311 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2312 if (m.lookingAt())
2313 failCount++;
2314
2315 // Supplementary character test
2316 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2317 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2318
2319 if (!m.lookingAt())
2320 failCount++;
2321
2322 if (!m.group().equals(m.group(0)))
2323 failCount++;
2324
2325 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2326 if (m.lookingAt())
2327 failCount++;
2328
2329 report("Looking At");
2330 }
2331
2332 /**
2333 * A basic sanity test of Matcher.matches().
2334 */
2335 private static void matchesTest() throws Exception {
2336 // matches()
2337 Pattern p = Pattern.compile("ulb(c*)");
2338 Matcher m = p.matcher("ulbcccccc");
2339 if (!m.matches())
2340 failCount++;
2341
2342 // find() but not matches()
2343 m.reset("zzzulbcccccc");
2344 if (m.matches())
2345 failCount++;
2346
2347 // lookingAt() but not matches()
2348 m.reset("ulbccccccdef");
2349 if (m.matches())
2350 failCount++;
2351
2352 // matches()
2353 p = Pattern.compile("a|ad");
2354 m = p.matcher("ad");
2355 if (!m.matches())
2356 failCount++;
2357
2358 // Supplementary character test
2359 // matches()
2360 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2361 m = p.matcher(toSupplementaries("ulbcccccc"));
2362 if (!m.matches())
2363 failCount++;
2364
2365 // find() but not matches()
2366 m.reset(toSupplementaries("zzzulbcccccc"));
2367 if (m.matches())
2368 failCount++;
2369
2370 // lookingAt() but not matches()
2371 m.reset(toSupplementaries("ulbccccccdef"));
2372 if (m.matches())
2373 failCount++;
2374
2375 // matches()
2376 p = Pattern.compile(toSupplementaries("a|ad"));
2377 m = p.matcher(toSupplementaries("ad"));
2378 if (!m.matches())
2379 failCount++;
2380
2381 report("Matches");
2382 }
2383
2384 /**
2385 * A basic sanity test of Pattern.matches().
2386 */
2387 private static void patternMatchesTest() throws Exception {
2388 // matches()
2389 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2390 toSupplementaries("ulbcccccc")))
2391 failCount++;
2392
2393 // find() but not matches()
2394 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2395 toSupplementaries("zzzulbcccccc")))
2396 failCount++;
2397
2398 // lookingAt() but not matches()
2399 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2400 toSupplementaries("ulbccccccdef")))
2401 failCount++;
2402
2403 // Supplementary character test
2404 // matches()
2405 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2406 toSupplementaries("ulbcccccc")))
2407 failCount++;
2408
2409 // find() but not matches()
2410 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2411 toSupplementaries("zzzulbcccccc")))
2412 failCount++;
2413
2414 // lookingAt() but not matches()
2415 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2416 toSupplementaries("ulbccccccdef")))
2417 failCount++;
2418
2419 report("Pattern Matches");
2420 }
2421
2422 /**
2423 * Canonical equivalence testing. Tests the ability of the engine
2424 * to match sequences that are not explicitly specified in the
2425 * pattern when they are considered equivalent by the Unicode Standard.
2426 */
2427 private static void ceTest() throws Exception {
2428 // Decomposed char outside char classes
2429 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2430 Matcher m = p.matcher("test\u00e5");
2431 if (!m.matches())
2432 failCount++;
2433
2434 m.reset("testa\u030a");
2435 if (!m.matches())
2436 failCount++;
2437
2438 // Composed char outside char classes
2439 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2440 m = p.matcher("test\u00e5");
2441 if (!m.matches())
2442 failCount++;
2443
2444 m.reset("testa\u030a");
2445 if (!m.find())
2446 failCount++;
2447
2448 // Decomposed char inside a char class
2449 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2450 m = p.matcher("test\u00e5");
2451 if (!m.find())
2452 failCount++;
2453
2454 m.reset("testa\u030a");
2455 if (!m.find())
2456 failCount++;
2457
2458 // Composed char inside a char class
2459 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2460 m = p.matcher("test\u00e5");
2461 if (!m.find())
2462 failCount++;
2463
2464 m.reset("testa\u0300");
2465 if (!m.find())
2466 failCount++;
2467
2468 m.reset("testa\u030a");
2469 if (!m.find())
2470 failCount++;
2471
2472 // Marks that cannot legally change order and be equivalent
2473 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2474 check(p, "testa\u0308\u0300", true);
2475 check(p, "testa\u0300\u0308", false);
2476
2477 // Marks that can legally change order and be equivalent
2478 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2479 check(p, "testa\u0308\u0323", true);
2480 check(p, "testa\u0323\u0308", true);
2481
2482 // Test all equivalences of the sequence a\u0308\u0323\u0300
2483 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2484 check(p, "testa\u0308\u0323\u0300", true);
2485 check(p, "testa\u0323\u0308\u0300", true);
2486 check(p, "testa\u0308\u0300\u0323", true);
2487 check(p, "test\u00e4\u0323\u0300", true);
2488 check(p, "test\u00e4\u0300\u0323", true);
2489
2490 /*
2491 * The following canonical equivalence tests don't work. Bug id: 4916384.
2492 *
2493 // Decomposed hangul (jamos)
2494 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2495 m = p.matcher("\u1100\u1161");
2496 if (!m.matches())
2497 failCount++;
2498
2499 m.reset("\uac00");
2500 if (!m.matches())
2501 failCount++;
2502
2503 // Composed hangul
2504 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2505 m = p.matcher("\u1100\u1161");
2506 if (!m.matches())
2507 failCount++;
2508
2509 m.reset("\uac00");
2510 if (!m.matches())
2511 failCount++;
2512
2513 // Decomposed supplementary outside char classes
2514 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2515 m = p.matcher("test\ud834\uddc0");
2516 if (!m.matches())
2517 failCount++;
2518
2519 m.reset("test\ud834\uddbc\ud834\udd6f");
2520 if (!m.matches())
2521 failCount++;
2522
2523 // Composed supplementary outside char classes
2524 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2525 m.reset("test\ud834\uddbc\ud834\udd6f");
2526 if (!m.matches())
2527 failCount++;
2528
2529 m = p.matcher("test\ud834\uddc0");
2530 if (!m.matches())
2531 failCount++;
2532
2533 */
2534
2535 report("Canonical Equivalence");
2536 }
2537
2538 /**
2539 * A basic sanity test of Matcher.replaceAll().
2540 */
2541 private static void globalSubstitute() throws Exception {
2542 // Global substitution with a literal
2543 Pattern p = Pattern.compile("(ab)(c*)");
2544 Matcher m = p.matcher("abccczzzabcczzzabccc");
2545 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2546 failCount++;
2547
2548 m.reset("zzzabccczzzabcczzzabccczzz");
2549 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2550 failCount++;
2551
2552 // Global substitution with groups
2553 m.reset("zzzabccczzzabcczzzabccczzz");
2554 String result = m.replaceAll("$1");
2555 if (!result.equals("zzzabzzzabzzzabzzz"))
2556 failCount++;
2557
2558 // Supplementary character test
2559 // Global substitution with a literal
2560 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2561 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2562 if (!m.replaceAll(toSupplementaries("test")).
2563 equals(toSupplementaries("testzzztestzzztest")))
2564 failCount++;
2565
2566 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2567 if (!m.replaceAll(toSupplementaries("test")).
2568 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2569 failCount++;
2570
2571 // Global substitution with groups
2572 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2573 result = m.replaceAll("$1");
2574 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2575 failCount++;
2576
2577 report("Global Substitution");
2578 }
2579
2580 /**
2581 * Tests the usage of Matcher.appendReplacement() with literal
2582 * and group substitutions.
2583 */
2584 private static void stringbufferSubstitute() throws Exception {
2585 // SB substitution with literal
2586 String blah = "zzzblahzzz";
2587 Pattern p = Pattern.compile("blah");
2588 Matcher m = p.matcher(blah);
2589 StringBuffer result = new StringBuffer();
2590 try {
2591 m.appendReplacement(result, "blech");
2592 failCount++;
2593 } catch (IllegalStateException e) {
2594 }
2595 m.find();
2596 m.appendReplacement(result, "blech");
2597 if (!result.toString().equals("zzzblech"))
2598 failCount++;
2599
2600 m.appendTail(result);
2601 if (!result.toString().equals("zzzblechzzz"))
2602 failCount++;
2603
2604 // SB substitution with groups
2605 blah = "zzzabcdzzz";
2606 p = Pattern.compile("(ab)(cd)*");
2607 m = p.matcher(blah);
2608 result = new StringBuffer();
2609 try {
2610 m.appendReplacement(result, "$1");
2611 failCount++;
2612 } catch (IllegalStateException e) {
2613 }
2614 m.find();
2615 m.appendReplacement(result, "$1");
2616 if (!result.toString().equals("zzzab"))
2617 failCount++;
2618
2619 m.appendTail(result);
2620 if (!result.toString().equals("zzzabzzz"))
2621 failCount++;
2622
2623 // SB substitution with 3 groups
2624 blah = "zzzabcdcdefzzz";
2625 p = Pattern.compile("(ab)(cd)*(ef)");
2626 m = p.matcher(blah);
2627 result = new StringBuffer();
2628 try {
2629 m.appendReplacement(result, "$1w$2w$3");
2630 failCount++;
2631 } catch (IllegalStateException e) {
2632 }
2633 m.find();
2634 m.appendReplacement(result, "$1w$2w$3");
2635 if (!result.toString().equals("zzzabwcdwef"))
2636 failCount++;
2637
2638 m.appendTail(result);
2639 if (!result.toString().equals("zzzabwcdwefzzz"))
2640 failCount++;
2641
2642 // SB substitution with groups and three matches
2643 // skipping middle match
2644 blah = "zzzabcdzzzabcddzzzabcdzzz";
2645 p = Pattern.compile("(ab)(cd*)");
2646 m = p.matcher(blah);
2647 result = new StringBuffer();
2648 try {
2649 m.appendReplacement(result, "$1");
2650 failCount++;
2651 } catch (IllegalStateException e) {
2652 }
2653 m.find();
2654 m.appendReplacement(result, "$1");
2655 if (!result.toString().equals("zzzab"))
2656 failCount++;
2657
2658 m.find();
2659 m.find();
2660 m.appendReplacement(result, "$2");
2661 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2662 failCount++;
2663
2664 m.appendTail(result);
2665 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2666 failCount++;
2667
2668 // Check to make sure escaped $ is ignored
2669 blah = "zzzabcdcdefzzz";
2670 p = Pattern.compile("(ab)(cd)*(ef)");
2671 m = p.matcher(blah);
2672 result = new StringBuffer();
2673 m.find();
2674 m.appendReplacement(result, "$1w\\$2w$3");
2675 if (!result.toString().equals("zzzabw$2wef"))
2676 failCount++;
2677
2678 m.appendTail(result);
2679 if (!result.toString().equals("zzzabw$2wefzzz"))
2680 failCount++;
2681
2682 // Check to make sure a reference to nonexistent group causes error
2683 blah = "zzzabcdcdefzzz";
2684 p = Pattern.compile("(ab)(cd)*(ef)");
2685 m = p.matcher(blah);
2686 result = new StringBuffer();
2687 m.find();
2688 try {
2689 m.appendReplacement(result, "$1w$5w$3");
2690 failCount++;
2691 } catch (IndexOutOfBoundsException ioobe) {
2692 // Correct result
2693 }
2694
2695 // Check double digit group references
2696 blah = "zzz123456789101112zzz";
2697 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2698 m = p.matcher(blah);
2699 result = new StringBuffer();
2700 m.find();
2701 m.appendReplacement(result, "$1w$11w$3");
2702 if (!result.toString().equals("zzz1w11w3"))
2703 failCount++;
2704
2705 // Check to make sure it backs off $15 to $1 if only three groups
2706 blah = "zzzabcdcdefzzz";
2707 p = Pattern.compile("(ab)(cd)*(ef)");
2708 m = p.matcher(blah);
2709 result = new StringBuffer();
2710 m.find();
2711 m.appendReplacement(result, "$1w$15w$3");
2712 if (!result.toString().equals("zzzabwab5wef"))
2713 failCount++;
2714
2715
2716 // Supplementary character test
2717 // SB substitution with literal
2718 blah = toSupplementaries("zzzblahzzz");
2719 p = Pattern.compile(toSupplementaries("blah"));
2720 m = p.matcher(blah);
2721 result = new StringBuffer();
2722 try {
2723 m.appendReplacement(result, toSupplementaries("blech"));
2724 failCount++;
2725 } catch (IllegalStateException e) {
2726 }
2727 m.find();
2728 m.appendReplacement(result, toSupplementaries("blech"));
2729 if (!result.toString().equals(toSupplementaries("zzzblech")))
2730 failCount++;
2731
2732 m.appendTail(result);
2733 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2734 failCount++;
2735
2736 // SB substitution with groups
2737 blah = toSupplementaries("zzzabcdzzz");
2738 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2739 m = p.matcher(blah);
2740 result = new StringBuffer();
2741 try {
2742 m.appendReplacement(result, "$1");
2743 failCount++;
2744 } catch (IllegalStateException e) {
2745 }
2746 m.find();
2747 m.appendReplacement(result, "$1");
2748 if (!result.toString().equals(toSupplementaries("zzzab")))
2749 failCount++;
2750
2751 m.appendTail(result);
2752 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2753 failCount++;
2754
2755 // SB substitution with 3 groups
2756 blah = toSupplementaries("zzzabcdcdefzzz");
2757 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2758 m = p.matcher(blah);
2759 result = new StringBuffer();
2760 try {
2761 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2762 failCount++;
2763 } catch (IllegalStateException e) {
2764 }
2765 m.find();
2766 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2767 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2768 failCount++;
2769
2770 m.appendTail(result);
2771 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2772 failCount++;
2773
2774 // SB substitution with groups and three matches
2775 // skipping middle match
2776 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2777 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2778 m = p.matcher(blah);
2779 result = new StringBuffer();
2780 try {
2781 m.appendReplacement(result, "$1");
2782 failCount++;
2783 } catch (IllegalStateException e) {
2784 }
2785 m.find();
2786 m.appendReplacement(result, "$1");
2787 if (!result.toString().equals(toSupplementaries("zzzab")))
2788 failCount++;
2789
2790 m.find();
2791 m.find();
2792 m.appendReplacement(result, "$2");
2793 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2794 failCount++;
2795
2796 m.appendTail(result);
2797 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2798 failCount++;
2799
2800 // Check to make sure escaped $ is ignored
2801 blah = toSupplementaries("zzzabcdcdefzzz");
2802 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2803 m = p.matcher(blah);
2804 result = new StringBuffer();
2805 m.find();
2806 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2807 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2808 failCount++;
2809
2810 m.appendTail(result);
2811 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2812 failCount++;
2813
2814 // Check to make sure a reference to nonexistent group causes error
2815 blah = toSupplementaries("zzzabcdcdefzzz");
2816 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2817 m = p.matcher(blah);
2818 result = new StringBuffer();
2819 m.find();
2820 try {
2821 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2822 failCount++;
2823 } catch (IndexOutOfBoundsException ioobe) {
2824 // Correct result
2825 }
2826
2827 // Check double digit group references
2828 blah = toSupplementaries("zzz123456789101112zzz");
2829 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2830 m = p.matcher(blah);
2831 result = new StringBuffer();
2832 m.find();
2833 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2834 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2835 failCount++;
2836
2837 // Check to make sure it backs off $15 to $1 if only three groups
2838 blah = toSupplementaries("zzzabcdcdefzzz");
2839 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2840 m = p.matcher(blah);
2841 result = new StringBuffer();
2842 m.find();
2843 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2844 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2845 failCount++;
2846
2847 // Check nothing has been appended into the output buffer if
2848 // the replacement string triggers IllegalArgumentException.
2849 p = Pattern.compile("(abc)");
2850 m = p.matcher("abcd");
2851 result = new StringBuffer();
2852 m.find();
2853 try {
2854 m.appendReplacement(result, ("xyz$g"));
2855 failCount++;
2856 } catch (IllegalArgumentException iae) {
2857 if (result.length() != 0)
2858 failCount++;
2859 }
2860
2861 report("SB Substitution");
2862 }
2863
2864 /*
2865 * 5 groups of characters are created to make a substitution string.
2866 * A base string will be created including random lead chars, the
2867 * substitution string, and random trailing chars.
2868 * A pattern containing the 5 groups is searched for and replaced with:
2869 * random group + random string + random group.
2870 * The results are checked for correctness.
2871 */
2872 private static void substitutionBasher() {
2873 for (int runs = 0; runs<1000; runs++) {
2874 // Create a base string to work in
2875 int leadingChars = generator.nextInt(10);
2876 StringBuffer baseBuffer = new StringBuffer(100);
2877 String leadingString = getRandomAlphaString(leadingChars);
2878 baseBuffer.append(leadingString);
2879
2880 // Create 5 groups of random number of random chars
2881 // Create the string to substitute
2882 // Create the pattern string to search for
2883 StringBuffer bufferToSub = new StringBuffer(25);
2884 StringBuffer bufferToPat = new StringBuffer(50);
2885 String[] groups = new String[5];
2886 for(int i=0; i<5; i++) {
2887 int aGroupSize = generator.nextInt(5)+1;
2888 groups[i] = getRandomAlphaString(aGroupSize);
2889 bufferToSub.append(groups[i]);
2890 bufferToPat.append('(');
2891 bufferToPat.append(groups[i]);
2892 bufferToPat.append(')');
2893 }
2894 String stringToSub = bufferToSub.toString();
2895 String pattern = bufferToPat.toString();
2896
2897 // Place sub string into working string at random index
2898 baseBuffer.append(stringToSub);
2899
2900 // Append random chars to end
2901 int trailingChars = generator.nextInt(10);
2902 String trailingString = getRandomAlphaString(trailingChars);
2903 baseBuffer.append(trailingString);
2904 String baseString = baseBuffer.toString();
2905
2906 // Create test pattern and matcher
2907 Pattern p = Pattern.compile(pattern);
2908 Matcher m = p.matcher(baseString);
2909
2910 // Reject candidate if pattern happens to start early
2911 m.find();
2912 if (m.start() < leadingChars)
2913 continue;
2914
2915 // Reject candidate if more than one match
2916 if (m.find())
2917 continue;
2918
2919 // Construct a replacement string with :
2920 // random group + random string + random group
2921 StringBuffer bufferToRep = new StringBuffer();
2922 int groupIndex1 = generator.nextInt(5);
2923 bufferToRep.append("$" + (groupIndex1 + 1));
2924 String randomMidString = getRandomAlphaString(5);
2925 bufferToRep.append(randomMidString);
2926 int groupIndex2 = generator.nextInt(5);
2927 bufferToRep.append("$" + (groupIndex2 + 1));
2928 String replacement = bufferToRep.toString();
2929
2930 // Do the replacement
2931 String result = m.replaceAll(replacement);
2932
2933 // Construct expected result
2934 StringBuffer bufferToRes = new StringBuffer();
2935 bufferToRes.append(leadingString);
2936 bufferToRes.append(groups[groupIndex1]);
2937 bufferToRes.append(randomMidString);
2938 bufferToRes.append(groups[groupIndex2]);
2939 bufferToRes.append(trailingString);
2940 String expectedResult = bufferToRes.toString();
2941
2942 // Check results
2943 if (!result.equals(expectedResult))
2944 failCount++;
2945 }
2946
2947 report("Substitution Basher");
2948 }
2949
2950 /**
2951 * Checks the handling of some escape sequences that the Pattern
2952 * class should process instead of the java compiler. These are
2953 * not in the file because the escapes should be be processed
2954 * by the Pattern class when the regex is compiled.
2955 */
2956 private static void escapes() throws Exception {
2957 Pattern p = Pattern.compile("\\043");
2958 Matcher m = p.matcher("#");
2959 if (!m.find())
2960 failCount++;
2961
2962 p = Pattern.compile("\\x23");
2963 m = p.matcher("#");
2964 if (!m.find())
2965 failCount++;
2966
2967 p = Pattern.compile("\\u0023");
2968 m = p.matcher("#");
2969 if (!m.find())
2970 failCount++;
2971
2972 report("Escape sequences");
2973 }
2974
2975 /**
2976 * Checks the handling of blank input situations. These
2977 * tests are incompatible with my test file format.
2978 */
2979 private static void blankInput() throws Exception {
2980 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
2981 Matcher m = p.matcher("");
2982 if (m.find())
2983 failCount++;
2984
2985 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
2986 m = p.matcher("");
2987 if (!m.find())
2988 failCount++;
2989
2990 p = Pattern.compile("abc");
2991 m = p.matcher("");
2992 if (m.find())
2993 failCount++;
2994
2995 p = Pattern.compile("a*");
2996 m = p.matcher("");
2997 if (!m.find())
2998 failCount++;
2999
3000 report("Blank input");
3001 }
3002
3003 /**
3004 * Tests the Boyer-Moore pattern matching of a character sequence
3005 * on randomly generated patterns.
3006 */
3007 private static void bm() throws Exception {
3008 doBnM('a');
3009 report("Boyer Moore (ASCII)");
3010
3011 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3012 report("Boyer Moore (Supplementary)");
3013 }
3014
3015 private static void doBnM(int baseCharacter) throws Exception {
3016 int achar=0;
3017
3018 for (int i=0; i<100; i++) {
3019 // Create a short pattern to search for
3020 int patternLength = generator.nextInt(7) + 4;
3021 StringBuffer patternBuffer = new StringBuffer(patternLength);
3022 for (int x=0; x<patternLength; x++) {
3023 int ch = baseCharacter + generator.nextInt(26);
3024 if (Character.isSupplementaryCodePoint(ch)) {
3025 patternBuffer.append(Character.toChars(ch));
3026 } else {
3027 patternBuffer.append((char)ch);
3028 }
3029 }
3030 String pattern = patternBuffer.toString();
3031 Pattern p = Pattern.compile(pattern);
3032
3033 // Create a buffer with random ASCII chars that does
3034 // not match the sample
3035 String toSearch = null;
3036 StringBuffer s = null;
3037 Matcher m = p.matcher("");
3038 do {
3039 s = new StringBuffer(100);
3040 for (int x=0; x<100; x++) {
3041 int ch = baseCharacter + generator.nextInt(26);
3042 if (Character.isSupplementaryCodePoint(ch)) {
3043 s.append(Character.toChars(ch));
3044 } else {
3045 s.append((char)ch);
3046 }
3047 }
3048 toSearch = s.toString();
3049 m.reset(toSearch);
3050 } while (m.find());
3051
3052 // Insert the pattern at a random spot
3053 int insertIndex = generator.nextInt(99);
3054 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3055 insertIndex++;
3056 s = s.insert(insertIndex, pattern);
3057 toSearch = s.toString();
3058
3059 // Make sure that the pattern is found
3060 m.reset(toSearch);
3061 if (!m.find())
3062 failCount++;
3063
3064 // Make sure that the match text is the pattern
3065 if (!m.group().equals(pattern))
3066 failCount++;
3067
3068 // Make sure match occured at insertion point
3069 if (m.start() != insertIndex)
3070 failCount++;
3071 }
3072 }
3073
3074 /**
3075 * Tests the matching of slices on randomly generated patterns.
3076 * The Boyer-Moore optimization is not done on these patterns
3077 * because it uses unicode case folding.
3078 */
3079 private static void slice() throws Exception {
3080 doSlice(Character.MAX_VALUE);
3081 report("Slice");
3082
3083 doSlice(Character.MAX_CODE_POINT);
3084 report("Slice (Supplementary)");
3085 }
3086
3087 private static void doSlice(int maxCharacter) throws Exception {
3088 Random generator = new Random();
3089 int achar=0;
3090
3091 for (int i=0; i<100; i++) {
3092 // Create a short pattern to search for
3093 int patternLength = generator.nextInt(7) + 4;
3094 StringBuffer patternBuffer = new StringBuffer(patternLength);
3095 for (int x=0; x<patternLength; x++) {
3096 int randomChar = 0;
3097 while (!Character.isLetterOrDigit(randomChar))
3098 randomChar = generator.nextInt(maxCharacter);
3099 if (Character.isSupplementaryCodePoint(randomChar)) {
3100 patternBuffer.append(Character.toChars(randomChar));
3101 } else {
3102 patternBuffer.append((char) randomChar);
3103 }
3104 }
3105 String pattern = patternBuffer.toString();
3106 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3107
3108 // Create a buffer with random chars that does not match the sample
3109 String toSearch = null;
3110 StringBuffer s = null;
3111 Matcher m = p.matcher("");
3112 do {
3113 s = new StringBuffer(100);
3114 for (int x=0; x<100; x++) {
3115 int randomChar = 0;
3116 while (!Character.isLetterOrDigit(randomChar))
3117 randomChar = generator.nextInt(maxCharacter);
3118 if (Character.isSupplementaryCodePoint(randomChar)) {
3119 s.append(Character.toChars(randomChar));
3120 } else {
3121 s.append((char) randomChar);
3122 }
3123 }
3124 toSearch = s.toString();
3125 m.reset(toSearch);
3126 } while (m.find());
3127
3128 // Insert the pattern at a random spot
3129 int insertIndex = generator.nextInt(99);
3130 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3131 insertIndex++;
3132 s = s.insert(insertIndex, pattern);
3133 toSearch = s.toString();
3134
3135 // Make sure that the pattern is found
3136 m.reset(toSearch);
3137 if (!m.find())
3138 failCount++;
3139
3140 // Make sure that the match text is the pattern
3141 if (!m.group().equals(pattern))
3142 failCount++;
3143
3144 // Make sure match occured at insertion point
3145 if (m.start() != insertIndex)
3146 failCount++;
3147 }
3148 }
3149
3150 private static void explainFailure(String pattern, String data,
3151 String expected, String actual) {
3152 System.err.println("----------------------------------------");
3153 System.err.println("Pattern = "+pattern);
3154 System.err.println("Data = "+data);
3155 System.err.println("Expected = " + expected);
3156 System.err.println("Actual = " + actual);
3157 }
3158
3159 private static void explainFailure(String pattern, String data,
3160 Throwable t) {
3161 System.err.println("----------------------------------------");
3162 System.err.println("Pattern = "+pattern);
3163 System.err.println("Data = "+data);
3164 t.printStackTrace(System.err);
3165 }
3166
3167 // Testing examples from a file
3168
3169 /**
3170 * Goes through the file "TestCases.txt" and creates many patterns
3171 * described in the file, matching the patterns against input lines in
3172 * the file, and comparing the results against the correct results
3173 * also found in the file. The file format is described in comments
3174 * at the head of the file.
3175 */
3176 private static void processFile(String fileName) throws Exception {
3177 File testCases = new File(System.getProperty("test.src", "."),
3178 fileName);
3179 FileInputStream in = new FileInputStream(testCases);
3180 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3181
3182 // Process next test case.
3183 String aLine;
3184 while((aLine = r.readLine()) != null) {
3185 // Read a line for pattern
3186 String patternString = grabLine(r);
3187 Pattern p = null;
3188 try {
3189 p = compileTestPattern(patternString);
3190 } catch (PatternSyntaxException e) {
3191 String dataString = grabLine(r);
3192 String expectedResult = grabLine(r);
3193 if (expectedResult.startsWith("error"))
3194 continue;
3195 explainFailure(patternString, dataString, e);
3196 failCount++;
3197 continue;
3198 }
3199
3200 // Read a line for input string
3201 String dataString = grabLine(r);
3202 Matcher m = p.matcher(dataString);
3203 StringBuffer result = new StringBuffer();
3204
3205 // Check for IllegalStateExceptions before a match
3206 failCount += preMatchInvariants(m);
3207
3208 boolean found = m.find();
3209
3210 if (found)
3211 failCount += postTrueMatchInvariants(m);
3212 else
3213 failCount += postFalseMatchInvariants(m);
3214
3215 if (found) {
3216 result.append("true ");
3217 result.append(m.group(0) + " ");
3218 } else {
3219 result.append("false ");
3220 }
3221
3222 result.append(m.groupCount());
3223
3224 if (found) {
3225 for (int i=1; i<m.groupCount()+1; i++)
3226 if (m.group(i) != null)
3227 result.append(" " +m.group(i));
3228 }
3229
3230 // Read a line for the expected result
3231 String expectedResult = grabLine(r);
3232
3233 if (!result.toString().equals(expectedResult)) {
3234 explainFailure(patternString, dataString, expectedResult, result.toString());
3235 failCount++;
3236 }
3237 }
3238
3239 report(fileName);
3240 }
3241
3242 private static int preMatchInvariants(Matcher m) {
3243 int failCount = 0;
3244 try {
3245 m.start();
3246 failCount++;
3247 } catch (IllegalStateException ise) {}
3248 try {
3249 m.end();
3250 failCount++;
3251 } catch (IllegalStateException ise) {}
3252 try {
3253 m.group();
3254 failCount++;
3255 } catch (IllegalStateException ise) {}
3256 return failCount;
3257 }
3258
3259 private static int postFalseMatchInvariants(Matcher m) {
3260 int failCount = 0;
3261 try {
3262 m.group();
3263 failCount++;
3264 } catch (IllegalStateException ise) {}
3265 try {
3266 m.start();
3267 failCount++;
3268 } catch (IllegalStateException ise) {}
3269 try {
3270 m.end();
3271 failCount++;
3272 } catch (IllegalStateException ise) {}
3273 return failCount;
3274 }
3275
3276 private static int postTrueMatchInvariants(Matcher m) {
3277 int failCount = 0;
3278 //assert(m.start() = m.start(0);
3279 if (m.start() != m.start(0))
3280 failCount++;
3281 //assert(m.end() = m.end(0);
3282 if (m.start() != m.start(0))
3283 failCount++;
3284 //assert(m.group() = m.group(0);
3285 if (!m.group().equals(m.group(0)))
3286 failCount++;
3287 try {
3288 m.group(50);
3289 failCount++;
3290 } catch (IndexOutOfBoundsException ise) {}
3291
3292 return failCount;
3293 }
3294
3295 private static Pattern compileTestPattern(String patternString) {
3296 if (!patternString.startsWith("'")) {
3297 return Pattern.compile(patternString);
3298 }
3299
3300 int break1 = patternString.lastIndexOf("'");
3301 String flagString = patternString.substring(
3302 break1+1, patternString.length());
3303 patternString = patternString.substring(1, break1);
3304
3305 if (flagString.equals("i"))
3306 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3307
3308 if (flagString.equals("m"))
3309 return Pattern.compile(patternString, Pattern.MULTILINE);
3310
3311 return Pattern.compile(patternString);
3312 }
3313
3314 /**
3315 * Reads a line from the input file. Keeps reading lines until a non
3316 * empty non comment line is read. If the line contains a \n then
3317 * these two characters are replaced by a newline char. If a \\uxxxx
3318 * sequence is read then the sequence is replaced by the unicode char.
3319 */
3320 private static String grabLine(BufferedReader r) throws Exception {
3321 int index = 0;
3322 String line = r.readLine();
3323 while (line.startsWith("//") || line.length() < 1)
3324 line = r.readLine();
3325 while ((index = line.indexOf("\\n")) != -1) {
3326 StringBuffer temp = new StringBuffer(line);
3327 temp.replace(index, index+2, "\n");
3328 line = temp.toString();
3329 }
3330 while ((index = line.indexOf("\\u")) != -1) {
3331 StringBuffer temp = new StringBuffer(line);
3332 String value = temp.substring(index+2, index+6);
3333 char aChar = (char)Integer.parseInt(value, 16);
3334 String unicodeChar = "" + aChar;
3335 temp.replace(index, index+6, unicodeChar);
3336 line = temp.toString();
3337 }
3338
3339 return line;
3340 }
3341
3342 private static void check(Pattern p, String s, String g, String expected) {
3343 Matcher m = p.matcher(s);
3344 m.find();
3345 if (!m.group(g).equals(expected))
3346 failCount++;
3347 }
3348
3349 private static void checkReplaceFirst(String p, String s, String r, String expected)
3350 {
3351 if (!expected.equals(Pattern.compile(p)
3352 .matcher(s)
3353 .replaceFirst(r)))
3354 failCount++;
3355 }
3356
3357 private static void checkReplaceAll(String p, String s, String r, String expected)
3358 {
3359 if (!expected.equals(Pattern.compile(p)
3360 .matcher(s)
3361 .replaceAll(r)))
3362 failCount++;
3363 }
3364
3365 private static void checkExpectedFail(String p) {
3366 try {
3367 Pattern.compile(p);
3368 } catch (PatternSyntaxException pse) {
3369 //pse.printStackTrace();
3370 return;
3371 }
3372 failCount++;
3373 }
3374
3375 private static void checkExpectedFail(Matcher m, String g) {
3376 m.find();
3377 try {
3378 m.group(g);
3379 } catch (IllegalArgumentException iae) {
3380 //iae.printStackTrace();
3381 return;
3382 } catch (NullPointerException npe) {
3383 return;
3384 }
3385 failCount++;
3386 }
3387
3388
3389 private static void namedGroupCaptureTest() throws Exception {
3390 check(Pattern.compile("x+(?<gname>y+)z+"),
3391 "xxxyyyzzz",
3392 "gname",
3393 "yyy");
3394
shermand9337e02009-10-21 11:40:40 -07003395 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003396 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003397 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003398 "yyy");
3399
sherman0b4d42d2009-02-23 21:06:15 -08003400 //backref
3401 Pattern pattern = Pattern.compile("(a*)bc\\1");
3402 check(pattern, "zzzaabcazzz", true); // found "abca"
3403
3404 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3405 "zzzaabcaazzz", true);
3406
3407 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3408 "abcdefabc", true);
3409
3410 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3411 "abcdefghijkk", true);
3412
3413 // Supplementary character tests
3414 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3415 toSupplementaries("zzzaabcazzz"), true);
3416
3417 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3418 toSupplementaries("zzzaabcaazzz"), true);
3419
3420 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3421 toSupplementaries("abcdefabc"), true);
3422
3423 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3424 "(?<gname>" +
3425 toSupplementaries("k)") + "\\k<gname>"),
3426 toSupplementaries("abcdefghijkk"), true);
3427
3428 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3429 "xxxyyyzzzyyy",
3430 "gname",
3431 "yyy");
3432
3433 //replaceFirst/All
3434 checkReplaceFirst("(?<gn>ab)(c*)",
3435 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003436 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003437 "abzzzabcczzzabccc");
3438
3439 checkReplaceAll("(?<gn>ab)(c*)",
3440 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003441 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003442 "abzzzabzzzab");
3443
3444
3445 checkReplaceFirst("(?<gn>ab)(c*)",
3446 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003447 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003448 "zzzabzzzabcczzzabccczzz");
3449
3450 checkReplaceAll("(?<gn>ab)(c*)",
3451 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003452 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003453 "zzzabzzzabzzzabzzz");
3454
3455 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3456 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003457 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003458 "zzzccczzzabcczzzabccczzz");
3459
3460 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3461 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003462 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003463 "zzzccczzzcczzzccczzz");
3464
3465 //toSupplementaries("(ab)(c*)"));
3466 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3467 ")(?<gn2>" + toSupplementaries("c") + "*)",
3468 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003469 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003470 toSupplementaries("abzzzabcczzzabccc"));
3471
3472
3473 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3474 ")(?<gn2>" + toSupplementaries("c") + "*)",
3475 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003476 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003477 toSupplementaries("abzzzabzzzab"));
3478
3479 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3480 ")(?<gn2>" + toSupplementaries("c") + "*)",
3481 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003482 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003483 toSupplementaries("ccczzzabcczzzabccc"));
3484
3485
3486 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3487 ")(?<gn2>" + toSupplementaries("c") + "*)",
3488 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003489 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003490 toSupplementaries("ccczzzcczzzccc"));
3491
3492 checkReplaceFirst("(?<dog>Dog)AndCat",
3493 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003494 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003495 "zzzDogzzzDogAndCatzzz");
3496
3497
3498 checkReplaceAll("(?<dog>Dog)AndCat",
3499 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003500 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003501 "zzzDogzzzDogzzz");
3502
3503 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003504 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3505 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003506 failCount++;
3507
3508 // negative
3509 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3510 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003511 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003512 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3513 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3514 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3515 "gnameX");
3516 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3517 null);
3518 report("NamedGroupCapture");
3519 }
sherman6782c962010-02-05 00:10:42 -08003520
shermancc01ef52010-05-18 15:36:47 -07003521 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003522 private static void nonBmpClassComplementTest() throws Exception {
3523 Pattern p = Pattern.compile("\\P{Lu}");
3524 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3525 if (m.find() && m.start() == 1)
3526 failCount++;
3527
3528 // from a unicode category
3529 p = Pattern.compile("\\P{Lu}");
3530 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3531 if (m.find())
3532 failCount++;
3533 if (!m.hitEnd())
3534 failCount++;
3535
3536 // block
3537 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3538 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3539 if (m.find() && m.start() == 1)
3540 failCount++;
3541
3542 report("NonBmpClassComplement");
3543 }
3544
shermancc01ef52010-05-18 15:36:47 -07003545 private static void unicodePropertiesTest() throws Exception {
3546 // different forms
3547 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3548 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3549 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3550 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3551 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3552 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3553 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3554 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3555 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3556 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3557 failCount++;
3558
3559 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3560 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3561 Matcher lastSM = common;
3562 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3563
3564 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3565 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3566 Matcher lastBM = latin;
3567 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3568
3569 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3570 if (cp >= 0x30000 && (cp & 0x70) == 0){
3571 continue; // only pick couple code points, they are the same
3572 }
3573
3574 // Unicode Script
3575 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3576 Matcher m;
3577 String str = new String(Character.toChars(cp));
3578 if (script == lastScript) {
3579 m = lastSM;
3580 m.reset(str);
3581 } else {
3582 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3583 }
3584 if (!m.matches()) {
3585 failCount++;
3586 }
3587 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3588 other.reset(str);
3589 if (other.matches()) {
3590 failCount++;
3591 }
3592 lastSM = m;
3593 lastScript = script;
3594
3595 // Unicode Block
3596 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3597 if (block == null) {
3598 //System.out.printf("Not a Block: cp=%x%n", cp);
3599 continue;
3600 }
3601 if (block == lastBlock) {
3602 m = lastBM;
3603 m.reset(str);
3604 } else {
3605 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3606 }
3607 if (!m.matches()) {
3608 failCount++;
3609 }
3610 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3611 other.reset(str);
3612 if (other.matches()) {
3613 failCount++;
3614 }
3615 lastBM = m;
3616 lastBlock = block;
3617 }
3618 report("unicodeProperties");
3619 }
shermanf03c78b2011-02-03 13:49:25 -08003620
3621 private static void unicodeHexNotationTest() throws Exception {
3622
3623 // negative
3624 checkExpectedFail("\\x{-23}");
3625 checkExpectedFail("\\x{110000}");
3626 checkExpectedFail("\\x{}");
3627 checkExpectedFail("\\x{AB[ef]");
3628
3629 // codepoint
3630 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3631 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3632 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3633 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3634
3635 // in class
3636 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3637 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3638 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3639 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3640 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3641 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3642
3643 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3644 String s = "A" + new String(Character.toChars(cp)) + "B";
3645 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3646 : String.format("\\u%04x\\u%04x",
3647 (int) Character.toChars(cp)[0],
3648 (int) Character.toChars(cp)[1]);
3649 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3650 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3651 failCount++;
3652 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3653 failCount++;
3654 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3655 failCount++;
3656 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3657 failCount++;
3658 }
3659 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003660 }
3661
3662 private static void unicodeClassesTest() throws Exception {
3663
3664 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3665 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3666 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3667 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3668 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3669 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3670 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3671 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3672 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3673 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3674 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3675 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3676 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3677 Matcher bound = Pattern.compile("\\b").matcher("");
3678 Matcher word = Pattern.compile("\\w++").matcher("");
3679 // UNICODE_CHARACTER_CLASS
3680 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3681 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3682 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3683 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3684 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3685 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3686 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3687 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3688 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3689 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3690 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3691 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3692 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3693 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3694 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3695 // embedded flag (?U)
3696 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3697 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3698 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3699
3700 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3701 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3702 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3703 // properties
3704 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3705 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3706 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3707 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3708 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3709 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3710 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3711 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3712 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3713 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3714
3715 // javaMethod
3716 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3717 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3718 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3719 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3720
3721 for (int cp = 1; cp < 0x30000; cp++) {
3722 String str = new String(Character.toChars(cp));
3723 int type = Character.getType(cp);
3724 if (// lower
3725 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3726 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3727 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3728 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3729 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3730 // upper
3731 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3732 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3733 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3734 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3735 // alpha
3736 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3737 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3738 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3739 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3740 // digit
3741 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3742 Character.isDigit(cp) != digitU.reset(str).matches() ||
3743 // alnum
3744 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3745 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3746 // punct
3747 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3748 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3749 // graph
3750 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3751 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3752 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3753 // blank
3754 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3755 != blank.reset(str).matches() ||
3756 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3757 // print
3758 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3759 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3760 // cntrl
3761 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3762 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3763 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3764 // hexdigit
3765 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3766 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3767 // space
3768 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3769 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3770 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3771 // word
3772 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3773 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3774 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3775 // bwordb
3776 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3777 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3778 // properties
3779 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3780 Character.isLetter(cp) != letterP.reset(str).matches()||
3781 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3782 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3783 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3784 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3785 failCount++;
3786 }
3787
3788 // bounds/word align
3789 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3790 if (!bwbU.reset("\u0180sherman\u0400").matches())
3791 failCount++;
3792 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3793 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3794 failCount++;
3795 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3796 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3797 failCount++;
3798 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3799 failCount++;
3800 report("unicodePredefinedClasses");
3801 }
sherman0b4d42d2009-02-23 21:06:15 -08003802}