blob: 7846f5c05e384450b28cdeb47534cff3374f0e31 [file] [log] [blame]
sherman0b4d42d2009-02-23 21:06:15 -08001/*
ohairbf91ea12011-04-06 22:06:11 -07002 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
sherman0b4d42d2009-02-23 21:06:15 -08003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
sherman0b4d42d2009-02-23 21:06:15 -08008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
sherman0b4d42d2009-02-23 21:06:15 -080010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
sherman0b4d42d2009-02-23 21:06:15 -080024 */
25
26/**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
shermanb16229d2011-12-19 14:14:14 -080035 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
shermanecb65472012-05-08 10:57:13 -070036 * 7067045 7014640
sherman0b4d42d2009-02-23 21:06:15 -080037 */
38
39import java.util.regex.*;
40import java.util.Random;
41import java.io.*;
42import java.util.*;
43import java.nio.CharBuffer;
44
45/**
46 * This is a test class created to check the operation of
47 * the Pattern and Matcher classes.
48 */
49public class RegExTest {
50
51 private static Random generator = new Random();
52 private static boolean failure = false;
53 private static int failCount = 0;
shermanb16229d2011-12-19 14:14:14 -080054 private static String firstFailure = null;
sherman0b4d42d2009-02-23 21:06:15 -080055
56 /**
57 * Main to interpret arguments and run several tests.
58 *
59 */
60 public static void main(String[] args) throws Exception {
61 // Most of the tests are in a file
62 processFile("TestCases.txt");
63 //processFile("PerlCases.txt");
64 processFile("BMPTestCases.txt");
65 processFile("SupplementaryTestCases.txt");
66
67 // These test many randomly generated char patterns
68 bm();
69 slice();
70
71 // These are hard to put into the file
72 escapes();
73 blankInput();
74
75 // Substitition tests on randomly generated sequences
76 globalSubstitute();
77 stringbufferSubstitute();
78 substitutionBasher();
79
80 // Canonical Equivalence
81 ceTest();
82
83 // Anchors
84 anchorTest();
85
86 // boolean match calls
87 matchesTest();
88 lookingAtTest();
89
90 // Pattern API
91 patternMatchesTest();
92
93 // Misc
94 lookbehindTest();
95 nullArgumentTest();
96 backRefTest();
97 groupCaptureTest();
98 caretTest();
99 charClassTest();
100 emptyPatternTest();
101 findIntTest();
102 group0Test();
103 longPatternTest();
104 octalTest();
105 ampersandTest();
106 negationTest();
107 splitTest();
108 appendTest();
109 caseFoldingTest();
110 commentsTest();
111 unixLinesTest();
112 replaceFirstTest();
113 gTest();
114 zTest();
115 serializeTest();
116 reluctantRepetitionTest();
117 multilineDollarTest();
118 dollarAtEndTest();
119 caretBetweenTerminatorsTest();
120 // This RFE rejected in Tiger numOccurrencesTest();
121 javaCharClassTest();
122 nonCaptureRepetitionTest();
123 notCapturedGroupCurlyMatchTest();
124 escapedSegmentTest();
125 literalPatternTest();
126 literalReplacementTest();
127 regionTest();
128 toStringTest();
129 negatedCharClassTest();
130 findFromTest();
131 boundsTest();
132 unicodeWordBoundsTest();
133 caretAtEndTest();
134 wordSearchTest();
135 hitEndTest();
136 toMatchResultTest();
137 surrogatesInClassTest();
shermanb16229d2011-12-19 14:14:14 -0800138 removeQEQuotingTest();
sherman0b4d42d2009-02-23 21:06:15 -0800139 namedGroupCaptureTest();
sherman6782c962010-02-05 00:10:42 -0800140 nonBmpClassComplementTest();
shermancc01ef52010-05-18 15:36:47 -0700141 unicodePropertiesTest();
shermanf03c78b2011-02-03 13:49:25 -0800142 unicodeHexNotationTest();
sherman85bbd8b2011-04-28 20:48:36 -0700143 unicodeClassesTest();
shermanecb65472012-05-08 10:57:13 -0700144 horizontalAndVerticalWSTest();
145 linebreakTest();
shermanb16229d2011-12-19 14:14:14 -0800146 if (failure) {
147 throw new
148 RuntimeException("RegExTest failed, 1st failure: " +
149 firstFailure);
150 } else {
sherman0b4d42d2009-02-23 21:06:15 -0800151 System.err.println("OKAY: All tests passed.");
shermanb16229d2011-12-19 14:14:14 -0800152 }
sherman0b4d42d2009-02-23 21:06:15 -0800153 }
154
155 // Utility functions
156
157 private static String getRandomAlphaString(int length) {
158 StringBuffer buf = new StringBuffer(length);
159 for (int i=0; i<length; i++) {
160 char randChar = (char)(97 + generator.nextInt(26));
161 buf.append(randChar);
162 }
163 return buf.toString();
164 }
165
166 private static void check(Matcher m, String expected) {
167 m.find();
168 if (!m.group().equals(expected))
169 failCount++;
170 }
171
172 private static void check(Matcher m, String result, boolean expected) {
173 m.find();
shermanf03c78b2011-02-03 13:49:25 -0800174 if (m.group().equals(result) != expected)
175 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800176 }
177
178 private static void check(Pattern p, String s, boolean expected) {
shermanf03c78b2011-02-03 13:49:25 -0800179 if (p.matcher(s).find() != expected)
180 failCount++;
181 }
182
183 private static void check(String p, String s, boolean expected) {
184 Matcher matcher = Pattern.compile(p).matcher(s);
185 if (matcher.find() != expected)
186 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800187 }
188
189 private static void check(String p, char c, boolean expected) {
190 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
191 Pattern pattern = Pattern.compile(propertyPattern);
192 char[] ca = new char[1]; ca[0] = c;
193 Matcher matcher = pattern.matcher(new String(ca));
194 if (!matcher.find())
195 failCount++;
196 }
197
198 private static void check(String p, int codePoint, boolean expected) {
199 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
200 Pattern pattern = Pattern.compile(propertyPattern);
201 char[] ca = Character.toChars(codePoint);
202 Matcher matcher = pattern.matcher(new String(ca));
203 if (!matcher.find())
204 failCount++;
205 }
206
207 private static void check(String p, int flag, String input, String s,
208 boolean expected)
209 {
210 Pattern pattern = Pattern.compile(p, flag);
211 Matcher matcher = pattern.matcher(input);
212 if (expected)
213 check(matcher, s, expected);
214 else
215 check(pattern, input, false);
216 }
217
218 private static void report(String testName) {
219 int spacesToAdd = 30 - testName.length();
220 StringBuffer paddedNameBuffer = new StringBuffer(testName);
221 for (int i=0; i<spacesToAdd; i++)
222 paddedNameBuffer.append(" ");
223 String paddedName = paddedNameBuffer.toString();
224 System.err.println(paddedName + ": " +
225 (failCount==0 ? "Passed":"Failed("+failCount+")"));
shermanb16229d2011-12-19 14:14:14 -0800226 if (failCount > 0) {
sherman0b4d42d2009-02-23 21:06:15 -0800227 failure = true;
shermanb16229d2011-12-19 14:14:14 -0800228
229 if (firstFailure == null) {
230 firstFailure = testName;
231 }
232 }
233
sherman0b4d42d2009-02-23 21:06:15 -0800234 failCount = 0;
235 }
236
237 /**
238 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
239 * supplementary characters. This method does NOT fully take care
240 * of the regex syntax.
241 */
242 private static String toSupplementaries(String s) {
243 int length = s.length();
244 StringBuffer sb = new StringBuffer(length * 2);
245
246 for (int i = 0; i < length; ) {
247 char c = s.charAt(i++);
248 if (c == '\\') {
249 sb.append(c);
250 if (i < length) {
251 c = s.charAt(i++);
252 sb.append(c);
253 if (c == 'u') {
254 // assume no syntax error
255 sb.append(s.charAt(i++));
256 sb.append(s.charAt(i++));
257 sb.append(s.charAt(i++));
258 sb.append(s.charAt(i++));
259 }
260 }
261 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
262 sb.append('\ud800').append((char)('\udc00'+c));
263 } else {
264 sb.append(c);
265 }
266 }
267 return sb.toString();
268 }
269
270 // Regular expression tests
271
272 // This is for bug 6178785
273 // Test if an expected NPE gets thrown when passing in a null argument
274 private static boolean check(Runnable test) {
275 try {
276 test.run();
277 failCount++;
278 return false;
279 } catch (NullPointerException npe) {
280 return true;
281 }
282 }
283
284 private static void nullArgumentTest() {
285 check(new Runnable() { public void run() { Pattern.compile(null); }});
286 check(new Runnable() { public void run() { Pattern.matches(null, null); }});
287 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
288 check(new Runnable() { public void run() { Pattern.quote(null);}});
289 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
290 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
291
292 final Matcher m = Pattern.compile("xyz").matcher("xyz");
293 m.matches();
294 check(new Runnable() { public void run() { m.appendTail(null);}});
295 check(new Runnable() { public void run() { m.replaceAll(null);}});
296 check(new Runnable() { public void run() { m.replaceFirst(null);}});
297 check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
298 check(new Runnable() { public void run() { m.reset(null);}});
299 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
300 //check(new Runnable() { public void run() { m.usePattern(null);}});
301
302 report("Null Argument");
303 }
304
305 // This is for bug6635133
306 // Test if surrogate pair in Unicode escapes can be handled correctly.
307 private static void surrogatesInClassTest() throws Exception {
308 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
309 Matcher matcher = pattern.matcher("\ud834\udd22");
310 if (!matcher.find())
311 failCount++;
shermanb16229d2011-12-19 14:14:14 -0800312
313 report("Surrogate pair in Unicode escape");
314 }
315
316 // This is for bug6990617
317 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
318 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
319 // char is an octal digit.
320 private static void removeQEQuotingTest() throws Exception {
321 Pattern pattern =
322 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
323 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
324 if (!matcher.find())
325 failCount++;
326
327 report("Remove Q/E Quoting");
sherman0b4d42d2009-02-23 21:06:15 -0800328 }
329
330 // This is for bug 4988891
331 // Test toMatchResult to see that it is a copy of the Matcher
332 // that is not affected by subsequent operations on the original
333 private static void toMatchResultTest() throws Exception {
334 Pattern pattern = Pattern.compile("squid");
335 Matcher matcher = pattern.matcher(
336 "agiantsquidofdestinyasmallsquidoffate");
337 matcher.find();
338 int matcherStart1 = matcher.start();
339 MatchResult mr = matcher.toMatchResult();
340 if (mr == matcher)
341 failCount++;
342 int resultStart1 = mr.start();
343 if (matcherStart1 != resultStart1)
344 failCount++;
345 matcher.find();
346 int matcherStart2 = matcher.start();
347 int resultStart2 = mr.start();
348 if (matcherStart2 == resultStart2)
349 failCount++;
350 if (resultStart1 != resultStart2)
351 failCount++;
352 MatchResult mr2 = matcher.toMatchResult();
353 if (mr == mr2)
354 failCount++;
355 if (mr2.start() != matcherStart2)
356 failCount++;
357 report("toMatchResult is a copy");
358 }
359
360 // This is for bug 5013885
361 // Must test a slice to see if it reports hitEnd correctly
362 private static void hitEndTest() throws Exception {
363 // Basic test of Slice node
364 Pattern p = Pattern.compile("^squidattack");
365 Matcher m = p.matcher("squack");
366 m.find();
367 if (m.hitEnd())
368 failCount++;
369 m.reset("squid");
370 m.find();
371 if (!m.hitEnd())
372 failCount++;
373
374 // Test Slice, SliceA and SliceU nodes
375 for (int i=0; i<3; i++) {
376 int flags = 0;
377 if (i==1) flags = Pattern.CASE_INSENSITIVE;
378 if (i==2) flags = Pattern.UNICODE_CASE;
379 p = Pattern.compile("^abc", flags);
380 m = p.matcher("ad");
381 m.find();
382 if (m.hitEnd())
383 failCount++;
384 m.reset("ab");
385 m.find();
386 if (!m.hitEnd())
387 failCount++;
388 }
389
390 // Test Boyer-Moore node
391 p = Pattern.compile("catattack");
392 m = p.matcher("attack");
393 m.find();
394 if (!m.hitEnd())
395 failCount++;
396
397 p = Pattern.compile("catattack");
398 m = p.matcher("attackattackattackcatatta");
399 m.find();
400 if (!m.hitEnd())
401 failCount++;
sherman0b4d42d2009-02-23 21:06:15 -0800402 report("hitEnd from a Slice");
403 }
404
405 // This is for bug 4997476
406 // It is weird code submitted by customer demonstrating a regression
407 private static void wordSearchTest() throws Exception {
408 String testString = new String("word1 word2 word3");
409 Pattern p = Pattern.compile("\\b");
410 Matcher m = p.matcher(testString);
411 int position = 0;
412 int start = 0;
413 while (m.find(position)) {
414 start = m.start();
415 if (start == testString.length())
416 break;
417 if (m.find(start+1)) {
418 position = m.start();
419 } else {
420 position = testString.length();
421 }
422 if (testString.substring(start, position).equals(" "))
423 continue;
424 if (!testString.substring(start, position-1).startsWith("word"))
425 failCount++;
426 }
427 report("Customer word search");
428 }
429
430 // This is for bug 4994840
431 private static void caretAtEndTest() throws Exception {
432 // Problem only occurs with multiline patterns
433 // containing a beginning-of-line caret "^" followed
434 // by an expression that also matches the empty string.
435 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
436 Matcher matcher = pattern.matcher("\r");
437 matcher.find();
438 matcher.find();
439 report("Caret at end");
440 }
441
442 // This test is for 4979006
443 // Check to see if word boundary construct properly handles unicode
444 // non spacing marks
445 private static void unicodeWordBoundsTest() throws Exception {
446 String spaces = " ";
447 String wordChar = "a";
448 String nsm = "\u030a";
449
450 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
451
452 Pattern pattern = Pattern.compile("\\b");
453 Matcher matcher = pattern.matcher("");
454 // S=other B=word character N=non spacing mark .=word boundary
455 // SS.BB.SS
456 String input = spaces + wordChar + wordChar + spaces;
457 twoFindIndexes(input, matcher, 2, 4);
458 // SS.BBN.SS
459 input = spaces + wordChar +wordChar + nsm + spaces;
460 twoFindIndexes(input, matcher, 2, 5);
461 // SS.BN.SS
462 input = spaces + wordChar + nsm + spaces;
463 twoFindIndexes(input, matcher, 2, 4);
464 // SS.BNN.SS
465 input = spaces + wordChar + nsm + nsm + spaces;
466 twoFindIndexes(input, matcher, 2, 5);
467 // SSN.BB.SS
468 input = spaces + nsm + wordChar + wordChar + spaces;
469 twoFindIndexes(input, matcher, 3, 5);
470 // SS.BNB.SS
471 input = spaces + wordChar + nsm + wordChar + spaces;
472 twoFindIndexes(input, matcher, 2, 5);
473 // SSNNSS
474 input = spaces + nsm + nsm + spaces;
475 matcher.reset(input);
476 if (matcher.find())
477 failCount++;
478 // SSN.BBN.SS
479 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
480 twoFindIndexes(input, matcher, 3, 6);
481
482 report("Unicode word boundary");
483 }
484
485 private static void twoFindIndexes(String input, Matcher matcher, int a,
486 int b) throws Exception
487 {
488 matcher.reset(input);
489 matcher.find();
490 if (matcher.start() != a)
491 failCount++;
492 matcher.find();
493 if (matcher.start() != b)
494 failCount++;
495 }
496
497 // This test is for 6284152
498 static void check(String regex, String input, String[] expected) {
499 List<String> result = new ArrayList<String>();
500 Pattern p = Pattern.compile(regex);
501 Matcher m = p.matcher(input);
502 while (m.find()) {
503 result.add(m.group());
504 }
505 if (!Arrays.asList(expected).equals(result))
506 failCount++;
507 }
508
509 private static void lookbehindTest() throws Exception {
510 //Positive
511 check("(?<=%.{0,5})foo\\d",
512 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
513 new String[]{"foo1", "foo2", "foo3"});
514
515 //boundary at end of the lookbehind sub-regex should work consistently
516 //with the boundary just after the lookbehind sub-regex
517 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
518 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
519 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
520 check("(?<!abc \\b)foo", "abc foo", new String[0]);
521
522 //Negative
523 check("(?<!%.{0,5})foo\\d",
524 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
525 new String[] {"foo4", "foo5"});
526
527 //Positive greedy
528 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
529
530 //Positive reluctant
531 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
532
533 //supplementary
534 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
535 new String[] {"fo\ud800\udc00o"});
536 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
537 new String[] {"fo\ud800\udc00o"});
538 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
539 new String[] {"fo\ud800\udc00o"});
540 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
541 new String[] {"fo\ud800\udc00o"});
542 report("Lookbehind");
543 }
544
545 // This test is for 4938995
546 // Check to see if weak region boundaries are transparent to
547 // lookahead and lookbehind constructs
548 private static void boundsTest() throws Exception {
549 String fullMessage = "catdogcat";
550 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
551 Matcher matcher = pattern.matcher("catdogca");
552 matcher.useTransparentBounds(true);
553 if (matcher.find())
554 failCount++;
555 matcher.reset("atdogcat");
556 if (matcher.find())
557 failCount++;
558 matcher.reset(fullMessage);
559 if (!matcher.find())
560 failCount++;
561 matcher.reset(fullMessage);
562 matcher.region(0,9);
563 if (!matcher.find())
564 failCount++;
565 matcher.reset(fullMessage);
566 matcher.region(0,6);
567 if (!matcher.find())
568 failCount++;
569 matcher.reset(fullMessage);
570 matcher.region(3,6);
571 if (!matcher.find())
572 failCount++;
573 matcher.useTransparentBounds(false);
574 if (matcher.find())
575 failCount++;
576
577 // Negative lookahead/lookbehind
578 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
579 matcher = pattern.matcher("dogcat");
580 matcher.useTransparentBounds(true);
581 matcher.region(0,3);
582 if (matcher.find())
583 failCount++;
584 matcher.reset("catdog");
585 matcher.region(3,6);
586 if (matcher.find())
587 failCount++;
588 matcher.useTransparentBounds(false);
589 matcher.reset("dogcat");
590 matcher.region(0,3);
591 if (!matcher.find())
592 failCount++;
593 matcher.reset("catdog");
594 matcher.region(3,6);
595 if (!matcher.find())
596 failCount++;
597
598 report("Region bounds transparency");
599 }
600
601 // This test is for 4945394
602 private static void findFromTest() throws Exception {
603 String message = "This is 40 $0 message.";
604 Pattern pat = Pattern.compile("\\$0");
605 Matcher match = pat.matcher(message);
606 if (!match.find())
607 failCount++;
608 if (match.find())
609 failCount++;
610 if (match.find())
611 failCount++;
612 report("Check for alternating find");
613 }
614
615 // This test is for 4872664 and 4892980
616 private static void negatedCharClassTest() throws Exception {
617 Pattern pattern = Pattern.compile("[^>]");
618 Matcher matcher = pattern.matcher("\u203A");
619 if (!matcher.matches())
620 failCount++;
621 pattern = Pattern.compile("[^fr]");
622 matcher = pattern.matcher("a");
623 if (!matcher.find())
624 failCount++;
625 matcher.reset("\u203A");
626 if (!matcher.find())
627 failCount++;
628 String s = "for";
629 String result[] = s.split("[^fr]");
630 if (!result[0].equals("f"))
631 failCount++;
632 if (!result[1].equals("r"))
633 failCount++;
634 s = "f\u203Ar";
635 result = s.split("[^fr]");
636 if (!result[0].equals("f"))
637 failCount++;
638 if (!result[1].equals("r"))
639 failCount++;
640
641 // Test adding to bits, subtracting a node, then adding to bits again
642 pattern = Pattern.compile("[^f\u203Ar]");
643 matcher = pattern.matcher("a");
644 if (!matcher.find())
645 failCount++;
646 matcher.reset("f");
647 if (matcher.find())
648 failCount++;
649 matcher.reset("\u203A");
650 if (matcher.find())
651 failCount++;
652 matcher.reset("r");
653 if (matcher.find())
654 failCount++;
655 matcher.reset("\u203B");
656 if (!matcher.find())
657 failCount++;
658
659 // Test subtracting a node, adding to bits, subtracting again
660 pattern = Pattern.compile("[^\u203Ar\u203B]");
661 matcher = pattern.matcher("a");
662 if (!matcher.find())
663 failCount++;
664 matcher.reset("\u203A");
665 if (matcher.find())
666 failCount++;
667 matcher.reset("r");
668 if (matcher.find())
669 failCount++;
670 matcher.reset("\u203B");
671 if (matcher.find())
672 failCount++;
673 matcher.reset("\u203C");
674 if (!matcher.find())
675 failCount++;
676
677 report("Negated Character Class");
678 }
679
680 // This test is for 4628291
681 private static void toStringTest() throws Exception {
682 Pattern pattern = Pattern.compile("b+");
683 if (pattern.toString() != "b+")
684 failCount++;
685 Matcher matcher = pattern.matcher("aaabbbccc");
686 String matcherString = matcher.toString(); // unspecified
687 matcher.find();
688 matcherString = matcher.toString(); // unspecified
689 matcher.region(0,3);
690 matcherString = matcher.toString(); // unspecified
691 matcher.reset();
692 matcherString = matcher.toString(); // unspecified
693 report("toString");
694 }
695
696 // This test is for 4808962
697 private static void literalPatternTest() throws Exception {
698 int flags = Pattern.LITERAL;
699
700 Pattern pattern = Pattern.compile("abc\\t$^", flags);
701 check(pattern, "abc\\t$^", true);
702
703 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
704 check(pattern, "abc\\t$^", true);
705
706 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
707 check(pattern, "\\Qa^$bcabc\\E", true);
708 check(pattern, "a^$bcabc", false);
709
710 pattern = Pattern.compile("\\\\Q\\\\E");
711 check(pattern, "\\Q\\E", true);
712
713 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
714 check(pattern, "abcefg\\Q\\Ehij", true);
715
716 pattern = Pattern.compile("\\\\\\Q\\\\E");
717 check(pattern, "\\\\\\\\", true);
718
719 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
720 check(pattern, "\\Qa^$bcabc\\E", true);
721 check(pattern, "a^$bcabc", false);
722
723 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
724 check(pattern, "\\Qabc\\Edef", true);
725 check(pattern, "abcdef", false);
726
727 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
728 check(pattern, "abc\\Edef", true);
729 check(pattern, "abcdef", false);
730
731 pattern = Pattern.compile(Pattern.quote("\\E"));
732 check(pattern, "\\E", true);
733
734 pattern = Pattern.compile("((((abc.+?:)", flags);
735 check(pattern, "((((abc.+?:)", true);
736
737 flags |= Pattern.MULTILINE;
738
739 pattern = Pattern.compile("^cat$", flags);
740 check(pattern, "abc^cat$def", true);
741 check(pattern, "cat", false);
742
743 flags |= Pattern.CASE_INSENSITIVE;
744
745 pattern = Pattern.compile("abcdef", flags);
746 check(pattern, "ABCDEF", true);
747 check(pattern, "AbCdEf", true);
748
749 flags |= Pattern.DOTALL;
750
751 pattern = Pattern.compile("a...b", flags);
752 check(pattern, "A...b", true);
753 check(pattern, "Axxxb", false);
754
755 flags |= Pattern.CANON_EQ;
756
757 Pattern p = Pattern.compile("testa\u030a", flags);
758 check(pattern, "testa\u030a", false);
759 check(pattern, "test\u00e5", false);
760
761 // Supplementary character test
762 flags = Pattern.LITERAL;
763
764 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
765 check(pattern, toSupplementaries("abc\\t$^"), true);
766
767 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
768 check(pattern, toSupplementaries("abc\\t$^"), true);
769
770 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
771 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
772 check(pattern, toSupplementaries("a^$bcabc"), false);
773
774 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
775 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
776 check(pattern, toSupplementaries("a^$bcabc"), false);
777
778 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
779 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
780 check(pattern, toSupplementaries("abcdef"), false);
781
782 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
783 check(pattern, toSupplementaries("abc\\Edef"), true);
784 check(pattern, toSupplementaries("abcdef"), false);
785
786 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
787 check(pattern, toSupplementaries("((((abc.+?:)"), true);
788
789 flags |= Pattern.MULTILINE;
790
791 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
792 check(pattern, toSupplementaries("abc^cat$def"), true);
793 check(pattern, toSupplementaries("cat"), false);
794
795 flags |= Pattern.DOTALL;
796
797 // note: this is case-sensitive.
798 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
799 check(pattern, toSupplementaries("a...b"), true);
800 check(pattern, toSupplementaries("axxxb"), false);
801
802 flags |= Pattern.CANON_EQ;
803
804 String t = toSupplementaries("test");
805 p = Pattern.compile(t + "a\u030a", flags);
806 check(pattern, t + "a\u030a", false);
807 check(pattern, t + "\u00e5", false);
808
809 report("Literal pattern");
810 }
811
812 // This test is for 4803179
813 // This test is also for 4808962, replacement parts
814 private static void literalReplacementTest() throws Exception {
815 int flags = Pattern.LITERAL;
816
817 Pattern pattern = Pattern.compile("abc", flags);
818 Matcher matcher = pattern.matcher("zzzabczzz");
819 String replaceTest = "$0";
820 String result = matcher.replaceAll(replaceTest);
821 if (!result.equals("zzzabczzz"))
822 failCount++;
823
824 matcher.reset();
825 String literalReplacement = matcher.quoteReplacement(replaceTest);
826 result = matcher.replaceAll(literalReplacement);
827 if (!result.equals("zzz$0zzz"))
828 failCount++;
829
830 matcher.reset();
831 replaceTest = "\\t$\\$";
832 literalReplacement = matcher.quoteReplacement(replaceTest);
833 result = matcher.replaceAll(literalReplacement);
834 if (!result.equals("zzz\\t$\\$zzz"))
835 failCount++;
836
837 // Supplementary character test
838 pattern = Pattern.compile(toSupplementaries("abc"), flags);
839 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
840 replaceTest = "$0";
841 result = matcher.replaceAll(replaceTest);
842 if (!result.equals(toSupplementaries("zzzabczzz")))
843 failCount++;
844
845 matcher.reset();
846 literalReplacement = matcher.quoteReplacement(replaceTest);
847 result = matcher.replaceAll(literalReplacement);
848 if (!result.equals(toSupplementaries("zzz$0zzz")))
849 failCount++;
850
851 matcher.reset();
852 replaceTest = "\\t$\\$";
853 literalReplacement = matcher.quoteReplacement(replaceTest);
854 result = matcher.replaceAll(literalReplacement);
855 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
856 failCount++;
857
sherman5c8f3492012-04-12 15:01:41 -0700858 // IAE should be thrown if backslash or '$' is the last character
859 // in replacement string
860 try {
861 "\uac00".replaceAll("\uac00", "$");
shermanecb65472012-05-08 10:57:13 -0700862 failCount++;
863 } catch (IllegalArgumentException iie) {
864 } catch (Exception e) {
865 failCount++;
866 }
867 try {
sherman5c8f3492012-04-12 15:01:41 -0700868 "\uac00".replaceAll("\uac00", "\\");
869 failCount++;
870 } catch (IllegalArgumentException iie) {
871 } catch (Exception e) {
872 failCount++;
873 }
sherman0b4d42d2009-02-23 21:06:15 -0800874 report("Literal replacement");
875 }
876
877 // This test is for 4757029
878 private static void regionTest() throws Exception {
879 Pattern pattern = Pattern.compile("abc");
880 Matcher matcher = pattern.matcher("abcdefabc");
881
882 matcher.region(0,9);
883 if (!matcher.find())
884 failCount++;
885 if (!matcher.find())
886 failCount++;
887 matcher.region(0,3);
888 if (!matcher.find())
889 failCount++;
890 matcher.region(3,6);
891 if (matcher.find())
892 failCount++;
893 matcher.region(0,2);
894 if (matcher.find())
895 failCount++;
896
897 expectRegionFail(matcher, 1, -1);
898 expectRegionFail(matcher, -1, -1);
899 expectRegionFail(matcher, -1, 1);
900 expectRegionFail(matcher, 5, 3);
901 expectRegionFail(matcher, 5, 12);
902 expectRegionFail(matcher, 12, 12);
903
904 pattern = Pattern.compile("^abc$");
905 matcher = pattern.matcher("zzzabczzz");
906 matcher.region(0,9);
907 if (matcher.find())
908 failCount++;
909 matcher.region(3,6);
910 if (!matcher.find())
911 failCount++;
912 matcher.region(3,6);
913 matcher.useAnchoringBounds(false);
914 if (matcher.find())
915 failCount++;
916
917 // Supplementary character test
918 pattern = Pattern.compile(toSupplementaries("abc"));
919 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
920 matcher.region(0,9*2);
921 if (!matcher.find())
922 failCount++;
923 if (!matcher.find())
924 failCount++;
925 matcher.region(0,3*2);
926 if (!matcher.find())
927 failCount++;
928 matcher.region(1,3*2);
929 if (matcher.find())
930 failCount++;
931 matcher.region(3*2,6*2);
932 if (matcher.find())
933 failCount++;
934 matcher.region(0,2*2);
935 if (matcher.find())
936 failCount++;
937 matcher.region(0,2*2+1);
938 if (matcher.find())
939 failCount++;
940
941 expectRegionFail(matcher, 1*2, -1);
942 expectRegionFail(matcher, -1, -1);
943 expectRegionFail(matcher, -1, 1*2);
944 expectRegionFail(matcher, 5*2, 3*2);
945 expectRegionFail(matcher, 5*2, 12*2);
946 expectRegionFail(matcher, 12*2, 12*2);
947
948 pattern = Pattern.compile(toSupplementaries("^abc$"));
949 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
950 matcher.region(0,9*2);
951 if (matcher.find())
952 failCount++;
953 matcher.region(3*2,6*2);
954 if (!matcher.find())
955 failCount++;
956 matcher.region(3*2+1,6*2);
957 if (matcher.find())
958 failCount++;
959 matcher.region(3*2,6*2-1);
960 if (matcher.find())
961 failCount++;
962 matcher.region(3*2,6*2);
963 matcher.useAnchoringBounds(false);
964 if (matcher.find())
965 failCount++;
966 report("Regions");
967 }
968
969 private static void expectRegionFail(Matcher matcher, int index1,
970 int index2)
971 {
972 try {
973 matcher.region(index1, index2);
974 failCount++;
975 } catch (IndexOutOfBoundsException ioobe) {
976 // Correct result
977 } catch (IllegalStateException ise) {
978 // Correct result
979 }
980 }
981
982 // This test is for 4803197
983 private static void escapedSegmentTest() throws Exception {
984
985 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
986 check(pattern, "dir1\\dir2", true);
987
988 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
989 check(pattern, "dir1\\dir2\\", true);
990
991 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
992 check(pattern, "dir1\\dir2\\", true);
993
994 // Supplementary character test
995 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
996 check(pattern, toSupplementaries("dir1\\dir2"), true);
997
998 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
999 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1000
1001 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1002 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1003
1004 report("Escaped segment");
1005 }
1006
1007 // This test is for 4792284
1008 private static void nonCaptureRepetitionTest() throws Exception {
1009 String input = "abcdefgh;";
1010
1011 String[] patterns = new String[] {
1012 "(?:\\w{4})+;",
1013 "(?:\\w{8})*;",
1014 "(?:\\w{2}){2,4};",
1015 "(?:\\w{4}){2,};", // only matches the
1016 ".*?(?:\\w{5})+;", // specified minimum
1017 ".*?(?:\\w{9})*;", // number of reps - OK
1018 "(?:\\w{4})+?;", // lazy repetition - OK
1019 "(?:\\w{4})++;", // possessive repetition - OK
1020 "(?:\\w{2,}?)+;", // non-deterministic - OK
1021 "(\\w{4})+;", // capturing group - OK
1022 };
1023
1024 for (int i = 0; i < patterns.length; i++) {
1025 // Check find()
1026 check(patterns[i], 0, input, input, true);
1027 // Check matches()
1028 Pattern p = Pattern.compile(patterns[i]);
1029 Matcher m = p.matcher(input);
1030
1031 if (m.matches()) {
1032 if (!m.group(0).equals(input))
1033 failCount++;
1034 } else {
1035 failCount++;
1036 }
1037 }
1038
1039 report("Non capturing repetition");
1040 }
1041
1042 // This test is for 6358731
1043 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1044 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1045 Matcher matcher = pattern.matcher("abcd");
1046 if (!matcher.matches() ||
1047 matcher.group(1) != null ||
1048 !matcher.group(2).equals("abcd")) {
1049 failCount++;
1050 }
1051 report("Not captured GroupCurly");
1052 }
1053
1054 // This test is for 4706545
1055 private static void javaCharClassTest() throws Exception {
1056 for (int i=0; i<1000; i++) {
1057 char c = (char)generator.nextInt();
1058 check("{javaLowerCase}", c, Character.isLowerCase(c));
1059 check("{javaUpperCase}", c, Character.isUpperCase(c));
1060 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1061 check("{javaTitleCase}", c, Character.isTitleCase(c));
1062 check("{javaDigit}", c, Character.isDigit(c));
1063 check("{javaDefined}", c, Character.isDefined(c));
1064 check("{javaLetter}", c, Character.isLetter(c));
1065 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1066 check("{javaJavaIdentifierStart}", c,
1067 Character.isJavaIdentifierStart(c));
1068 check("{javaJavaIdentifierPart}", c,
1069 Character.isJavaIdentifierPart(c));
1070 check("{javaUnicodeIdentifierStart}", c,
1071 Character.isUnicodeIdentifierStart(c));
1072 check("{javaUnicodeIdentifierPart}", c,
1073 Character.isUnicodeIdentifierPart(c));
1074 check("{javaIdentifierIgnorable}", c,
1075 Character.isIdentifierIgnorable(c));
1076 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1077 check("{javaWhitespace}", c, Character.isWhitespace(c));
1078 check("{javaISOControl}", c, Character.isISOControl(c));
1079 check("{javaMirrored}", c, Character.isMirrored(c));
1080
1081 }
1082
1083 // Supplementary character test
1084 for (int i=0; i<1000; i++) {
1085 int c = generator.nextInt(Character.MAX_CODE_POINT
1086 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1087 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1088 check("{javaLowerCase}", c, Character.isLowerCase(c));
1089 check("{javaUpperCase}", c, Character.isUpperCase(c));
1090 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1091 check("{javaTitleCase}", c, Character.isTitleCase(c));
1092 check("{javaDigit}", c, Character.isDigit(c));
1093 check("{javaDefined}", c, Character.isDefined(c));
1094 check("{javaLetter}", c, Character.isLetter(c));
1095 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1096 check("{javaJavaIdentifierStart}", c,
1097 Character.isJavaIdentifierStart(c));
1098 check("{javaJavaIdentifierPart}", c,
1099 Character.isJavaIdentifierPart(c));
1100 check("{javaUnicodeIdentifierStart}", c,
1101 Character.isUnicodeIdentifierStart(c));
1102 check("{javaUnicodeIdentifierPart}", c,
1103 Character.isUnicodeIdentifierPart(c));
1104 check("{javaIdentifierIgnorable}", c,
1105 Character.isIdentifierIgnorable(c));
1106 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1107 check("{javaWhitespace}", c, Character.isWhitespace(c));
1108 check("{javaISOControl}", c, Character.isISOControl(c));
1109 check("{javaMirrored}", c, Character.isMirrored(c));
1110 }
1111
1112 report("Java character classes");
1113 }
1114
1115 // This test is for 4523620
1116 /*
1117 private static void numOccurrencesTest() throws Exception {
1118 Pattern pattern = Pattern.compile("aaa");
1119
1120 if (pattern.numOccurrences("aaaaaa", false) != 2)
1121 failCount++;
1122 if (pattern.numOccurrences("aaaaaa", true) != 4)
1123 failCount++;
1124
1125 pattern = Pattern.compile("^");
1126 if (pattern.numOccurrences("aaaaaa", false) != 1)
1127 failCount++;
1128 if (pattern.numOccurrences("aaaaaa", true) != 1)
1129 failCount++;
1130
1131 report("Number of Occurrences");
1132 }
1133 */
1134
1135 // This test is for 4776374
1136 private static void caretBetweenTerminatorsTest() throws Exception {
1137 int flags1 = Pattern.DOTALL;
1138 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1139 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1140 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1141
1142 check("^....", flags1, "test\ntest", "test", true);
1143 check(".....^", flags1, "test\ntest", "test", false);
1144 check(".....^", flags1, "test\n", "test", false);
1145 check("....^", flags1, "test\r\n", "test", false);
1146
1147 check("^....", flags2, "test\ntest", "test", true);
1148 check("....^", flags2, "test\ntest", "test", false);
1149 check(".....^", flags2, "test\n", "test", false);
1150 check("....^", flags2, "test\r\n", "test", false);
1151
1152 check("^....", flags3, "test\ntest", "test", true);
1153 check(".....^", flags3, "test\ntest", "test\n", true);
1154 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1155 check(".....^", flags3, "test\n", "test", false);
1156 check(".....^", flags3, "test\r\n", "test", false);
1157 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1158
1159 check("^....", flags4, "test\ntest", "test", true);
1160 check(".....^", flags3, "test\ntest", "test\n", true);
1161 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1162 check(".....^", flags4, "test\n", "test\n", false);
1163 check(".....^", flags4, "test\r\n", "test\r", false);
1164
1165 // Supplementary character test
1166 String t = toSupplementaries("test");
1167 check("^....", flags1, t+"\n"+t, t, true);
1168 check(".....^", flags1, t+"\n"+t, t, false);
1169 check(".....^", flags1, t+"\n", t, false);
1170 check("....^", flags1, t+"\r\n", t, false);
1171
1172 check("^....", flags2, t+"\n"+t, t, true);
1173 check("....^", flags2, t+"\n"+t, t, false);
1174 check(".....^", flags2, t+"\n", t, false);
1175 check("....^", flags2, t+"\r\n", t, false);
1176
1177 check("^....", flags3, t+"\n"+t, t, true);
1178 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1179 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1180 check(".....^", flags3, t+"\n", t, false);
1181 check(".....^", flags3, t+"\r\n", t, false);
1182 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1183
1184 check("^....", flags4, t+"\n"+t, t, true);
1185 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1186 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1187 check(".....^", flags4, t+"\n", t+"\n", false);
1188 check(".....^", flags4, t+"\r\n", t+"\r", false);
1189
1190 report("Caret between terminators");
1191 }
1192
1193 // This test is for 4727935
1194 private static void dollarAtEndTest() throws Exception {
1195 int flags1 = Pattern.DOTALL;
1196 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1197 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1198
1199 check("....$", flags1, "test\n", "test", true);
1200 check("....$", flags1, "test\r\n", "test", true);
1201 check(".....$", flags1, "test\n", "test\n", true);
1202 check(".....$", flags1, "test\u0085", "test\u0085", true);
1203 check("....$", flags1, "test\u0085", "test", true);
1204
1205 check("....$", flags2, "test\n", "test", true);
1206 check(".....$", flags2, "test\n", "test\n", true);
1207 check(".....$", flags2, "test\u0085", "test\u0085", true);
1208 check("....$", flags2, "test\u0085", "est\u0085", true);
1209
1210 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1211 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1212 check("....$blah", flags3, "test\nblah", "!!!!", false);
1213 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1214
1215 // Supplementary character test
1216 String t = toSupplementaries("test");
1217 String b = toSupplementaries("blah");
1218 check("....$", flags1, t+"\n", t, true);
1219 check("....$", flags1, t+"\r\n", t, true);
1220 check(".....$", flags1, t+"\n", t+"\n", true);
1221 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1222 check("....$", flags1, t+"\u0085", t, true);
1223
1224 check("....$", flags2, t+"\n", t, true);
1225 check(".....$", flags2, t+"\n", t+"\n", true);
1226 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1227 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1228
1229 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1230 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1231 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1232 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1233
1234 report("Dollar at End");
1235 }
1236
1237 // This test is for 4711773
1238 private static void multilineDollarTest() throws Exception {
1239 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1240 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1241 matcher.find();
1242 if (matcher.start(0) != 9)
1243 failCount++;
1244 matcher.find();
1245 if (matcher.start(0) != 20)
1246 failCount++;
1247
1248 // Supplementary character test
1249 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1250 matcher.find();
1251 if (matcher.start(0) != 9*2)
1252 failCount++;
1253 matcher.find();
1254 if (matcher.start(0) != 20*2)
1255 failCount++;
1256
1257 report("Multiline Dollar");
1258 }
1259
1260 private static void reluctantRepetitionTest() throws Exception {
1261 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1262 check(p, "1 word word word 2", true);
1263 check(p, "1 wor wo w 2", true);
1264 check(p, "1 word word 2", true);
1265 check(p, "1 word 2", true);
1266 check(p, "1 wo w w 2", true);
1267 check(p, "1 wo w 2", true);
1268 check(p, "1 wor w 2", true);
1269
1270 p = Pattern.compile("([a-z])+?c");
1271 Matcher m = p.matcher("ababcdefdec");
1272 check(m, "ababc");
1273
1274 // Supplementary character test
1275 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1276 m = p.matcher(toSupplementaries("ababcdefdec"));
1277 check(m, toSupplementaries("ababc"));
1278
1279 report("Reluctant Repetition");
1280 }
1281
1282 private static void serializeTest() throws Exception {
1283 String patternStr = "(b)";
1284 String matchStr = "b";
1285 Pattern pattern = Pattern.compile(patternStr);
1286 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1287 ObjectOutputStream oos = new ObjectOutputStream(baos);
1288 oos.writeObject(pattern);
1289 oos.close();
1290 ObjectInputStream ois = new ObjectInputStream(
1291 new ByteArrayInputStream(baos.toByteArray()));
1292 Pattern serializedPattern = (Pattern)ois.readObject();
1293 ois.close();
1294 Matcher matcher = serializedPattern.matcher(matchStr);
1295 if (!matcher.matches())
1296 failCount++;
1297 if (matcher.groupCount() != 1)
1298 failCount++;
1299
1300 report("Serialization");
1301 }
1302
1303 private static void gTest() {
1304 Pattern pattern = Pattern.compile("\\G\\w");
1305 Matcher matcher = pattern.matcher("abc#x#x");
1306 matcher.find();
1307 matcher.find();
1308 matcher.find();
1309 if (matcher.find())
1310 failCount++;
1311
1312 pattern = Pattern.compile("\\GA*");
1313 matcher = pattern.matcher("1A2AA3");
1314 matcher.find();
1315 if (matcher.find())
1316 failCount++;
1317
1318 pattern = Pattern.compile("\\GA*");
1319 matcher = pattern.matcher("1A2AA3");
1320 if (!matcher.find(1))
1321 failCount++;
1322 matcher.find();
1323 if (matcher.find())
1324 failCount++;
1325
1326 report("\\G");
1327 }
1328
1329 private static void zTest() {
1330 Pattern pattern = Pattern.compile("foo\\Z");
1331 // Positives
1332 check(pattern, "foo\u0085", true);
1333 check(pattern, "foo\u2028", true);
1334 check(pattern, "foo\u2029", true);
1335 check(pattern, "foo\n", true);
1336 check(pattern, "foo\r", true);
1337 check(pattern, "foo\r\n", true);
1338 // Negatives
1339 check(pattern, "fooo", false);
1340 check(pattern, "foo\n\r", false);
1341
1342 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1343 // Positives
1344 check(pattern, "foo", true);
1345 check(pattern, "foo\n", true);
1346 // Negatives
1347 check(pattern, "foo\r", false);
1348 check(pattern, "foo\u0085", false);
1349 check(pattern, "foo\u2028", false);
1350 check(pattern, "foo\u2029", false);
1351
1352 report("\\Z");
1353 }
1354
1355 private static void replaceFirstTest() {
1356 Pattern pattern = Pattern.compile("(ab)(c*)");
1357 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1358 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1359 failCount++;
1360
1361 matcher.reset("zzzabccczzzabcczzzabccczzz");
1362 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1363 failCount++;
1364
1365 matcher.reset("zzzabccczzzabcczzzabccczzz");
1366 String result = matcher.replaceFirst("$1");
1367 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1368 failCount++;
1369
1370 matcher.reset("zzzabccczzzabcczzzabccczzz");
1371 result = matcher.replaceFirst("$2");
1372 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1373 failCount++;
1374
1375 pattern = Pattern.compile("a*");
1376 matcher = pattern.matcher("aaaaaaaaaa");
1377 if (!matcher.replaceFirst("test").equals("test"))
1378 failCount++;
1379
1380 pattern = Pattern.compile("a+");
1381 matcher = pattern.matcher("zzzaaaaaaaaaa");
1382 if (!matcher.replaceFirst("test").equals("zzztest"))
1383 failCount++;
1384
1385 // Supplementary character test
1386 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1387 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1388 if (!matcher.replaceFirst(toSupplementaries("test"))
1389 .equals(toSupplementaries("testzzzabcczzzabccc")))
1390 failCount++;
1391
1392 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1393 if (!matcher.replaceFirst(toSupplementaries("test")).
1394 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1395 failCount++;
1396
1397 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1398 result = matcher.replaceFirst("$1");
1399 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1400 failCount++;
1401
1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1403 result = matcher.replaceFirst("$2");
1404 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1405 failCount++;
1406
1407 pattern = Pattern.compile(toSupplementaries("a*"));
1408 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1409 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1410 failCount++;
1411
1412 pattern = Pattern.compile(toSupplementaries("a+"));
1413 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1414 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1415 failCount++;
1416
1417 report("Replace First");
1418 }
1419
1420 private static void unixLinesTest() {
1421 Pattern pattern = Pattern.compile(".*");
1422 Matcher matcher = pattern.matcher("aa\u2028blah");
1423 matcher.find();
1424 if (!matcher.group(0).equals("aa"))
1425 failCount++;
1426
1427 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1428 matcher = pattern.matcher("aa\u2028blah");
1429 matcher.find();
1430 if (!matcher.group(0).equals("aa\u2028blah"))
1431 failCount++;
1432
1433 pattern = Pattern.compile("[az]$",
1434 Pattern.MULTILINE | Pattern.UNIX_LINES);
1435 matcher = pattern.matcher("aa\u2028zz");
1436 check(matcher, "a\u2028", false);
1437
1438 // Supplementary character test
1439 pattern = Pattern.compile(".*");
1440 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1441 matcher.find();
1442 if (!matcher.group(0).equals(toSupplementaries("aa")))
1443 failCount++;
1444
1445 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1446 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1447 matcher.find();
1448 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1449 failCount++;
1450
1451 pattern = Pattern.compile(toSupplementaries("[az]$"),
1452 Pattern.MULTILINE | Pattern.UNIX_LINES);
1453 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1454 check(matcher, toSupplementaries("a\u2028"), false);
1455
1456 report("Unix Lines");
1457 }
1458
1459 private static void commentsTest() {
1460 int flags = Pattern.COMMENTS;
1461
1462 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1463 Matcher matcher = pattern.matcher("aa#aa");
1464 if (!matcher.matches())
1465 failCount++;
1466
1467 pattern = Pattern.compile("aa # blah", flags);
1468 matcher = pattern.matcher("aa");
1469 if (!matcher.matches())
1470 failCount++;
1471
1472 pattern = Pattern.compile("aa blah", flags);
1473 matcher = pattern.matcher("aablah");
1474 if (!matcher.matches())
1475 failCount++;
1476
1477 pattern = Pattern.compile("aa # blah blech ", flags);
1478 matcher = pattern.matcher("aa");
1479 if (!matcher.matches())
1480 failCount++;
1481
1482 pattern = Pattern.compile("aa # blah\n ", flags);
1483 matcher = pattern.matcher("aa");
1484 if (!matcher.matches())
1485 failCount++;
1486
1487 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1488 matcher = pattern.matcher("aabc");
1489 if (!matcher.matches())
1490 failCount++;
1491
1492 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1493 matcher = pattern.matcher("aabc");
1494 if (!matcher.matches())
1495 failCount++;
1496
1497 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1498 matcher = pattern.matcher("aabc#blech");
1499 if (!matcher.matches())
1500 failCount++;
1501
1502 // Supplementary character test
1503 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1504 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1505 if (!matcher.matches())
1506 failCount++;
1507
1508 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1509 matcher = pattern.matcher(toSupplementaries("aa"));
1510 if (!matcher.matches())
1511 failCount++;
1512
1513 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1514 matcher = pattern.matcher(toSupplementaries("aablah"));
1515 if (!matcher.matches())
1516 failCount++;
1517
1518 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1519 matcher = pattern.matcher(toSupplementaries("aa"));
1520 if (!matcher.matches())
1521 failCount++;
1522
1523 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1524 matcher = pattern.matcher(toSupplementaries("aa"));
1525 if (!matcher.matches())
1526 failCount++;
1527
1528 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1529 matcher = pattern.matcher(toSupplementaries("aabc"));
1530 if (!matcher.matches())
1531 failCount++;
1532
1533 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1534 matcher = pattern.matcher(toSupplementaries("aabc"));
1535 if (!matcher.matches())
1536 failCount++;
1537
1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1539 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1540 if (!matcher.matches())
1541 failCount++;
1542
1543 report("Comments");
1544 }
1545
1546 private static void caseFoldingTest() { // bug 4504687
1547 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1548 Pattern pattern = Pattern.compile("aa", flags);
1549 Matcher matcher = pattern.matcher("ab");
1550 if (matcher.matches())
1551 failCount++;
1552
1553 pattern = Pattern.compile("aA", flags);
1554 matcher = pattern.matcher("ab");
1555 if (matcher.matches())
1556 failCount++;
1557
1558 pattern = Pattern.compile("aa", flags);
1559 matcher = pattern.matcher("aB");
1560 if (matcher.matches())
1561 failCount++;
1562 matcher = pattern.matcher("Ab");
1563 if (matcher.matches())
1564 failCount++;
1565
1566 // ASCII "a"
1567 // Latin-1 Supplement "a" + grave
1568 // Cyrillic "a"
1569 String[] patterns = new String[] {
1570 //single
1571 "a", "\u00e0", "\u0430",
1572 //slice
1573 "ab", "\u00e0\u00e1", "\u0430\u0431",
1574 //class single
1575 "[a]", "[\u00e0]", "[\u0430]",
1576 //class range
1577 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1578 //back reference
1579 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1580 };
1581
1582 String[] texts = new String[] {
1583 "A", "\u00c0", "\u0410",
1584 "AB", "\u00c0\u00c1", "\u0410\u0411",
1585 "A", "\u00c0", "\u0410",
1586 "B", "\u00c2", "\u0411",
1587 "aA", "\u00e0\u00c0", "\u0430\u0410"
1588 };
1589
1590 boolean[] expected = new boolean[] {
1591 true, false, false,
1592 true, false, false,
1593 true, false, false,
1594 true, false, false,
1595 true, false, false
1596 };
1597
1598 flags = Pattern.CASE_INSENSITIVE;
1599 for (int i = 0; i < patterns.length; i++) {
1600 pattern = Pattern.compile(patterns[i], flags);
1601 matcher = pattern.matcher(texts[i]);
1602 if (matcher.matches() != expected[i]) {
1603 System.out.println("<1> Failed at " + i);
1604 failCount++;
1605 }
1606 }
1607
1608 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1609 for (int i = 0; i < patterns.length; i++) {
1610 pattern = Pattern.compile(patterns[i], flags);
1611 matcher = pattern.matcher(texts[i]);
1612 if (!matcher.matches()) {
1613 System.out.println("<2> Failed at " + i);
1614 failCount++;
1615 }
1616 }
1617 // flag unicode_case alone should do nothing
1618 flags = Pattern.UNICODE_CASE;
1619 for (int i = 0; i < patterns.length; i++) {
1620 pattern = Pattern.compile(patterns[i], flags);
1621 matcher = pattern.matcher(texts[i]);
1622 if (matcher.matches()) {
1623 System.out.println("<3> Failed at " + i);
1624 failCount++;
1625 }
1626 }
1627
1628 // Special cases: i, I, u+0131 and u+0130
1629 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1630 pattern = Pattern.compile("[h-j]+", flags);
1631 if (!pattern.matcher("\u0131\u0130").matches())
1632 failCount++;
1633 report("Case Folding");
1634 }
1635
1636 private static void appendTest() {
1637 Pattern pattern = Pattern.compile("(ab)(cd)");
1638 Matcher matcher = pattern.matcher("abcd");
1639 String result = matcher.replaceAll("$2$1");
1640 if (!result.equals("cdab"))
1641 failCount++;
1642
1643 String s1 = "Swap all: first = 123, second = 456";
1644 String s2 = "Swap one: first = 123, second = 456";
1645 String r = "$3$2$1";
1646 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1647 matcher = pattern.matcher(s1);
1648
1649 result = matcher.replaceAll(r);
1650 if (!result.equals("Swap all: 123 = first, 456 = second"))
1651 failCount++;
1652
1653 matcher = pattern.matcher(s2);
1654
1655 if (matcher.find()) {
1656 StringBuffer sb = new StringBuffer();
1657 matcher.appendReplacement(sb, r);
1658 matcher.appendTail(sb);
1659 result = sb.toString();
1660 if (!result.equals("Swap one: 123 = first, second = 456"))
1661 failCount++;
1662 }
1663
1664 // Supplementary character test
1665 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1666 matcher = pattern.matcher(toSupplementaries("abcd"));
1667 result = matcher.replaceAll("$2$1");
1668 if (!result.equals(toSupplementaries("cdab")))
1669 failCount++;
1670
1671 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1672 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1673 r = toSupplementaries("$3$2$1");
1674 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1675 matcher = pattern.matcher(s1);
1676
1677 result = matcher.replaceAll(r);
1678 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1679 failCount++;
1680
1681 matcher = pattern.matcher(s2);
1682
1683 if (matcher.find()) {
1684 StringBuffer sb = new StringBuffer();
1685 matcher.appendReplacement(sb, r);
1686 matcher.appendTail(sb);
1687 result = sb.toString();
1688 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1689 failCount++;
1690 }
1691 report("Append");
1692 }
1693
1694 private static void splitTest() {
1695 Pattern pattern = Pattern.compile(":");
1696 String[] result = pattern.split("foo:and:boo", 2);
1697 if (!result[0].equals("foo"))
1698 failCount++;
1699 if (!result[1].equals("and:boo"))
1700 failCount++;
1701 // Supplementary character test
1702 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1703 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1704 if (!result[0].equals(toSupplementaries("foo")))
1705 failCount++;
1706 if (!result[1].equals(toSupplementaries("andXboo")))
1707 failCount++;
1708
1709 CharBuffer cb = CharBuffer.allocate(100);
1710 cb.put("foo:and:boo");
1711 cb.flip();
1712 result = pattern.split(cb);
1713 if (!result[0].equals("foo"))
1714 failCount++;
1715 if (!result[1].equals("and"))
1716 failCount++;
1717 if (!result[2].equals("boo"))
1718 failCount++;
1719
1720 // Supplementary character test
1721 CharBuffer cbs = CharBuffer.allocate(100);
1722 cbs.put(toSupplementaries("fooXandXboo"));
1723 cbs.flip();
1724 result = patternX.split(cbs);
1725 if (!result[0].equals(toSupplementaries("foo")))
1726 failCount++;
1727 if (!result[1].equals(toSupplementaries("and")))
1728 failCount++;
1729 if (!result[2].equals(toSupplementaries("boo")))
1730 failCount++;
1731
1732 String source = "0123456789";
1733 for (int limit=-2; limit<3; limit++) {
1734 for (int x=0; x<10; x++) {
1735 result = source.split(Integer.toString(x), limit);
1736 int expectedLength = limit < 1 ? 2 : limit;
1737
1738 if ((limit == 0) && (x == 9)) {
1739 // expected dropping of ""
1740 if (result.length != 1)
1741 failCount++;
1742 if (!result[0].equals("012345678")) {
1743 failCount++;
1744 }
1745 } else {
1746 if (result.length != expectedLength) {
1747 failCount++;
1748 }
1749 if (!result[0].equals(source.substring(0,x))) {
1750 if (limit != 1) {
1751 failCount++;
1752 } else {
1753 if (!result[0].equals(source.substring(0,10))) {
1754 failCount++;
1755 }
1756 }
1757 }
1758 if (expectedLength > 1) { // Check segment 2
1759 if (!result[1].equals(source.substring(x+1,10)))
1760 failCount++;
1761 }
1762 }
1763 }
1764 }
1765 // Check the case for no match found
1766 for (int limit=-2; limit<3; limit++) {
1767 result = source.split("e", limit);
1768 if (result.length != 1)
1769 failCount++;
1770 if (!result[0].equals(source))
1771 failCount++;
1772 }
1773 // Check the case for limit == 0, source = "";
1774 source = "";
1775 result = source.split("e", 0);
1776 if (result.length != 1)
1777 failCount++;
1778 if (!result[0].equals(source))
1779 failCount++;
1780
1781 report("Split");
1782 }
1783
1784 private static void negationTest() {
1785 Pattern pattern = Pattern.compile("[\\[@^]+");
1786 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1787 if (!matcher.find())
1788 failCount++;
1789 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1790 failCount++;
1791 pattern = Pattern.compile("[@\\[^]+");
1792 matcher = pattern.matcher("@@@@[[[[^^^^");
1793 if (!matcher.find())
1794 failCount++;
1795 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1796 failCount++;
1797 pattern = Pattern.compile("[@\\[^@]+");
1798 matcher = pattern.matcher("@@@@[[[[^^^^");
1799 if (!matcher.find())
1800 failCount++;
1801 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1802 failCount++;
1803
1804 pattern = Pattern.compile("\\)");
1805 matcher = pattern.matcher("xxx)xxx");
1806 if (!matcher.find())
1807 failCount++;
1808
1809 report("Negation");
1810 }
1811
1812 private static void ampersandTest() {
1813 Pattern pattern = Pattern.compile("[&@]+");
1814 check(pattern, "@@@@&&&&", true);
1815
1816 pattern = Pattern.compile("[@&]+");
1817 check(pattern, "@@@@&&&&", true);
1818
1819 pattern = Pattern.compile("[@\\&]+");
1820 check(pattern, "@@@@&&&&", true);
1821
1822 report("Ampersand");
1823 }
1824
1825 private static void octalTest() throws Exception {
1826 Pattern pattern = Pattern.compile("\\u0007");
1827 Matcher matcher = pattern.matcher("\u0007");
1828 if (!matcher.matches())
1829 failCount++;
1830 pattern = Pattern.compile("\\07");
1831 matcher = pattern.matcher("\u0007");
1832 if (!matcher.matches())
1833 failCount++;
1834 pattern = Pattern.compile("\\007");
1835 matcher = pattern.matcher("\u0007");
1836 if (!matcher.matches())
1837 failCount++;
1838 pattern = Pattern.compile("\\0007");
1839 matcher = pattern.matcher("\u0007");
1840 if (!matcher.matches())
1841 failCount++;
1842 pattern = Pattern.compile("\\040");
1843 matcher = pattern.matcher("\u0020");
1844 if (!matcher.matches())
1845 failCount++;
1846 pattern = Pattern.compile("\\0403");
1847 matcher = pattern.matcher("\u00203");
1848 if (!matcher.matches())
1849 failCount++;
1850 pattern = Pattern.compile("\\0103");
1851 matcher = pattern.matcher("\u0043");
1852 if (!matcher.matches())
1853 failCount++;
1854
1855 report("Octal");
1856 }
1857
1858 private static void longPatternTest() throws Exception {
1859 try {
1860 Pattern pattern = Pattern.compile(
1861 "a 32-character-long pattern xxxx");
1862 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1863 pattern = Pattern.compile("a thirty four character long regex");
1864 StringBuffer patternToBe = new StringBuffer(101);
1865 for (int i=0; i<100; i++)
1866 patternToBe.append((char)(97 + i%26));
1867 pattern = Pattern.compile(patternToBe.toString());
1868 } catch (PatternSyntaxException e) {
1869 failCount++;
1870 }
1871
1872 // Supplementary character test
1873 try {
1874 Pattern pattern = Pattern.compile(
1875 toSupplementaries("a 32-character-long pattern xxxx"));
1876 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1877 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1878 StringBuffer patternToBe = new StringBuffer(101*2);
1879 for (int i=0; i<100; i++)
1880 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1881 + 97 + i%26));
1882 pattern = Pattern.compile(patternToBe.toString());
1883 } catch (PatternSyntaxException e) {
1884 failCount++;
1885 }
1886 report("LongPattern");
1887 }
1888
1889 private static void group0Test() throws Exception {
1890 Pattern pattern = Pattern.compile("(tes)ting");
1891 Matcher matcher = pattern.matcher("testing");
1892 check(matcher, "testing");
1893
1894 matcher.reset("testing");
1895 if (matcher.lookingAt()) {
1896 if (!matcher.group(0).equals("testing"))
1897 failCount++;
1898 } else {
1899 failCount++;
1900 }
1901
1902 matcher.reset("testing");
1903 if (matcher.matches()) {
1904 if (!matcher.group(0).equals("testing"))
1905 failCount++;
1906 } else {
1907 failCount++;
1908 }
1909
1910 pattern = Pattern.compile("(tes)ting");
1911 matcher = pattern.matcher("testing");
1912 if (matcher.lookingAt()) {
1913 if (!matcher.group(0).equals("testing"))
1914 failCount++;
1915 } else {
1916 failCount++;
1917 }
1918
1919 pattern = Pattern.compile("^(tes)ting");
1920 matcher = pattern.matcher("testing");
1921 if (matcher.matches()) {
1922 if (!matcher.group(0).equals("testing"))
1923 failCount++;
1924 } else {
1925 failCount++;
1926 }
1927
1928 // Supplementary character test
1929 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1930 matcher = pattern.matcher(toSupplementaries("testing"));
1931 check(matcher, toSupplementaries("testing"));
1932
1933 matcher.reset(toSupplementaries("testing"));
1934 if (matcher.lookingAt()) {
1935 if (!matcher.group(0).equals(toSupplementaries("testing")))
1936 failCount++;
1937 } else {
1938 failCount++;
1939 }
1940
1941 matcher.reset(toSupplementaries("testing"));
1942 if (matcher.matches()) {
1943 if (!matcher.group(0).equals(toSupplementaries("testing")))
1944 failCount++;
1945 } else {
1946 failCount++;
1947 }
1948
1949 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1950 matcher = pattern.matcher(toSupplementaries("testing"));
1951 if (matcher.lookingAt()) {
1952 if (!matcher.group(0).equals(toSupplementaries("testing")))
1953 failCount++;
1954 } else {
1955 failCount++;
1956 }
1957
1958 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1959 matcher = pattern.matcher(toSupplementaries("testing"));
1960 if (matcher.matches()) {
1961 if (!matcher.group(0).equals(toSupplementaries("testing")))
1962 failCount++;
1963 } else {
1964 failCount++;
1965 }
1966
1967 report("Group0");
1968 }
1969
1970 private static void findIntTest() throws Exception {
1971 Pattern p = Pattern.compile("blah");
1972 Matcher m = p.matcher("zzzzblahzzzzzblah");
1973 boolean result = m.find(2);
1974 if (!result)
1975 failCount++;
1976
1977 p = Pattern.compile("$");
1978 m = p.matcher("1234567890");
1979 result = m.find(10);
1980 if (!result)
1981 failCount++;
1982 try {
1983 result = m.find(11);
1984 failCount++;
1985 } catch (IndexOutOfBoundsException e) {
1986 // correct result
1987 }
1988
1989 // Supplementary character test
1990 p = Pattern.compile(toSupplementaries("blah"));
1991 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1992 result = m.find(2);
1993 if (!result)
1994 failCount++;
1995
1996 report("FindInt");
1997 }
1998
1999 private static void emptyPatternTest() throws Exception {
2000 Pattern p = Pattern.compile("");
2001 Matcher m = p.matcher("foo");
2002
2003 // Should find empty pattern at beginning of input
2004 boolean result = m.find();
2005 if (result != true)
2006 failCount++;
2007 if (m.start() != 0)
2008 failCount++;
2009
2010 // Should not match entire input if input is not empty
2011 m.reset();
2012 result = m.matches();
2013 if (result == true)
2014 failCount++;
2015
2016 try {
2017 m.start(0);
2018 failCount++;
2019 } catch (IllegalStateException e) {
2020 // Correct result
2021 }
2022
2023 // Should match entire input if input is empty
2024 m.reset("");
2025 result = m.matches();
2026 if (result != true)
2027 failCount++;
2028
2029 result = Pattern.matches("", "");
2030 if (result != true)
2031 failCount++;
2032
2033 result = Pattern.matches("", "foo");
2034 if (result == true)
2035 failCount++;
2036 report("EmptyPattern");
2037 }
2038
2039 private static void charClassTest() throws Exception {
2040 Pattern pattern = Pattern.compile("blah[ab]]blech");
2041 check(pattern, "blahb]blech", true);
2042
2043 pattern = Pattern.compile("[abc[def]]");
2044 check(pattern, "b", true);
2045
2046 // Supplementary character tests
2047 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2048 check(pattern, toSupplementaries("blahb]blech"), true);
2049
2050 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2051 check(pattern, toSupplementaries("b"), true);
2052
2053 try {
2054 // u00ff when UNICODE_CASE
2055 pattern = Pattern.compile("[ab\u00ffcd]",
2056 Pattern.CASE_INSENSITIVE|
2057 Pattern.UNICODE_CASE);
2058 check(pattern, "ab\u00ffcd", true);
2059 check(pattern, "Ab\u0178Cd", true);
2060
2061 // u00b5 when UNICODE_CASE
2062 pattern = Pattern.compile("[ab\u00b5cd]",
2063 Pattern.CASE_INSENSITIVE|
2064 Pattern.UNICODE_CASE);
2065 check(pattern, "ab\u00b5cd", true);
2066 check(pattern, "Ab\u039cCd", true);
2067 } catch (Exception e) { failCount++; }
2068
2069 /* Special cases
2070 (1)LatinSmallLetterLongS u+017f
2071 (2)LatinSmallLetterDotlessI u+0131
2072 (3)LatineCapitalLetterIWithDotAbove u+0130
2073 (4)KelvinSign u+212a
2074 (5)AngstromSign u+212b
2075 */
2076 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2077 pattern = Pattern.compile("[sik\u00c5]+", flags);
2078 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2079 failCount++;
2080
2081 report("CharClass");
2082 }
2083
2084 private static void caretTest() throws Exception {
2085 Pattern pattern = Pattern.compile("\\w*");
2086 Matcher matcher = pattern.matcher("a#bc#def##g");
2087 check(matcher, "a");
2088 check(matcher, "");
2089 check(matcher, "bc");
2090 check(matcher, "");
2091 check(matcher, "def");
2092 check(matcher, "");
2093 check(matcher, "");
2094 check(matcher, "g");
2095 check(matcher, "");
2096 if (matcher.find())
2097 failCount++;
2098
2099 pattern = Pattern.compile("^\\w*");
2100 matcher = pattern.matcher("a#bc#def##g");
2101 check(matcher, "a");
2102 if (matcher.find())
2103 failCount++;
2104
2105 pattern = Pattern.compile("\\w");
2106 matcher = pattern.matcher("abc##x");
2107 check(matcher, "a");
2108 check(matcher, "b");
2109 check(matcher, "c");
2110 check(matcher, "x");
2111 if (matcher.find())
2112 failCount++;
2113
2114 pattern = Pattern.compile("^\\w");
2115 matcher = pattern.matcher("abc##x");
2116 check(matcher, "a");
2117 if (matcher.find())
2118 failCount++;
2119
2120 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2121 matcher = pattern.matcher("abcdef-ghi\njklmno");
2122 check(matcher, "abc");
2123 if (matcher.find())
2124 failCount++;
2125
2126 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2127 matcher = pattern.matcher("abcdef-ghi\njklmno");
2128 check(matcher, "abc");
2129 check(matcher, "jkl");
2130 if (matcher.find())
2131 failCount++;
2132
2133 pattern = Pattern.compile("^", Pattern.MULTILINE);
2134 matcher = pattern.matcher("this is some text");
2135 String result = matcher.replaceAll("X");
2136 if (!result.equals("Xthis is some text"))
2137 failCount++;
2138
2139 pattern = Pattern.compile("^");
2140 matcher = pattern.matcher("this is some text");
2141 result = matcher.replaceAll("X");
2142 if (!result.equals("Xthis is some text"))
2143 failCount++;
2144
2145 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2146 matcher = pattern.matcher("this is some text\n");
2147 result = matcher.replaceAll("X");
2148 if (!result.equals("Xthis is some text\n"))
2149 failCount++;
2150
2151 report("Caret");
2152 }
2153
2154 private static void groupCaptureTest() throws Exception {
2155 // Independent group
2156 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2157 Matcher matcher = pattern.matcher("xxxyyyzzz");
2158 matcher.find();
2159 try {
2160 String blah = matcher.group(1);
2161 failCount++;
2162 } catch (IndexOutOfBoundsException ioobe) {
2163 // Good result
2164 }
2165 // Pure group
2166 pattern = Pattern.compile("x+(?:y+)z+");
2167 matcher = pattern.matcher("xxxyyyzzz");
2168 matcher.find();
2169 try {
2170 String blah = matcher.group(1);
2171 failCount++;
2172 } catch (IndexOutOfBoundsException ioobe) {
2173 // Good result
2174 }
2175
2176 // Supplementary character tests
2177 // Independent group
2178 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2179 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2180 matcher.find();
2181 try {
2182 String blah = matcher.group(1);
2183 failCount++;
2184 } catch (IndexOutOfBoundsException ioobe) {
2185 // Good result
2186 }
2187 // Pure group
2188 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2189 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2190 matcher.find();
2191 try {
2192 String blah = matcher.group(1);
2193 failCount++;
2194 } catch (IndexOutOfBoundsException ioobe) {
2195 // Good result
2196 }
2197
2198 report("GroupCapture");
2199 }
2200
2201 private static void backRefTest() throws Exception {
2202 Pattern pattern = Pattern.compile("(a*)bc\\1");
2203 check(pattern, "zzzaabcazzz", true);
2204
2205 pattern = Pattern.compile("(a*)bc\\1");
2206 check(pattern, "zzzaabcaazzz", true);
2207
2208 pattern = Pattern.compile("(abc)(def)\\1");
2209 check(pattern, "abcdefabc", true);
2210
2211 pattern = Pattern.compile("(abc)(def)\\3");
2212 check(pattern, "abcdefabc", false);
2213
2214 try {
2215 for (int i = 1; i < 10; i++) {
2216 // Make sure backref 1-9 are always accepted
2217 pattern = Pattern.compile("abcdef\\" + i);
2218 // and fail to match if the target group does not exit
2219 check(pattern, "abcdef", false);
2220 }
2221 } catch(PatternSyntaxException e) {
2222 failCount++;
2223 }
2224
2225 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2226 check(pattern, "abcdefghija", false);
2227 check(pattern, "abcdefghija1", true);
2228
2229 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2230 check(pattern, "abcdefghijkk", true);
2231
2232 pattern = Pattern.compile("(a)bcdefghij\\11");
2233 check(pattern, "abcdefghija1", true);
2234
2235 // Supplementary character tests
2236 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2237 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2238
2239 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2240 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2241
2242 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2243 check(pattern, toSupplementaries("abcdefabc"), true);
2244
2245 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2246 check(pattern, toSupplementaries("abcdefabc"), false);
2247
2248 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2249 check(pattern, toSupplementaries("abcdefghija"), false);
2250 check(pattern, toSupplementaries("abcdefghija1"), true);
2251
2252 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2253 check(pattern, toSupplementaries("abcdefghijkk"), true);
2254
2255 report("BackRef");
2256 }
2257
2258 /**
2259 * Unicode Technical Report #18, section 2.6 End of Line
2260 * There is no empty line to be matched in the sequence \u000D\u000A
2261 * but there is an empty line in the sequence \u000A\u000D.
2262 */
2263 private static void anchorTest() throws Exception {
2264 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2265 Matcher m = p.matcher("blah1\r\nblah2");
2266 m.find();
2267 m.find();
2268 if (!m.group().equals("blah2"))
2269 failCount++;
2270
2271 m.reset("blah1\n\rblah2");
2272 m.find();
2273 m.find();
2274 m.find();
2275 if (!m.group().equals("blah2"))
2276 failCount++;
2277
2278 // Test behavior of $ with \r\n at end of input
2279 p = Pattern.compile(".+$");
2280 m = p.matcher("blah1\r\n");
2281 if (!m.find())
2282 failCount++;
2283 if (!m.group().equals("blah1"))
2284 failCount++;
2285 if (m.find())
2286 failCount++;
2287
2288 // Test behavior of $ with \r\n at end of input in multiline
2289 p = Pattern.compile(".+$", Pattern.MULTILINE);
2290 m = p.matcher("blah1\r\n");
2291 if (!m.find())
2292 failCount++;
2293 if (m.find())
2294 failCount++;
2295
2296 // Test for $ recognition of \u0085 for bug 4527731
2297 p = Pattern.compile(".+$", Pattern.MULTILINE);
2298 m = p.matcher("blah1\u0085");
2299 if (!m.find())
2300 failCount++;
2301
2302 // Supplementary character test
2303 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2304 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2305 m.find();
2306 m.find();
2307 if (!m.group().equals(toSupplementaries("blah2")))
2308 failCount++;
2309
2310 m.reset(toSupplementaries("blah1\n\rblah2"));
2311 m.find();
2312 m.find();
2313 m.find();
2314 if (!m.group().equals(toSupplementaries("blah2")))
2315 failCount++;
2316
2317 // Test behavior of $ with \r\n at end of input
2318 p = Pattern.compile(".+$");
2319 m = p.matcher(toSupplementaries("blah1\r\n"));
2320 if (!m.find())
2321 failCount++;
2322 if (!m.group().equals(toSupplementaries("blah1")))
2323 failCount++;
2324 if (m.find())
2325 failCount++;
2326
2327 // Test behavior of $ with \r\n at end of input in multiline
2328 p = Pattern.compile(".+$", Pattern.MULTILINE);
2329 m = p.matcher(toSupplementaries("blah1\r\n"));
2330 if (!m.find())
2331 failCount++;
2332 if (m.find())
2333 failCount++;
2334
2335 // Test for $ recognition of \u0085 for bug 4527731
2336 p = Pattern.compile(".+$", Pattern.MULTILINE);
2337 m = p.matcher(toSupplementaries("blah1\u0085"));
2338 if (!m.find())
2339 failCount++;
2340
2341 report("Anchors");
2342 }
2343
2344 /**
2345 * A basic sanity test of Matcher.lookingAt().
2346 */
2347 private static void lookingAtTest() throws Exception {
2348 Pattern p = Pattern.compile("(ab)(c*)");
2349 Matcher m = p.matcher("abccczzzabcczzzabccc");
2350
2351 if (!m.lookingAt())
2352 failCount++;
2353
2354 if (!m.group().equals(m.group(0)))
2355 failCount++;
2356
2357 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2358 if (m.lookingAt())
2359 failCount++;
2360
2361 // Supplementary character test
2362 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2363 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2364
2365 if (!m.lookingAt())
2366 failCount++;
2367
2368 if (!m.group().equals(m.group(0)))
2369 failCount++;
2370
2371 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2372 if (m.lookingAt())
2373 failCount++;
2374
2375 report("Looking At");
2376 }
2377
2378 /**
2379 * A basic sanity test of Matcher.matches().
2380 */
2381 private static void matchesTest() throws Exception {
2382 // matches()
2383 Pattern p = Pattern.compile("ulb(c*)");
2384 Matcher m = p.matcher("ulbcccccc");
2385 if (!m.matches())
2386 failCount++;
2387
2388 // find() but not matches()
2389 m.reset("zzzulbcccccc");
2390 if (m.matches())
2391 failCount++;
2392
2393 // lookingAt() but not matches()
2394 m.reset("ulbccccccdef");
2395 if (m.matches())
2396 failCount++;
2397
2398 // matches()
2399 p = Pattern.compile("a|ad");
2400 m = p.matcher("ad");
2401 if (!m.matches())
2402 failCount++;
2403
2404 // Supplementary character test
2405 // matches()
2406 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2407 m = p.matcher(toSupplementaries("ulbcccccc"));
2408 if (!m.matches())
2409 failCount++;
2410
2411 // find() but not matches()
2412 m.reset(toSupplementaries("zzzulbcccccc"));
2413 if (m.matches())
2414 failCount++;
2415
2416 // lookingAt() but not matches()
2417 m.reset(toSupplementaries("ulbccccccdef"));
2418 if (m.matches())
2419 failCount++;
2420
2421 // matches()
2422 p = Pattern.compile(toSupplementaries("a|ad"));
2423 m = p.matcher(toSupplementaries("ad"));
2424 if (!m.matches())
2425 failCount++;
2426
2427 report("Matches");
2428 }
2429
2430 /**
2431 * A basic sanity test of Pattern.matches().
2432 */
2433 private static void patternMatchesTest() throws Exception {
2434 // matches()
2435 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2436 toSupplementaries("ulbcccccc")))
2437 failCount++;
2438
2439 // find() but not matches()
2440 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2441 toSupplementaries("zzzulbcccccc")))
2442 failCount++;
2443
2444 // lookingAt() but not matches()
2445 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2446 toSupplementaries("ulbccccccdef")))
2447 failCount++;
2448
2449 // Supplementary character test
2450 // matches()
2451 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2452 toSupplementaries("ulbcccccc")))
2453 failCount++;
2454
2455 // find() but not matches()
2456 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2457 toSupplementaries("zzzulbcccccc")))
2458 failCount++;
2459
2460 // lookingAt() but not matches()
2461 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2462 toSupplementaries("ulbccccccdef")))
2463 failCount++;
2464
2465 report("Pattern Matches");
2466 }
2467
2468 /**
2469 * Canonical equivalence testing. Tests the ability of the engine
2470 * to match sequences that are not explicitly specified in the
2471 * pattern when they are considered equivalent by the Unicode Standard.
2472 */
2473 private static void ceTest() throws Exception {
2474 // Decomposed char outside char classes
2475 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2476 Matcher m = p.matcher("test\u00e5");
2477 if (!m.matches())
2478 failCount++;
2479
2480 m.reset("testa\u030a");
2481 if (!m.matches())
2482 failCount++;
2483
2484 // Composed char outside char classes
2485 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2486 m = p.matcher("test\u00e5");
2487 if (!m.matches())
2488 failCount++;
2489
2490 m.reset("testa\u030a");
2491 if (!m.find())
2492 failCount++;
2493
2494 // Decomposed char inside a char class
2495 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2496 m = p.matcher("test\u00e5");
2497 if (!m.find())
2498 failCount++;
2499
2500 m.reset("testa\u030a");
2501 if (!m.find())
2502 failCount++;
2503
2504 // Composed char inside a char class
2505 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2506 m = p.matcher("test\u00e5");
2507 if (!m.find())
2508 failCount++;
2509
2510 m.reset("testa\u0300");
2511 if (!m.find())
2512 failCount++;
2513
2514 m.reset("testa\u030a");
2515 if (!m.find())
2516 failCount++;
2517
2518 // Marks that cannot legally change order and be equivalent
2519 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2520 check(p, "testa\u0308\u0300", true);
2521 check(p, "testa\u0300\u0308", false);
2522
2523 // Marks that can legally change order and be equivalent
2524 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2525 check(p, "testa\u0308\u0323", true);
2526 check(p, "testa\u0323\u0308", true);
2527
2528 // Test all equivalences of the sequence a\u0308\u0323\u0300
2529 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2530 check(p, "testa\u0308\u0323\u0300", true);
2531 check(p, "testa\u0323\u0308\u0300", true);
2532 check(p, "testa\u0308\u0300\u0323", true);
2533 check(p, "test\u00e4\u0323\u0300", true);
2534 check(p, "test\u00e4\u0300\u0323", true);
2535
2536 /*
2537 * The following canonical equivalence tests don't work. Bug id: 4916384.
2538 *
2539 // Decomposed hangul (jamos)
2540 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2541 m = p.matcher("\u1100\u1161");
2542 if (!m.matches())
2543 failCount++;
2544
2545 m.reset("\uac00");
2546 if (!m.matches())
2547 failCount++;
2548
2549 // Composed hangul
2550 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2551 m = p.matcher("\u1100\u1161");
2552 if (!m.matches())
2553 failCount++;
2554
2555 m.reset("\uac00");
2556 if (!m.matches())
2557 failCount++;
2558
2559 // Decomposed supplementary outside char classes
2560 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2561 m = p.matcher("test\ud834\uddc0");
2562 if (!m.matches())
2563 failCount++;
2564
2565 m.reset("test\ud834\uddbc\ud834\udd6f");
2566 if (!m.matches())
2567 failCount++;
2568
2569 // Composed supplementary outside char classes
2570 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2571 m.reset("test\ud834\uddbc\ud834\udd6f");
2572 if (!m.matches())
2573 failCount++;
2574
2575 m = p.matcher("test\ud834\uddc0");
2576 if (!m.matches())
2577 failCount++;
2578
2579 */
2580
2581 report("Canonical Equivalence");
2582 }
2583
2584 /**
2585 * A basic sanity test of Matcher.replaceAll().
2586 */
2587 private static void globalSubstitute() throws Exception {
2588 // Global substitution with a literal
2589 Pattern p = Pattern.compile("(ab)(c*)");
2590 Matcher m = p.matcher("abccczzzabcczzzabccc");
2591 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2592 failCount++;
2593
2594 m.reset("zzzabccczzzabcczzzabccczzz");
2595 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2596 failCount++;
2597
2598 // Global substitution with groups
2599 m.reset("zzzabccczzzabcczzzabccczzz");
2600 String result = m.replaceAll("$1");
2601 if (!result.equals("zzzabzzzabzzzabzzz"))
2602 failCount++;
2603
2604 // Supplementary character test
2605 // Global substitution with a literal
2606 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2607 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2608 if (!m.replaceAll(toSupplementaries("test")).
2609 equals(toSupplementaries("testzzztestzzztest")))
2610 failCount++;
2611
2612 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2613 if (!m.replaceAll(toSupplementaries("test")).
2614 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2615 failCount++;
2616
2617 // Global substitution with groups
2618 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2619 result = m.replaceAll("$1");
2620 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2621 failCount++;
2622
2623 report("Global Substitution");
2624 }
2625
2626 /**
2627 * Tests the usage of Matcher.appendReplacement() with literal
2628 * and group substitutions.
2629 */
2630 private static void stringbufferSubstitute() throws Exception {
2631 // SB substitution with literal
2632 String blah = "zzzblahzzz";
2633 Pattern p = Pattern.compile("blah");
2634 Matcher m = p.matcher(blah);
2635 StringBuffer result = new StringBuffer();
2636 try {
2637 m.appendReplacement(result, "blech");
2638 failCount++;
2639 } catch (IllegalStateException e) {
2640 }
2641 m.find();
2642 m.appendReplacement(result, "blech");
2643 if (!result.toString().equals("zzzblech"))
2644 failCount++;
2645
2646 m.appendTail(result);
2647 if (!result.toString().equals("zzzblechzzz"))
2648 failCount++;
2649
2650 // SB substitution with groups
2651 blah = "zzzabcdzzz";
2652 p = Pattern.compile("(ab)(cd)*");
2653 m = p.matcher(blah);
2654 result = new StringBuffer();
2655 try {
2656 m.appendReplacement(result, "$1");
2657 failCount++;
2658 } catch (IllegalStateException e) {
2659 }
2660 m.find();
2661 m.appendReplacement(result, "$1");
2662 if (!result.toString().equals("zzzab"))
2663 failCount++;
2664
2665 m.appendTail(result);
2666 if (!result.toString().equals("zzzabzzz"))
2667 failCount++;
2668
2669 // SB substitution with 3 groups
2670 blah = "zzzabcdcdefzzz";
2671 p = Pattern.compile("(ab)(cd)*(ef)");
2672 m = p.matcher(blah);
2673 result = new StringBuffer();
2674 try {
2675 m.appendReplacement(result, "$1w$2w$3");
2676 failCount++;
2677 } catch (IllegalStateException e) {
2678 }
2679 m.find();
2680 m.appendReplacement(result, "$1w$2w$3");
2681 if (!result.toString().equals("zzzabwcdwef"))
2682 failCount++;
2683
2684 m.appendTail(result);
2685 if (!result.toString().equals("zzzabwcdwefzzz"))
2686 failCount++;
2687
2688 // SB substitution with groups and three matches
2689 // skipping middle match
2690 blah = "zzzabcdzzzabcddzzzabcdzzz";
2691 p = Pattern.compile("(ab)(cd*)");
2692 m = p.matcher(blah);
2693 result = new StringBuffer();
2694 try {
2695 m.appendReplacement(result, "$1");
2696 failCount++;
2697 } catch (IllegalStateException e) {
2698 }
2699 m.find();
2700 m.appendReplacement(result, "$1");
2701 if (!result.toString().equals("zzzab"))
2702 failCount++;
2703
2704 m.find();
2705 m.find();
2706 m.appendReplacement(result, "$2");
2707 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2708 failCount++;
2709
2710 m.appendTail(result);
2711 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2712 failCount++;
2713
2714 // Check to make sure escaped $ is ignored
2715 blah = "zzzabcdcdefzzz";
2716 p = Pattern.compile("(ab)(cd)*(ef)");
2717 m = p.matcher(blah);
2718 result = new StringBuffer();
2719 m.find();
2720 m.appendReplacement(result, "$1w\\$2w$3");
2721 if (!result.toString().equals("zzzabw$2wef"))
2722 failCount++;
2723
2724 m.appendTail(result);
2725 if (!result.toString().equals("zzzabw$2wefzzz"))
2726 failCount++;
2727
2728 // Check to make sure a reference to nonexistent group causes error
2729 blah = "zzzabcdcdefzzz";
2730 p = Pattern.compile("(ab)(cd)*(ef)");
2731 m = p.matcher(blah);
2732 result = new StringBuffer();
2733 m.find();
2734 try {
2735 m.appendReplacement(result, "$1w$5w$3");
2736 failCount++;
2737 } catch (IndexOutOfBoundsException ioobe) {
2738 // Correct result
2739 }
2740
2741 // Check double digit group references
2742 blah = "zzz123456789101112zzz";
2743 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2744 m = p.matcher(blah);
2745 result = new StringBuffer();
2746 m.find();
2747 m.appendReplacement(result, "$1w$11w$3");
2748 if (!result.toString().equals("zzz1w11w3"))
2749 failCount++;
2750
2751 // Check to make sure it backs off $15 to $1 if only three groups
2752 blah = "zzzabcdcdefzzz";
2753 p = Pattern.compile("(ab)(cd)*(ef)");
2754 m = p.matcher(blah);
2755 result = new StringBuffer();
2756 m.find();
2757 m.appendReplacement(result, "$1w$15w$3");
2758 if (!result.toString().equals("zzzabwab5wef"))
2759 failCount++;
2760
2761
2762 // Supplementary character test
2763 // SB substitution with literal
2764 blah = toSupplementaries("zzzblahzzz");
2765 p = Pattern.compile(toSupplementaries("blah"));
2766 m = p.matcher(blah);
2767 result = new StringBuffer();
2768 try {
2769 m.appendReplacement(result, toSupplementaries("blech"));
2770 failCount++;
2771 } catch (IllegalStateException e) {
2772 }
2773 m.find();
2774 m.appendReplacement(result, toSupplementaries("blech"));
2775 if (!result.toString().equals(toSupplementaries("zzzblech")))
2776 failCount++;
2777
2778 m.appendTail(result);
2779 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2780 failCount++;
2781
2782 // SB substitution with groups
2783 blah = toSupplementaries("zzzabcdzzz");
2784 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2785 m = p.matcher(blah);
2786 result = new StringBuffer();
2787 try {
2788 m.appendReplacement(result, "$1");
2789 failCount++;
2790 } catch (IllegalStateException e) {
2791 }
2792 m.find();
2793 m.appendReplacement(result, "$1");
2794 if (!result.toString().equals(toSupplementaries("zzzab")))
2795 failCount++;
2796
2797 m.appendTail(result);
2798 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2799 failCount++;
2800
2801 // SB substitution with 3 groups
2802 blah = toSupplementaries("zzzabcdcdefzzz");
2803 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2804 m = p.matcher(blah);
2805 result = new StringBuffer();
2806 try {
2807 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2808 failCount++;
2809 } catch (IllegalStateException e) {
2810 }
2811 m.find();
2812 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2813 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2814 failCount++;
2815
2816 m.appendTail(result);
2817 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2818 failCount++;
2819
2820 // SB substitution with groups and three matches
2821 // skipping middle match
2822 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2823 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2824 m = p.matcher(blah);
2825 result = new StringBuffer();
2826 try {
2827 m.appendReplacement(result, "$1");
2828 failCount++;
2829 } catch (IllegalStateException e) {
2830 }
2831 m.find();
2832 m.appendReplacement(result, "$1");
2833 if (!result.toString().equals(toSupplementaries("zzzab")))
2834 failCount++;
2835
2836 m.find();
2837 m.find();
2838 m.appendReplacement(result, "$2");
2839 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2840 failCount++;
2841
2842 m.appendTail(result);
2843 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2844 failCount++;
2845
2846 // Check to make sure escaped $ is ignored
2847 blah = toSupplementaries("zzzabcdcdefzzz");
2848 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2849 m = p.matcher(blah);
2850 result = new StringBuffer();
2851 m.find();
2852 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2853 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2854 failCount++;
2855
2856 m.appendTail(result);
2857 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2858 failCount++;
2859
2860 // Check to make sure a reference to nonexistent group causes error
2861 blah = toSupplementaries("zzzabcdcdefzzz");
2862 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2863 m = p.matcher(blah);
2864 result = new StringBuffer();
2865 m.find();
2866 try {
2867 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2868 failCount++;
2869 } catch (IndexOutOfBoundsException ioobe) {
2870 // Correct result
2871 }
2872
2873 // Check double digit group references
2874 blah = toSupplementaries("zzz123456789101112zzz");
2875 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2876 m = p.matcher(blah);
2877 result = new StringBuffer();
2878 m.find();
2879 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2880 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2881 failCount++;
2882
2883 // Check to make sure it backs off $15 to $1 if only three groups
2884 blah = toSupplementaries("zzzabcdcdefzzz");
2885 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2886 m = p.matcher(blah);
2887 result = new StringBuffer();
2888 m.find();
2889 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2890 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2891 failCount++;
2892
2893 // Check nothing has been appended into the output buffer if
2894 // the replacement string triggers IllegalArgumentException.
2895 p = Pattern.compile("(abc)");
2896 m = p.matcher("abcd");
2897 result = new StringBuffer();
2898 m.find();
2899 try {
2900 m.appendReplacement(result, ("xyz$g"));
2901 failCount++;
2902 } catch (IllegalArgumentException iae) {
2903 if (result.length() != 0)
2904 failCount++;
2905 }
2906
2907 report("SB Substitution");
2908 }
2909
2910 /*
2911 * 5 groups of characters are created to make a substitution string.
2912 * A base string will be created including random lead chars, the
2913 * substitution string, and random trailing chars.
2914 * A pattern containing the 5 groups is searched for and replaced with:
2915 * random group + random string + random group.
2916 * The results are checked for correctness.
2917 */
2918 private static void substitutionBasher() {
2919 for (int runs = 0; runs<1000; runs++) {
2920 // Create a base string to work in
2921 int leadingChars = generator.nextInt(10);
2922 StringBuffer baseBuffer = new StringBuffer(100);
2923 String leadingString = getRandomAlphaString(leadingChars);
2924 baseBuffer.append(leadingString);
2925
2926 // Create 5 groups of random number of random chars
2927 // Create the string to substitute
2928 // Create the pattern string to search for
2929 StringBuffer bufferToSub = new StringBuffer(25);
2930 StringBuffer bufferToPat = new StringBuffer(50);
2931 String[] groups = new String[5];
2932 for(int i=0; i<5; i++) {
2933 int aGroupSize = generator.nextInt(5)+1;
2934 groups[i] = getRandomAlphaString(aGroupSize);
2935 bufferToSub.append(groups[i]);
2936 bufferToPat.append('(');
2937 bufferToPat.append(groups[i]);
2938 bufferToPat.append(')');
2939 }
2940 String stringToSub = bufferToSub.toString();
2941 String pattern = bufferToPat.toString();
2942
2943 // Place sub string into working string at random index
2944 baseBuffer.append(stringToSub);
2945
2946 // Append random chars to end
2947 int trailingChars = generator.nextInt(10);
2948 String trailingString = getRandomAlphaString(trailingChars);
2949 baseBuffer.append(trailingString);
2950 String baseString = baseBuffer.toString();
2951
2952 // Create test pattern and matcher
2953 Pattern p = Pattern.compile(pattern);
2954 Matcher m = p.matcher(baseString);
2955
2956 // Reject candidate if pattern happens to start early
2957 m.find();
2958 if (m.start() < leadingChars)
2959 continue;
2960
2961 // Reject candidate if more than one match
2962 if (m.find())
2963 continue;
2964
2965 // Construct a replacement string with :
2966 // random group + random string + random group
2967 StringBuffer bufferToRep = new StringBuffer();
2968 int groupIndex1 = generator.nextInt(5);
2969 bufferToRep.append("$" + (groupIndex1 + 1));
2970 String randomMidString = getRandomAlphaString(5);
2971 bufferToRep.append(randomMidString);
2972 int groupIndex2 = generator.nextInt(5);
2973 bufferToRep.append("$" + (groupIndex2 + 1));
2974 String replacement = bufferToRep.toString();
2975
2976 // Do the replacement
2977 String result = m.replaceAll(replacement);
2978
2979 // Construct expected result
2980 StringBuffer bufferToRes = new StringBuffer();
2981 bufferToRes.append(leadingString);
2982 bufferToRes.append(groups[groupIndex1]);
2983 bufferToRes.append(randomMidString);
2984 bufferToRes.append(groups[groupIndex2]);
2985 bufferToRes.append(trailingString);
2986 String expectedResult = bufferToRes.toString();
2987
2988 // Check results
2989 if (!result.equals(expectedResult))
2990 failCount++;
2991 }
2992
2993 report("Substitution Basher");
2994 }
2995
2996 /**
2997 * Checks the handling of some escape sequences that the Pattern
2998 * class should process instead of the java compiler. These are
2999 * not in the file because the escapes should be be processed
3000 * by the Pattern class when the regex is compiled.
3001 */
3002 private static void escapes() throws Exception {
3003 Pattern p = Pattern.compile("\\043");
3004 Matcher m = p.matcher("#");
3005 if (!m.find())
3006 failCount++;
3007
3008 p = Pattern.compile("\\x23");
3009 m = p.matcher("#");
3010 if (!m.find())
3011 failCount++;
3012
3013 p = Pattern.compile("\\u0023");
3014 m = p.matcher("#");
3015 if (!m.find())
3016 failCount++;
3017
3018 report("Escape sequences");
3019 }
3020
3021 /**
3022 * Checks the handling of blank input situations. These
3023 * tests are incompatible with my test file format.
3024 */
3025 private static void blankInput() throws Exception {
3026 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3027 Matcher m = p.matcher("");
3028 if (m.find())
3029 failCount++;
3030
3031 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3032 m = p.matcher("");
3033 if (!m.find())
3034 failCount++;
3035
3036 p = Pattern.compile("abc");
3037 m = p.matcher("");
3038 if (m.find())
3039 failCount++;
3040
3041 p = Pattern.compile("a*");
3042 m = p.matcher("");
3043 if (!m.find())
3044 failCount++;
3045
3046 report("Blank input");
3047 }
3048
3049 /**
3050 * Tests the Boyer-Moore pattern matching of a character sequence
3051 * on randomly generated patterns.
3052 */
3053 private static void bm() throws Exception {
3054 doBnM('a');
3055 report("Boyer Moore (ASCII)");
3056
3057 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3058 report("Boyer Moore (Supplementary)");
3059 }
3060
3061 private static void doBnM(int baseCharacter) throws Exception {
3062 int achar=0;
3063
3064 for (int i=0; i<100; i++) {
3065 // Create a short pattern to search for
3066 int patternLength = generator.nextInt(7) + 4;
3067 StringBuffer patternBuffer = new StringBuffer(patternLength);
3068 for (int x=0; x<patternLength; x++) {
3069 int ch = baseCharacter + generator.nextInt(26);
3070 if (Character.isSupplementaryCodePoint(ch)) {
3071 patternBuffer.append(Character.toChars(ch));
3072 } else {
3073 patternBuffer.append((char)ch);
3074 }
3075 }
3076 String pattern = patternBuffer.toString();
3077 Pattern p = Pattern.compile(pattern);
3078
3079 // Create a buffer with random ASCII chars that does
3080 // not match the sample
3081 String toSearch = null;
3082 StringBuffer s = null;
3083 Matcher m = p.matcher("");
3084 do {
3085 s = new StringBuffer(100);
3086 for (int x=0; x<100; x++) {
3087 int ch = baseCharacter + generator.nextInt(26);
3088 if (Character.isSupplementaryCodePoint(ch)) {
3089 s.append(Character.toChars(ch));
3090 } else {
3091 s.append((char)ch);
3092 }
3093 }
3094 toSearch = s.toString();
3095 m.reset(toSearch);
3096 } while (m.find());
3097
3098 // Insert the pattern at a random spot
3099 int insertIndex = generator.nextInt(99);
3100 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3101 insertIndex++;
3102 s = s.insert(insertIndex, pattern);
3103 toSearch = s.toString();
3104
3105 // Make sure that the pattern is found
3106 m.reset(toSearch);
3107 if (!m.find())
3108 failCount++;
3109
3110 // Make sure that the match text is the pattern
3111 if (!m.group().equals(pattern))
3112 failCount++;
3113
3114 // Make sure match occured at insertion point
3115 if (m.start() != insertIndex)
3116 failCount++;
3117 }
3118 }
3119
3120 /**
3121 * Tests the matching of slices on randomly generated patterns.
3122 * The Boyer-Moore optimization is not done on these patterns
3123 * because it uses unicode case folding.
3124 */
3125 private static void slice() throws Exception {
3126 doSlice(Character.MAX_VALUE);
3127 report("Slice");
3128
3129 doSlice(Character.MAX_CODE_POINT);
3130 report("Slice (Supplementary)");
3131 }
3132
3133 private static void doSlice(int maxCharacter) throws Exception {
3134 Random generator = new Random();
3135 int achar=0;
3136
3137 for (int i=0; i<100; i++) {
3138 // Create a short pattern to search for
3139 int patternLength = generator.nextInt(7) + 4;
3140 StringBuffer patternBuffer = new StringBuffer(patternLength);
3141 for (int x=0; x<patternLength; x++) {
3142 int randomChar = 0;
3143 while (!Character.isLetterOrDigit(randomChar))
3144 randomChar = generator.nextInt(maxCharacter);
3145 if (Character.isSupplementaryCodePoint(randomChar)) {
3146 patternBuffer.append(Character.toChars(randomChar));
3147 } else {
3148 patternBuffer.append((char) randomChar);
3149 }
3150 }
3151 String pattern = patternBuffer.toString();
3152 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3153
3154 // Create a buffer with random chars that does not match the sample
3155 String toSearch = null;
3156 StringBuffer s = null;
3157 Matcher m = p.matcher("");
3158 do {
3159 s = new StringBuffer(100);
3160 for (int x=0; x<100; x++) {
3161 int randomChar = 0;
3162 while (!Character.isLetterOrDigit(randomChar))
3163 randomChar = generator.nextInt(maxCharacter);
3164 if (Character.isSupplementaryCodePoint(randomChar)) {
3165 s.append(Character.toChars(randomChar));
3166 } else {
3167 s.append((char) randomChar);
3168 }
3169 }
3170 toSearch = s.toString();
3171 m.reset(toSearch);
3172 } while (m.find());
3173
3174 // Insert the pattern at a random spot
3175 int insertIndex = generator.nextInt(99);
3176 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3177 insertIndex++;
3178 s = s.insert(insertIndex, pattern);
3179 toSearch = s.toString();
3180
3181 // Make sure that the pattern is found
3182 m.reset(toSearch);
3183 if (!m.find())
3184 failCount++;
3185
3186 // Make sure that the match text is the pattern
3187 if (!m.group().equals(pattern))
3188 failCount++;
3189
3190 // Make sure match occured at insertion point
3191 if (m.start() != insertIndex)
3192 failCount++;
3193 }
3194 }
3195
3196 private static void explainFailure(String pattern, String data,
3197 String expected, String actual) {
3198 System.err.println("----------------------------------------");
3199 System.err.println("Pattern = "+pattern);
3200 System.err.println("Data = "+data);
3201 System.err.println("Expected = " + expected);
3202 System.err.println("Actual = " + actual);
3203 }
3204
3205 private static void explainFailure(String pattern, String data,
3206 Throwable t) {
3207 System.err.println("----------------------------------------");
3208 System.err.println("Pattern = "+pattern);
3209 System.err.println("Data = "+data);
3210 t.printStackTrace(System.err);
3211 }
3212
3213 // Testing examples from a file
3214
3215 /**
3216 * Goes through the file "TestCases.txt" and creates many patterns
3217 * described in the file, matching the patterns against input lines in
3218 * the file, and comparing the results against the correct results
3219 * also found in the file. The file format is described in comments
3220 * at the head of the file.
3221 */
3222 private static void processFile(String fileName) throws Exception {
3223 File testCases = new File(System.getProperty("test.src", "."),
3224 fileName);
3225 FileInputStream in = new FileInputStream(testCases);
3226 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3227
3228 // Process next test case.
3229 String aLine;
3230 while((aLine = r.readLine()) != null) {
3231 // Read a line for pattern
3232 String patternString = grabLine(r);
3233 Pattern p = null;
3234 try {
3235 p = compileTestPattern(patternString);
3236 } catch (PatternSyntaxException e) {
3237 String dataString = grabLine(r);
3238 String expectedResult = grabLine(r);
3239 if (expectedResult.startsWith("error"))
3240 continue;
3241 explainFailure(patternString, dataString, e);
3242 failCount++;
3243 continue;
3244 }
3245
3246 // Read a line for input string
3247 String dataString = grabLine(r);
3248 Matcher m = p.matcher(dataString);
3249 StringBuffer result = new StringBuffer();
3250
3251 // Check for IllegalStateExceptions before a match
3252 failCount += preMatchInvariants(m);
3253
3254 boolean found = m.find();
3255
3256 if (found)
3257 failCount += postTrueMatchInvariants(m);
3258 else
3259 failCount += postFalseMatchInvariants(m);
3260
3261 if (found) {
3262 result.append("true ");
3263 result.append(m.group(0) + " ");
3264 } else {
3265 result.append("false ");
3266 }
3267
3268 result.append(m.groupCount());
3269
3270 if (found) {
3271 for (int i=1; i<m.groupCount()+1; i++)
3272 if (m.group(i) != null)
3273 result.append(" " +m.group(i));
3274 }
3275
3276 // Read a line for the expected result
3277 String expectedResult = grabLine(r);
3278
3279 if (!result.toString().equals(expectedResult)) {
3280 explainFailure(patternString, dataString, expectedResult, result.toString());
3281 failCount++;
3282 }
3283 }
3284
3285 report(fileName);
3286 }
3287
3288 private static int preMatchInvariants(Matcher m) {
3289 int failCount = 0;
3290 try {
3291 m.start();
3292 failCount++;
3293 } catch (IllegalStateException ise) {}
3294 try {
3295 m.end();
3296 failCount++;
3297 } catch (IllegalStateException ise) {}
3298 try {
3299 m.group();
3300 failCount++;
3301 } catch (IllegalStateException ise) {}
3302 return failCount;
3303 }
3304
3305 private static int postFalseMatchInvariants(Matcher m) {
3306 int failCount = 0;
3307 try {
3308 m.group();
3309 failCount++;
3310 } catch (IllegalStateException ise) {}
3311 try {
3312 m.start();
3313 failCount++;
3314 } catch (IllegalStateException ise) {}
3315 try {
3316 m.end();
3317 failCount++;
3318 } catch (IllegalStateException ise) {}
3319 return failCount;
3320 }
3321
3322 private static int postTrueMatchInvariants(Matcher m) {
3323 int failCount = 0;
3324 //assert(m.start() = m.start(0);
3325 if (m.start() != m.start(0))
3326 failCount++;
3327 //assert(m.end() = m.end(0);
3328 if (m.start() != m.start(0))
3329 failCount++;
3330 //assert(m.group() = m.group(0);
3331 if (!m.group().equals(m.group(0)))
3332 failCount++;
3333 try {
3334 m.group(50);
3335 failCount++;
3336 } catch (IndexOutOfBoundsException ise) {}
3337
3338 return failCount;
3339 }
3340
3341 private static Pattern compileTestPattern(String patternString) {
3342 if (!patternString.startsWith("'")) {
3343 return Pattern.compile(patternString);
3344 }
3345
3346 int break1 = patternString.lastIndexOf("'");
3347 String flagString = patternString.substring(
3348 break1+1, patternString.length());
3349 patternString = patternString.substring(1, break1);
3350
3351 if (flagString.equals("i"))
3352 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3353
3354 if (flagString.equals("m"))
3355 return Pattern.compile(patternString, Pattern.MULTILINE);
3356
3357 return Pattern.compile(patternString);
3358 }
3359
3360 /**
3361 * Reads a line from the input file. Keeps reading lines until a non
3362 * empty non comment line is read. If the line contains a \n then
3363 * these two characters are replaced by a newline char. If a \\uxxxx
3364 * sequence is read then the sequence is replaced by the unicode char.
3365 */
3366 private static String grabLine(BufferedReader r) throws Exception {
3367 int index = 0;
3368 String line = r.readLine();
3369 while (line.startsWith("//") || line.length() < 1)
3370 line = r.readLine();
3371 while ((index = line.indexOf("\\n")) != -1) {
3372 StringBuffer temp = new StringBuffer(line);
3373 temp.replace(index, index+2, "\n");
3374 line = temp.toString();
3375 }
3376 while ((index = line.indexOf("\\u")) != -1) {
3377 StringBuffer temp = new StringBuffer(line);
3378 String value = temp.substring(index+2, index+6);
3379 char aChar = (char)Integer.parseInt(value, 16);
3380 String unicodeChar = "" + aChar;
3381 temp.replace(index, index+6, unicodeChar);
3382 line = temp.toString();
3383 }
3384
3385 return line;
3386 }
3387
3388 private static void check(Pattern p, String s, String g, String expected) {
3389 Matcher m = p.matcher(s);
3390 m.find();
3391 if (!m.group(g).equals(expected))
3392 failCount++;
3393 }
3394
3395 private static void checkReplaceFirst(String p, String s, String r, String expected)
3396 {
3397 if (!expected.equals(Pattern.compile(p)
3398 .matcher(s)
3399 .replaceFirst(r)))
3400 failCount++;
3401 }
3402
3403 private static void checkReplaceAll(String p, String s, String r, String expected)
3404 {
3405 if (!expected.equals(Pattern.compile(p)
3406 .matcher(s)
3407 .replaceAll(r)))
3408 failCount++;
3409 }
3410
3411 private static void checkExpectedFail(String p) {
3412 try {
3413 Pattern.compile(p);
3414 } catch (PatternSyntaxException pse) {
3415 //pse.printStackTrace();
3416 return;
3417 }
3418 failCount++;
3419 }
3420
3421 private static void checkExpectedFail(Matcher m, String g) {
3422 m.find();
3423 try {
3424 m.group(g);
3425 } catch (IllegalArgumentException iae) {
3426 //iae.printStackTrace();
3427 return;
3428 } catch (NullPointerException npe) {
3429 return;
3430 }
3431 failCount++;
3432 }
3433
3434
3435 private static void namedGroupCaptureTest() throws Exception {
3436 check(Pattern.compile("x+(?<gname>y+)z+"),
3437 "xxxyyyzzz",
3438 "gname",
3439 "yyy");
3440
shermand9337e02009-10-21 11:40:40 -07003441 check(Pattern.compile("x+(?<gname8>y+)z+"),
shermanffaf4922009-03-20 16:22:59 -07003442 "xxxyyyzzz",
shermand9337e02009-10-21 11:40:40 -07003443 "gname8",
shermanffaf4922009-03-20 16:22:59 -07003444 "yyy");
3445
sherman0b4d42d2009-02-23 21:06:15 -08003446 //backref
3447 Pattern pattern = Pattern.compile("(a*)bc\\1");
3448 check(pattern, "zzzaabcazzz", true); // found "abca"
3449
3450 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3451 "zzzaabcaazzz", true);
3452
3453 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3454 "abcdefabc", true);
3455
3456 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3457 "abcdefghijkk", true);
3458
3459 // Supplementary character tests
3460 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3461 toSupplementaries("zzzaabcazzz"), true);
3462
3463 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3464 toSupplementaries("zzzaabcaazzz"), true);
3465
3466 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3467 toSupplementaries("abcdefabc"), true);
3468
3469 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3470 "(?<gname>" +
3471 toSupplementaries("k)") + "\\k<gname>"),
3472 toSupplementaries("abcdefghijkk"), true);
3473
3474 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3475 "xxxyyyzzzyyy",
3476 "gname",
3477 "yyy");
3478
3479 //replaceFirst/All
3480 checkReplaceFirst("(?<gn>ab)(c*)",
3481 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003482 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003483 "abzzzabcczzzabccc");
3484
3485 checkReplaceAll("(?<gn>ab)(c*)",
3486 "abccczzzabcczzzabccc",
shermand9337e02009-10-21 11:40:40 -07003487 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003488 "abzzzabzzzab");
3489
3490
3491 checkReplaceFirst("(?<gn>ab)(c*)",
3492 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003493 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003494 "zzzabzzzabcczzzabccczzz");
3495
3496 checkReplaceAll("(?<gn>ab)(c*)",
3497 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003498 "${gn}",
sherman0b4d42d2009-02-23 21:06:15 -08003499 "zzzabzzzabzzzabzzz");
3500
3501 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3502 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003503 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003504 "zzzccczzzabcczzzabccczzz");
3505
3506 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3507 "zzzabccczzzabcczzzabccczzz",
shermand9337e02009-10-21 11:40:40 -07003508 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003509 "zzzccczzzcczzzccczzz");
3510
3511 //toSupplementaries("(ab)(c*)"));
3512 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3513 ")(?<gn2>" + toSupplementaries("c") + "*)",
3514 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003515 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003516 toSupplementaries("abzzzabcczzzabccc"));
3517
3518
3519 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3520 ")(?<gn2>" + toSupplementaries("c") + "*)",
3521 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003522 "${gn1}",
sherman0b4d42d2009-02-23 21:06:15 -08003523 toSupplementaries("abzzzabzzzab"));
3524
3525 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3526 ")(?<gn2>" + toSupplementaries("c") + "*)",
3527 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003528 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003529 toSupplementaries("ccczzzabcczzzabccc"));
3530
3531
3532 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3533 ")(?<gn2>" + toSupplementaries("c") + "*)",
3534 toSupplementaries("abccczzzabcczzzabccc"),
shermand9337e02009-10-21 11:40:40 -07003535 "${gn2}",
sherman0b4d42d2009-02-23 21:06:15 -08003536 toSupplementaries("ccczzzcczzzccc"));
3537
3538 checkReplaceFirst("(?<dog>Dog)AndCat",
3539 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003540 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003541 "zzzDogzzzDogAndCatzzz");
3542
3543
3544 checkReplaceAll("(?<dog>Dog)AndCat",
3545 "zzzDogAndCatzzzDogAndCatzzz",
shermand9337e02009-10-21 11:40:40 -07003546 "${dog}",
sherman0b4d42d2009-02-23 21:06:15 -08003547 "zzzDogzzzDogzzz");
3548
3549 // backref in Matcher & String
shermand9337e02009-10-21 11:40:40 -07003550 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3551 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
sherman0b4d42d2009-02-23 21:06:15 -08003552 failCount++;
3553
3554 // negative
3555 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3556 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
shermand9337e02009-10-21 11:40:40 -07003557 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
sherman0b4d42d2009-02-23 21:06:15 -08003558 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3559 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3560 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3561 "gnameX");
3562 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3563 null);
3564 report("NamedGroupCapture");
3565 }
sherman6782c962010-02-05 00:10:42 -08003566
shermancc01ef52010-05-18 15:36:47 -07003567 // This is for bug 6969132
sherman6782c962010-02-05 00:10:42 -08003568 private static void nonBmpClassComplementTest() throws Exception {
3569 Pattern p = Pattern.compile("\\P{Lu}");
3570 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3571 if (m.find() && m.start() == 1)
3572 failCount++;
3573
3574 // from a unicode category
3575 p = Pattern.compile("\\P{Lu}");
3576 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3577 if (m.find())
3578 failCount++;
3579 if (!m.hitEnd())
3580 failCount++;
3581
3582 // block
3583 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3584 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3585 if (m.find() && m.start() == 1)
3586 failCount++;
3587
3588 report("NonBmpClassComplement");
3589 }
3590
shermancc01ef52010-05-18 15:36:47 -07003591 private static void unicodePropertiesTest() throws Exception {
3592 // different forms
3593 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3594 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3595 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3596 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3597 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3598 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3599 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3600 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3601 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3602 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3603 failCount++;
3604
3605 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
3606 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3607 Matcher lastSM = common;
3608 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3609
3610 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
3611 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
3612 Matcher lastBM = latin;
3613 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3614
3615 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3616 if (cp >= 0x30000 && (cp & 0x70) == 0){
3617 continue; // only pick couple code points, they are the same
3618 }
3619
3620 // Unicode Script
3621 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3622 Matcher m;
3623 String str = new String(Character.toChars(cp));
3624 if (script == lastScript) {
3625 m = lastSM;
3626 m.reset(str);
3627 } else {
3628 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3629 }
3630 if (!m.matches()) {
3631 failCount++;
3632 }
3633 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3634 other.reset(str);
3635 if (other.matches()) {
3636 failCount++;
3637 }
3638 lastSM = m;
3639 lastScript = script;
3640
3641 // Unicode Block
3642 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3643 if (block == null) {
3644 //System.out.printf("Not a Block: cp=%x%n", cp);
3645 continue;
3646 }
3647 if (block == lastBlock) {
3648 m = lastBM;
3649 m.reset(str);
3650 } else {
3651 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3652 }
3653 if (!m.matches()) {
3654 failCount++;
3655 }
3656 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3657 other.reset(str);
3658 if (other.matches()) {
3659 failCount++;
3660 }
3661 lastBM = m;
3662 lastBlock = block;
3663 }
3664 report("unicodeProperties");
3665 }
shermanf03c78b2011-02-03 13:49:25 -08003666
3667 private static void unicodeHexNotationTest() throws Exception {
3668
3669 // negative
3670 checkExpectedFail("\\x{-23}");
3671 checkExpectedFail("\\x{110000}");
3672 checkExpectedFail("\\x{}");
3673 checkExpectedFail("\\x{AB[ef]");
3674
3675 // codepoint
3676 check("^\\x{1033c}$", "\uD800\uDF3C", true);
3677 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3678 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
3679 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
3680
3681 // in class
3682 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
3683 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3684 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
3685 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
3686 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
3687 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3688
3689 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3690 String s = "A" + new String(Character.toChars(cp)) + "B";
3691 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3692 : String.format("\\u%04x\\u%04x",
3693 (int) Character.toChars(cp)[0],
3694 (int) Character.toChars(cp)[1]);
3695 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3696 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3697 failCount++;
3698 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3699 failCount++;
3700 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3701 failCount++;
3702 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3703 failCount++;
3704 }
3705 report("unicodeHexNotation");
sherman85bbd8b2011-04-28 20:48:36 -07003706 }
3707
3708 private static void unicodeClassesTest() throws Exception {
3709
3710 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3711 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3712 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3713 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3714 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3715 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3716 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3717 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3718 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3719 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3720 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3721 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3722 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3723 Matcher bound = Pattern.compile("\\b").matcher("");
3724 Matcher word = Pattern.compile("\\w++").matcher("");
3725 // UNICODE_CHARACTER_CLASS
3726 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3727 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3728 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3729 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3730 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3731 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3732 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3733 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3734 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3735 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3736 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3737 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3738 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3739 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3740 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3741 // embedded flag (?U)
3742 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3743 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3744 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3745
3746 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3747 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3748 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3749 // properties
3750 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3751 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3752 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3753 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3754 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3755 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3756 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3757 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3758 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3759 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3760
3761 // javaMethod
3762 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3763 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3764 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3765 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3766
3767 for (int cp = 1; cp < 0x30000; cp++) {
3768 String str = new String(Character.toChars(cp));
3769 int type = Character.getType(cp);
3770 if (// lower
3771 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3772 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3773 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3774 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3775 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3776 // upper
3777 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3778 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3779 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3780 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3781 // alpha
3782 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3783 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3784 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3785 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3786 // digit
3787 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3788 Character.isDigit(cp) != digitU.reset(str).matches() ||
3789 // alnum
3790 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3791 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3792 // punct
3793 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3794 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3795 // graph
3796 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3797 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3798 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3799 // blank
3800 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3801 != blank.reset(str).matches() ||
3802 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3803 // print
3804 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3805 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3806 // cntrl
3807 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3808 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3809 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3810 // hexdigit
3811 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3812 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3813 // space
3814 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3815 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3816 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3817 // word
3818 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3819 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3820 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3821 // bwordb
3822 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3823 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3824 // properties
3825 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3826 Character.isLetter(cp) != letterP.reset(str).matches()||
3827 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3828 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3829 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3830 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3831 failCount++;
3832 }
3833
3834 // bounds/word align
3835 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3836 if (!bwbU.reset("\u0180sherman\u0400").matches())
3837 failCount++;
3838 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3839 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3840 failCount++;
3841 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3842 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3843 failCount++;
3844 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3845 failCount++;
3846 report("unicodePredefinedClasses");
3847 }
shermanecb65472012-05-08 10:57:13 -07003848
3849 private static void horizontalAndVerticalWSTest() throws Exception {
3850 String hws = new String (new char[] {
3851 0x09, 0x20, 0xa0, 0x1680, 0x180e,
3852 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3853 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3854 0x202f, 0x205f, 0x3000 });
3855 String vws = new String (new char[] {
3856 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3857 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3858 !Pattern.compile("[\\h]+").matcher(hws).matches())
3859 failCount++;
3860 if (Pattern.compile("\\H").matcher(hws).find() ||
3861 Pattern.compile("[\\H]").matcher(hws).find())
3862 failCount++;
3863 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3864 !Pattern.compile("[\\v]+").matcher(vws).matches())
3865 failCount++;
3866 if (Pattern.compile("\\V").matcher(vws).find() ||
3867 Pattern.compile("[\\V]").matcher(vws).find())
3868 failCount++;
3869 String prefix = "abcd";
3870 String suffix = "efgh";
3871 String ng = "A";
3872 for (int i = 0; i < hws.length(); i++) {
3873 String c = String.valueOf(hws.charAt(i));
3874 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3875 if (!m.find() || !c.equals(m.group()))
3876 failCount++;
3877 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3878 if (!m.find() || !c.equals(m.group()))
3879 failCount++;
3880
3881 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3882 if (!m.find() || !ng.equals(m.group()))
3883 failCount++;
3884 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3885 if (!m.find() || !ng.equals(m.group()))
3886 failCount++;
3887 }
3888 for (int i = 0; i < vws.length(); i++) {
3889 String c = String.valueOf(vws.charAt(i));
3890 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3891 if (!m.find() || !c.equals(m.group()))
3892 failCount++;
3893 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3894 if (!m.find() || !c.equals(m.group()))
3895 failCount++;
3896
3897 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3898 if (!m.find() || !ng.equals(m.group()))
3899 failCount++;
3900 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3901 if (!m.find() || !ng.equals(m.group()))
3902 failCount++;
3903 }
3904 // \v in range is interpreted as 0x0B. This is the undocumented behavior
3905 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3906 failCount++;
3907 report("horizontalAndVerticalWSTest");
3908 }
3909
3910 private static void linebreakTest() throws Exception {
3911 String linebreaks = new String (new char[] {
3912 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3913 String crnl = "\r\n";
3914 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3915 !Pattern.compile("\\R").matcher(crnl).matches() ||
3916 Pattern.compile("\\R\\R").matcher(crnl).matches())
3917 failCount++;
3918 report("linebreakTest");
3919 }
3920
sherman0b4d42d2009-02-23 21:06:15 -08003921}