blob: 3c441d6257bd62bcf664f9c5e65207975a8c7588 [file] [log] [blame]
sherman241dd4e2011-04-10 23:33:14 -07001
2/*
3 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 */
24
/**
 * @test
 * @bug 6945564 6959267 7033561
 * @summary Check that j.l.Character.UnicodeScript agrees with the Unicode
 *          data files Scripts.txt and PropertyValueAliases.txt
 */
30
31import java.io.*;
shermancc01ef52010-05-18 15:36:47 -070032import java.util.*;
33import java.util.regex.*;
34import java.lang.Character.UnicodeScript;
35
/**
 * Verifies {@link Character.UnicodeScript} against two Unicode data files:
 * {@code Scripts.txt} (code point ranges per script) and
 * {@code PropertyValueAliases.txt} (four-letter script aliases).
 *
 * <p>Three checks are performed, each failing with a {@link RuntimeException}
 * on the first mismatch:
 * <ol>
 *   <li>every code point in every range listed in {@code Scripts.txt} maps to
 *       the listed script;</li>
 *   <li>every code point from U+0000 through U+10FFFF either maps to a script
 *       whose listed ranges contain it, or maps to {@code UNKNOWN} and is
 *       unassigned, a surrogate or private-use;</li>
 *   <li>every {@code sc} alias resolves to the same script as its long name
 *       (the {@code Hrkt} alias is skipped).</li>
 * </ol>
 */
public class CheckScript {

    /** A {@code Scripts.txt} data line: {@code XXXX[..YYYY] ; Script_Name # comment}. */
    private static final Pattern SCRIPT_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** A {@code PropertyValueAliases.txt} script line: {@code sc ; Abcd ; Long_Name ...}. */
    private static final Pattern ALIAS_LINE = Pattern.compile(
        "sc\\s*;\\s*(\\p{Alpha}{4})\\s*;\\s*([\\p{Alpha}|_]+)\\s*.*");

    /**
     * Runs all checks.
     *
     * @param args either empty, in which case both data files are looked up in
     *             the directory named by the {@code test.src} system property
     *             (default {@code "."}), or exactly two paths:
     *             {@code Scripts.txt} then {@code PropertyValueAliases.txt}
     * @throws RuntimeException if the argument count is neither 0 nor 2, or if
     *                          any check fails
     * @throws Exception if a data file cannot be read
     */
    public static void main(String[] args) throws Exception {
        File fScripts;
        File fAliases;
        if (args.length == 0) {
            String dir = System.getProperty("test.src", ".");
            fScripts = new File(dir, "Scripts.txt");
            fAliases = new File(dir, "PropertyValueAliases.txt");
        } else if (args.length == 2) {
            fScripts = new File(args[0]);
            fAliases = new File(args[1]);
        } else {
            System.out.println("java CheckScript Scripts.txt PropertyValueAliases.txt");
            throw new RuntimeException("Datafile name should be specified.");
        }

        Map<String, List<int[]>> scripts = loadScriptRanges(fScripts);
        checkDefinedRanges(scripts);
        checkAllCodePoints(scripts);
        checkAliases(fAliases);
    }

    /**
     * Parses {@code Scripts.txt} into a map from lower-cased script name to its
     * inclusive {@code {start, end}} code point ranges. Lines that are at most
     * one character long, start with {@code '#'}, or do not match the data
     * line pattern are ignored.
     */
    private static Map<String, List<int[]>> loadScriptRanges(File fScripts)
        throws IOException
    {
        Map<String, List<int[]>> scripts = new HashMap<>();
        Matcher m = SCRIPT_LINE.matcher("");
        try (BufferedReader reader = new BufferedReader(new FileReader(fScripts))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                // A line without "..YYYY" describes a single code point.
                int end = (m.group(2) == null)
                    ? start
                    : Integer.parseInt(m.group(2), 16);
                String name = m.group(3).toLowerCase(Locale.ENGLISH);
                List<int[]> ranges = scripts.get(name);
                if (ranges == null) {
                    ranges = new ArrayList<>();
                    scripts.put(name, ranges);
                }
                ranges.add(new int[] { start, end });
            }
        }
        return scripts;
    }

    /** Checks that every code point of every listed range maps to the listed script. */
    private static void checkDefinedRanges(Map<String, List<int[]>> scripts) {
        for (Map.Entry<String, List<int[]>> entry : scripts.entrySet()) {
            String name = entry.getKey();
            System.out.println("Checking " + name + "...");
            UnicodeScript expected = UnicodeScript.forName(name);
            for (int[] range : entry.getValue()) {
                for (int cp = range[0]; cp <= range[1]; cp++) {
                    UnicodeScript script = UnicodeScript.of(cp);
                    if (script != expected) {
                        throw new RuntimeException(
                            "UnicodeScript failed: cp=" +
                            Integer.toHexString(cp) +
                            ", of(cp)=<" + script + "> but <" +
                            expected + "> is expected");
                    }
                }
            }
        }
    }

    /**
     * Checks every code point, U+10FFFF included: a non-UNKNOWN script must
     * list the code point in one of its ranges, and UNKNOWN is only accepted
     * for unassigned, surrogate and private-use code points.
     */
    private static void checkAllCodePoints(Map<String, List<int[]>> scripts) {
        // Resolve each script's ranges once instead of once per code point.
        // Scripts absent from the data file are simply not in this map.
        Map<UnicodeScript, List<int[]>> rangesByScript = new EnumMap<>(UnicodeScript.class);
        for (UnicodeScript script : UnicodeScript.values()) {
            List<int[]> ranges = scripts.get(script.name().toLowerCase(Locale.ENGLISH));
            if (ranges != null) {
                rangesByScript.put(script, ranges);
            }
        }

        // The bound is inclusive: MAX_CODE_POINT is itself a valid code point.
        for (int cp = Character.MIN_CODE_POINT; cp <= Character.MAX_CODE_POINT; cp++) {
            UnicodeScript script = UnicodeScript.of(cp);
            if (script == UnicodeScript.UNKNOWN) {
                int type = Character.getType(cp);
                if (type != Character.UNASSIGNED &&
                    type != Character.SURROGATE &&
                    type != Character.PRIVATE_USE) {
                    throw new RuntimeException(
                        "UnicodeScript failed: cp=" +
                        Integer.toHexString(cp) +
                        ", of(cp)=<" + script +
                        "> but cp is an assigned character");
                }
            } else if (!contains(rangesByScript.get(script), cp)) {
                throw new RuntimeException(
                    "UnicodeScript failed: cp=" +
                    Integer.toHexString(cp) +
                    ", of(cp)=<" + script +
                    "> but NOT in ranges of this script");
            }
        }
    }

    /** Returns whether {@code cp} lies in any inclusive range; {@code null} means no ranges. */
    private static boolean contains(List<int[]> ranges, int cp) {
        if (ranges == null) {
            return false;
        }
        for (int[] range : ranges) {
            if (cp >= range[0] && cp <= range[1]) {
                return true;
            }
        }
        return false;
    }

    /** Checks that each four-letter {@code sc} alias names the same script as its long name. */
    private static void checkAliases(File fAliases) throws IOException {
        Matcher m = ALIAS_LINE.matcher("");
        try (BufferedReader reader = new BufferedReader(new FileReader(fAliases))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    continue;
                }
                String alias = m.group(1);
                String name = m.group(2);
                // HRKT -> Katakana_Or_Hiragana not supported
                if ("HRKT".equals(alias.toUpperCase(Locale.ENGLISH))) {
                    continue;
                }
                if (UnicodeScript.forName(alias) != UnicodeScript.forName(name)) {
                    throw new RuntimeException(
                        "UnicodeScript failed: alias<" + alias +
                        "> does not map to <" + name + ">");
                }
            }
        }
    }
}