sherman | 241dd4e | 2011-04-10 23:33:14 -0700 | [diff] [blame] | 1 | |
| 2 | /* |
| 3 | * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. |
| 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 5 | * |
| 6 | * This code is free software; you can redistribute it and/or modify it |
| 7 | * under the terms of the GNU General Public License version 2 only, as |
| 8 | * published by the Free Software Foundation. |
| 9 | * |
| 10 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 13 | * version 2 for more details (a copy is included in the LICENSE file that |
| 14 | * accompanied this code). |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License version |
| 17 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 19 | * |
| 20 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 21 | * or visit www.oracle.com if you need additional information or have any |
| 22 | * questions. |
| 23 | */ |
| 24 | |
sherman | cc01ef5 | 2010-05-18 15:36:47 -0700 | [diff] [blame] | 25 | /** |
| 26 | * @test |
sherman | 241dd4e | 2011-04-10 23:33:14 -0700 | [diff] [blame] | 27 | * @bug 6945564 6959267 7033561 |
sherman | cc01ef5 | 2010-05-18 15:36:47 -0700 | [diff] [blame] | 28 | * @summary Check j.l.Character.UnicodeScript against the Scripts.txt and PropertyValueAliases.txt data files |
sherman | cc01ef5 | 2010-05-18 15:36:47 -0700 | [diff] [blame] | 29 | */ |
| 30 | |
| 31 | import java.io.*; |
sherman | cc01ef5 | 2010-05-18 15:36:47 -0700 | [diff] [blame] | 32 | import java.util.*; |
| 33 | import java.util.regex.*; |
| 34 | import java.lang.Character.UnicodeScript; |
| 35 | |
/**
 * Cross-checks {@link Character.UnicodeScript} against two data files:
 * a {@code Scripts.txt}-style file listing code point ranges per script, and
 * a {@code PropertyValueAliases.txt}-style file listing script aliases.
 *
 * <p>Three checks are performed, each failing with a {@link RuntimeException}:
 * <ol>
 *   <li>every code point in every range listed in the scripts file must map,
 *       via {@code UnicodeScript.of}, to the script named on that line;</li>
 *   <li>every code point from 0 through {@code Character.MAX_CODE_POINT}
 *       (inclusive) that maps to a script other than {@code UNKNOWN} must fall
 *       inside one of that script's listed ranges, and only unassigned,
 *       surrogate and private-use code points may map to {@code UNKNOWN};</li>
 *   <li>every four-letter {@code sc} alias in the aliases file must resolve,
 *       via {@code UnicodeScript.forName}, to the same script as its long
 *       name.</li>
 * </ol>
 */
public class CheckScript {

    /** Matches a data line such as {@code 0041..005A    ; Latin # ...}. */
    private static final Pattern SCRIPT_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** Matches a script alias line such as {@code sc ; Latn ; Latin}. */
    private static final Pattern ALIAS_LINE = Pattern.compile(
        "sc\\s*;\\s*(\\p{Alpha}{4})\\s*;\\s*([\\p{Alpha}|_]+)\\s*.*");

    /**
     * Runs all checks.
     *
     * @param args either empty, in which case {@code Scripts.txt} and
     *             {@code PropertyValueAliases.txt} are looked up in the
     *             directory named by the {@code test.src} system property
     *             (default {@code "."}), or exactly two paths: the scripts
     *             file followed by the aliases file
     * @throws RuntimeException if the argument count is neither 0 nor 2, or
     *                          if any check fails
     * @throws Exception        if a data file cannot be read
     */
    public static void main(String[] args) throws Exception {
        File fScripts;
        File fAliases;
        if (args.length == 0) {
            String dir = System.getProperty("test.src", ".");
            fScripts = new File(dir, "Scripts.txt");
            fAliases = new File(dir, "PropertyValueAliases.txt");
        } else if (args.length == 2) {
            fScripts = new File(args[0]);
            fAliases = new File(args[1]);
        } else {
            System.out.println("java CheckScript Scripts.txt PropertyValueAliases.txt");
            throw new RuntimeException("Datafile name should be specified.");
        }

        Map<String, List<Integer>> scripts = readScriptRanges(fScripts);
        checkDefinedRanges(scripts);
        checkAllCodePoints(scripts);
        checkAliases(fAliases);
    }

    /**
     * Parses the scripts file into a map from lower-cased script name to a
     * flat list of inclusive range bounds: start0, end0, start1, end1, ...
     * Lines of length 0 or 1, lines starting with '#', and lines that do not
     * match the expected layout are ignored.
     */
    private static Map<String, List<Integer>> readScriptRanges(File fScripts)
            throws IOException {
        Map<String, List<Integer>> scripts = new HashMap<>();
        Matcher m = SCRIPT_LINE.matcher("");
        try (BufferedReader reader = new BufferedReader(new FileReader(fScripts))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                // A line without "..XXXX" describes a single code point.
                int end = (m.group(2) == null)
                        ? start
                        : Integer.parseInt(m.group(2), 16);
                String name = m.group(3).toLowerCase(Locale.ENGLISH);
                List<Integer> ranges = scripts.get(name);
                if (ranges == null) {
                    ranges = new ArrayList<>();
                    scripts.put(name, ranges);
                }
                ranges.add(start);
                ranges.add(end);
            }
        }
        return scripts;
    }

    /**
     * Checks that every code point inside every listed range maps to the
     * script the file assigns to that range.
     */
    private static void checkDefinedRanges(Map<String, List<Integer>> scripts) {
        for (Map.Entry<String, List<Integer>> entry : scripts.entrySet()) {
            String name = entry.getKey();
            System.out.println("Checking " + name + "...");
            Character.UnicodeScript expected =
                Character.UnicodeScript.forName(name);

            List<Integer> ranges = entry.getValue();
            for (int off = 0; off < ranges.size(); off += 2) {
                int start = ranges.get(off);
                int end = ranges.get(off + 1);
                for (int cp = start; cp <= end; cp++) {
                    Character.UnicodeScript script =
                        Character.UnicodeScript.of(cp);
                    if (script != expected) {
                        throw new RuntimeException(
                            "UnicodeScript failed: cp=" +
                            Integer.toHexString(cp) +
                            ", of(cp)=<" + script + "> but <" +
                            expected + "> is expected");
                    }
                }
            }
        }
    }

    /**
     * Checks every code point from 0 through Character.MAX_CODE_POINT
     * inclusive: a code point reported as UNKNOWN must be unassigned,
     * surrogate or private-use, and any other code point must lie inside a
     * range the scripts file lists for its script.
     */
    private static void checkAllCodePoints(Map<String, List<Integer>> scripts) {
        // Each script's bounds are unboxed once, on first use, rather than
        // once per code point.
        Map<Character.UnicodeScript, int[]> boundsByScript =
            new EnumMap<>(Character.UnicodeScript.class);

        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
            if (script == Character.UnicodeScript.UNKNOWN) {
                int type = Character.getType(cp);
                if (type != Character.UNASSIGNED &&
                    type != Character.SURROGATE &&
                    type != Character.PRIVATE_USE) {
                    throw new RuntimeException(
                        "UnicodeScript failed: cp=" +
                        Integer.toHexString(cp) +
                        ", of(cp)=<" + script +
                        "> but UNKNOWN is not expected for an assigned character");
                }
                continue;
            }

            int[] bounds = boundsByScript.get(script);
            if (bounds == null) {
                List<Integer> ranges =
                    scripts.get(script.name().toLowerCase(Locale.ENGLISH));
                if (ranges == null) {
                    // Report the mismatch instead of failing with a bare
                    // NullPointerException.
                    throw new RuntimeException(
                        "UnicodeScript failed: cp=" +
                        Integer.toHexString(cp) +
                        ", of(cp)=<" + script +
                        "> but no ranges for this script are defined" +
                        " in the scripts file");
                }
                bounds = toIntArray(ranges);
                boundsByScript.put(script, bounds);
            }

            boolean found = false;
            for (int off = 0; off < bounds.length; off += 2) {
                if (cp >= bounds[off] && cp <= bounds[off + 1]) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                throw new RuntimeException(
                    "UnicodeScript failed: cp=" +
                    Integer.toHexString(cp) +
                    ", of(cp)=<" + script +
                    "> but NOT in ranges of this script");
            }
        }
    }

    /**
     * Checks that each four-letter script alias in the aliases file resolves
     * to the same UnicodeScript constant as the long name on the same line.
     */
    private static void checkAliases(File fAliases) throws IOException {
        Matcher m = ALIAS_LINE.matcher("");
        try (BufferedReader reader = new BufferedReader(new FileReader(fAliases))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    continue;
                }
                String alias = m.group(1);
                String name = m.group(2);
                // HRKT -> Katakana_Or_Hiragana not supported
                if ("HRKT".equals(alias.toUpperCase(Locale.ENGLISH))) {
                    continue;
                }
                if (Character.UnicodeScript.forName(alias) !=
                    Character.UnicodeScript.forName(name)) {
                    throw new RuntimeException(
                        "UnicodeScript failed: alias<" + alias +
                        "> does not map to <" + name + ">");
                }
            }
        }
    }

    /** Copies a list of boxed integers into a primitive array. */
    private static int[] toIntArray(List<Integer> values) {
        int[] result = new int[values.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = values.get(i);
        }
        return result;
    }
}