sherman | cc01ef5 | 2010-05-18 15:36:47 -0700 | [diff] [blame^] | 1 | /** |
| 2 | * @test |
| 3 | * @bug 6945564 |
| 4 | * @summary Check that j.l.Character.UnicodeScript agrees with the script ranges listed in the given script data file |
| 5 | * @ignore don't run until #6903266 is integrated |
| 6 | */ |
| 7 | |
| 8 | import java.io.*; |
| 9 | import java.lang.reflect.*; |
| 10 | import java.util.*; |
| 11 | import java.util.regex.*; |
| 12 | import java.lang.Character.UnicodeScript; |
| 13 | |
/**
 * Cross-checks {@code Character.UnicodeScript} against a script data file
 * given on the command line.
 *
 * <p>Each data line of the file has the form
 * {@code XXXX[..YYYY] ; Script_Name # comment}, where {@code XXXX} and
 * {@code YYYY} are hexadecimal code points (presumably the layout of the
 * Unicode {@code Scripts.txt} file -- confirm against the file actually fed
 * to this test). Two checks are performed:
 * <ol>
 *   <li>every code point in every listed range must be reported by
 *       {@code Character.UnicodeScript.of} as belonging to the listed
 *       script;</li>
 *   <li>every code point from 0 through {@code Character.MAX_CODE_POINT}
 *       must either be {@code UNKNOWN} (and then be unassigned, a surrogate
 *       or private use), or fall inside one of the ranges the file lists for
 *       the script that {@code of} reports.</li>
 * </ol>
 * Any mismatch is reported by throwing a {@code RuntimeException}.
 */
public class CheckScript {

    /**
     * Matches one data line of the script file: a hexadecimal code point,
     * optionally followed by ".." and a second hexadecimal code point, then
     * ";", the script name and a trailing "#" comment.
     */
    private static final Pattern RANGE_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the script data file
     * @throws Exception if the file cannot be read, or (as a
     *         {@code RuntimeException}) if any check fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.out.println("java CheckScript script.txt");
            System.exit(1);
        }
        Map<String, int[]> scripts = readScriptRanges(args[0]);
        checkDefinedRanges(scripts);
        checkAllCodePoints(scripts);
    }

    /**
     * Parses the script data file into a map from lower-cased script name to
     * a flat array of inclusive range bounds: {start0, end0, start1, end1, ...}.
     *
     * @param path path of the script data file
     * @return the ranges listed for each script name found in the file
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, int[]> readScriptRanges(String path)
        throws IOException
    {
        Map<String, List<Integer>> collected = new HashMap<>();
        Matcher m = RANGE_LINE.matcher("");
        // try-with-resources so the reader is closed even if parsing throws
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // skip blank lines and comment lines
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    // lines that are not range definitions are ignored
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                // a line without "..YYYY" describes a single code point
                int end = (m.group(2) == null)
                          ? start
                          : Integer.parseInt(m.group(2), 16);
                String name = m.group(3).toLowerCase(Locale.ENGLISH);
                List<Integer> bounds = collected.get(name);
                if (bounds == null) {
                    bounds = new ArrayList<>();
                    collected.put(name, bounds);
                }
                bounds.add(start);
                bounds.add(end);
            }
        }

        // Unbox once here so the checks below work on plain int arrays.
        Map<String, int[]> result = new HashMap<>();
        for (Map.Entry<String, List<Integer>> e : collected.entrySet()) {
            List<Integer> bounds = e.getValue();
            int[] flat = new int[bounds.size()];
            for (int i = 0; i < flat.length; i++) {
                flat[i] = bounds.get(i);
            }
            result.put(e.getKey(), flat);
        }
        return result;
    }

    /**
     * Verifies that every code point of every range listed in the file is
     * reported by {@code Character.UnicodeScript.of} as the listed script.
     *
     * @param scripts ranges per lower-cased script name, as produced by
     *        {@link #readScriptRanges}
     * @throws RuntimeException on the first code point whose script differs
     * @throws IllegalArgumentException if a listed name is not accepted by
     *         {@code Character.UnicodeScript.forName}
     */
    private static void checkDefinedRanges(Map<String, int[]> scripts) {
        for (Map.Entry<String, int[]> entry : scripts.entrySet()) {
            String name = entry.getKey();
            System.out.println("Checking " + name + "...");
            Character.UnicodeScript expected =
                Character.UnicodeScript.forName(name);
            int[] ranges = entry.getValue();
            for (int off = 0; off < ranges.length; off += 2) {
                int start = ranges[off];
                int end = ranges[off + 1];
                for (int cp = start; cp <= end; cp++) {
                    Character.UnicodeScript script =
                        Character.UnicodeScript.of(cp);
                    if (script != expected) {
                        throw new RuntimeException(
                            "UnicodeScript failed: cp=" +
                            Integer.toHexString(cp) +
                            ", of(cp)=<" + script + "> but <" +
                            expected + "> is expected");
                    }
                }
            }
        }
    }

    /**
     * Verifies every code point from 0 through
     * {@code Character.MAX_CODE_POINT} (inclusive): an {@code UNKNOWN} script
     * is only acceptable for unassigned, surrogate and private-use code
     * points, and any other script must be backed by a range in the file.
     *
     * @param scripts ranges per lower-cased script name, as produced by
     *        {@link #readScriptRanges}
     * @throws RuntimeException on the first code point that violates either
     *         rule, including when the file lists no range at all for the
     *         script reported for that code point
     */
    private static void checkAllCodePoints(Map<String, int[]> scripts) {
        // Resolve the name -> ranges lookup once per script rather than once
        // per code point.
        Map<Character.UnicodeScript, int[]> byScript =
            new EnumMap<>(Character.UnicodeScript.class);
        for (Character.UnicodeScript s : Character.UnicodeScript.values()) {
            int[] ranges = scripts.get(s.name().toLowerCase(Locale.ENGLISH));
            if (ranges != null) {
                byScript.put(s, ranges);
            }
        }

        // "<=" so that the last code point (MAX_CODE_POINT) is checked too
        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
            if (script == Character.UnicodeScript.UNKNOWN) {
                int type = Character.getType(cp);
                if (type != Character.UNASSIGNED &&
                    type != Character.SURROGATE &&
                    type != Character.PRIVATE_USE) {
                    throw new RuntimeException(
                        "UnicodeScript failed: cp=" +
                        Integer.toHexString(cp) +
                        ", of(cp)=<" + script +
                        "> but the code point is assigned (type=" +
                        type + ")");
                }
                continue;
            }

            int[] ranges = byScript.get(script);
            if (ranges == null) {
                // the file never mentions this script at all
                throw new RuntimeException(
                    "UnicodeScript failed: cp=" +
                    Integer.toHexString(cp) +
                    ", of(cp)=<" + script +
                    "> but no ranges are defined for this script");
            }
            boolean found = false;
            for (int off = 0; off < ranges.length; off += 2) {
                if (cp >= ranges[off] && cp <= ranges[off + 1]) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                throw new RuntimeException(
                    "UnicodeScript failed: cp=" +
                    Integer.toHexString(cp) +
                    ", of(cp)=<" + script +
                    "> but NOT in ranges of this script");
            }
        }
    }
}