blob: 6079d3f59841a600352624fcdc0b034b5d2da6d3 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
29 *
30 * The original version of this source code and documentation is copyrighted
31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32 * materials are provided under terms of a License Agreement between Taligent
33 * and Sun. This technology is protected by multiple US and International
34 * patents. This notice and attribution to Taligent may not be removed.
35 * Taligent is a registered trademark of Taligent, Inc.
36 *
37 */
38
39package java.text;
40
41import java.lang.Character;
42
43/**
44 * Utility class for normalizing and merging patterns for collation.
45 * This is to be used with MergeCollation for adding patterns to an
46 * existing rule table.
47 * @see MergeCollation
48 * @author Mark Davis, Helena Shih
49 */
50
51class PatternEntry {
52 /**
53 * Gets the current extension, quoted
54 */
55 public void appendQuotedExtension(StringBuffer toAddTo) {
56 appendQuoted(extension,toAddTo);
57 }
58
59 /**
60 * Gets the current chars, quoted
61 */
62 public void appendQuotedChars(StringBuffer toAddTo) {
63 appendQuoted(chars,toAddTo);
64 }
65
66 /**
67 * WARNING this is used for searching in a Vector.
68 * Because Vector.indexOf doesn't take a comparator,
69 * this method is ill-defined and ignores strength.
70 */
71 public boolean equals(Object obj) {
72 if (obj == null) return false;
73 PatternEntry other = (PatternEntry) obj;
74 boolean result = chars.equals(other.chars);
75 return result;
76 }
77
78 public int hashCode() {
79 return chars.hashCode();
80 }
81
82 /**
83 * For debugging.
84 */
85 public String toString() {
86 StringBuffer result = new StringBuffer();
87 addToBuffer(result, true, false, null);
88 return result.toString();
89 }
90
91 /**
92 * Gets the strength of the entry.
93 */
94 final int getStrength() {
95 return strength;
96 }
97
98 /**
99 * Gets the expanding characters of the entry.
100 */
101 final String getExtension() {
102 return extension;
103 }
104
105 /**
106 * Gets the core characters of the entry.
107 */
108 final String getChars() {
109 return chars;
110 }
111
112 // ===== privates =====
113
114 void addToBuffer(StringBuffer toAddTo,
115 boolean showExtension,
116 boolean showWhiteSpace,
117 PatternEntry lastEntry)
118 {
119 if (showWhiteSpace && toAddTo.length() > 0)
120 if (strength == Collator.PRIMARY || lastEntry != null)
121 toAddTo.append('\n');
122 else
123 toAddTo.append(' ');
124 if (lastEntry != null) {
125 toAddTo.append('&');
126 if (showWhiteSpace)
127 toAddTo.append(' ');
128 lastEntry.appendQuotedChars(toAddTo);
129 appendQuotedExtension(toAddTo);
130 if (showWhiteSpace)
131 toAddTo.append(' ');
132 }
133 switch (strength) {
134 case Collator.IDENTICAL: toAddTo.append('='); break;
135 case Collator.TERTIARY: toAddTo.append(','); break;
136 case Collator.SECONDARY: toAddTo.append(';'); break;
137 case Collator.PRIMARY: toAddTo.append('<'); break;
138 case RESET: toAddTo.append('&'); break;
139 case UNSET: toAddTo.append('?'); break;
140 }
141 if (showWhiteSpace)
142 toAddTo.append(' ');
143 appendQuoted(chars,toAddTo);
144 if (showExtension && extension.length() != 0) {
145 toAddTo.append('/');
146 appendQuoted(extension,toAddTo);
147 }
148 }
149
150 static void appendQuoted(String chars, StringBuffer toAddTo) {
151 boolean inQuote = false;
152 char ch = chars.charAt(0);
153 if (Character.isSpaceChar(ch)) {
154 inQuote = true;
155 toAddTo.append('\'');
156 } else {
157 if (PatternEntry.isSpecialChar(ch)) {
158 inQuote = true;
159 toAddTo.append('\'');
160 } else {
161 switch (ch) {
162 case 0x0010: case '\f': case '\r':
163 case '\t': case '\n': case '@':
164 inQuote = true;
165 toAddTo.append('\'');
166 break;
167 case '\'':
168 inQuote = true;
169 toAddTo.append('\'');
170 break;
171 default:
172 if (inQuote) {
173 inQuote = false; toAddTo.append('\'');
174 }
175 break;
176 }
177 }
178 }
179 toAddTo.append(chars);
180 if (inQuote)
181 toAddTo.append('\'');
182 }
183
184 //========================================================================
185 // Parsing a pattern into a list of PatternEntries....
186 //========================================================================
187
188 PatternEntry(int strength,
189 StringBuffer chars,
190 StringBuffer extension)
191 {
192 this.strength = strength;
193 this.chars = chars.toString();
194 this.extension = (extension.length() > 0) ? extension.toString()
195 : "";
196 }
197
198 static class Parser {
199 private String pattern;
200 private int i;
201
202 public Parser(String pattern) {
203 this.pattern = pattern;
204 this.i = 0;
205 }
206
207 public PatternEntry next() throws ParseException {
208 int newStrength = UNSET;
209
210 newChars.setLength(0);
211 newExtension.setLength(0);
212
213 boolean inChars = true;
214 boolean inQuote = false;
215 mainLoop:
216 while (i < pattern.length()) {
217 char ch = pattern.charAt(i);
218 if (inQuote) {
219 if (ch == '\'') {
220 inQuote = false;
221 } else {
222 if (newChars.length() == 0) newChars.append(ch);
223 else if (inChars) newChars.append(ch);
224 else newExtension.append(ch);
225 }
226 } else switch (ch) {
227 case '=': if (newStrength != UNSET) break mainLoop;
228 newStrength = Collator.IDENTICAL; break;
229 case ',': if (newStrength != UNSET) break mainLoop;
230 newStrength = Collator.TERTIARY; break;
231 case ';': if (newStrength != UNSET) break mainLoop;
232 newStrength = Collator.SECONDARY; break;
233 case '<': if (newStrength != UNSET) break mainLoop;
234 newStrength = Collator.PRIMARY; break;
235 case '&': if (newStrength != UNSET) break mainLoop;
236 newStrength = RESET; break;
237 case '\t':
238 case '\n':
239 case '\f':
240 case '\r':
241 case ' ': break; // skip whitespace TODO use Character
242 case '/': inChars = false; break;
243 case '\'':
244 inQuote = true;
245 ch = pattern.charAt(++i);
246 if (newChars.length() == 0) newChars.append(ch);
247 else if (inChars) newChars.append(ch);
248 else newExtension.append(ch);
249 break;
250 default:
251 if (newStrength == UNSET) {
252 throw new ParseException
253 ("missing char (=,;<&) : " +
254 pattern.substring(i,
255 (i+10 < pattern.length()) ?
256 i+10 : pattern.length()),
257 i);
258 }
259 if (PatternEntry.isSpecialChar(ch) && (inQuote == false))
260 throw new ParseException
261 ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
262 if (inChars) {
263 newChars.append(ch);
264 } else {
265 newExtension.append(ch);
266 }
267 break;
268 }
269 i++;
270 }
271 if (newStrength == UNSET)
272 return null;
273 if (newChars.length() == 0) {
274 throw new ParseException
275 ("missing chars (=,;<&): " +
276 pattern.substring(i,
277 (i+10 < pattern.length()) ?
278 i+10 : pattern.length()),
279 i);
280 }
281
282 return new PatternEntry(newStrength, newChars, newExtension);
283 }
284
285 // We re-use these objects in order to improve performance
286 private StringBuffer newChars = new StringBuffer();
287 private StringBuffer newExtension = new StringBuffer();
288
289 }
290
291 static boolean isSpecialChar(char ch) {
292 return ((ch == '\u0020') ||
293 ((ch <= '\u002F') && (ch >= '\u0022')) ||
294 ((ch <= '\u003F') && (ch >= '\u003A')) ||
295 ((ch <= '\u0060') && (ch >= '\u005B')) ||
296 ((ch <= '\u007E') && (ch >= '\u007B')));
297 }
298
299
300 static final int RESET = -2;
301 static final int UNSET = -1;
302
303 int strength = UNSET;
304 String chars = "";
305 String extension = "";
306}