/*
 * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package sun.io;
27
28import sun.nio.cs.ext.IBM949C;
29
/**
 * @author Malcolm Ayres
 */

/*
 * Cp949C is a hand-modified version of Cp949 that
 * maps Unicode U+005C <-> 0x5C (local code page).
 */
38
39public class CharToByteCp949C extends CharToByteConverter
40{
41 private static final char SBase = '\uAC00';
42 private static final char LBase = '\u1100';
43 private static final char VBase = '\u1161';
44 private static final char TBase = '\u11A7';
45 private static final int VCount = 21;
46 private static final int TCount = 28;
47 private static final byte G0 = 0;
48 private static final byte G1 = 1;
49 private static final byte G2 = 2;
50 private static final byte G3 = 3;
51 private byte charState = G0;
52 private char l, v, t;
53
54 private byte[] outputByte;
55
56 private char highHalfZoneCode;
57 private int mask1;
58 private int mask2;
59 private int shift;
60 private short[] index1;
61 private String index2;
62 private String index2a;
63
64 private final static IBM949C nioCoder = new IBM949C();
65
66 public CharToByteCp949C() {
67 super();
68 index1 = nioCoder.getEncoderIndex1();
69 index2 = nioCoder.getEncoderIndex2();
70 index2a = nioCoder.getEncoderIndex2a();
71 highHalfZoneCode = 0;
72 outputByte = new byte[2];
73 mask1 = 0xFFF8;
74 mask2 = 0x0007;
75 shift = 3;
76 }
77
78 /**
79 * flush out any residual data and reset the buffer state
80 */
81 public int flush(byte[] output, int outStart, int outEnd)
82 throws MalformedInputException,
83 ConversionBufferFullException
84 {
85 int bytesOut;
86
87 byteOff = outStart;
88
89 if (highHalfZoneCode != 0) {
90 reset();
91 badInputLength = 0;
92 throw new MalformedInputException();
93 }
94
95 if (charState != G0) {
96 try {
97 unicodeToBuffer(composeHangul() ,output, outEnd);
98 }
99 catch(UnknownCharacterException e) {
100 reset();
101 badInputLength = 0;
102 throw new MalformedInputException();
103 }
104 charState = G0;
105 }
106
107 bytesOut = byteOff - outStart;
108
109 reset();
110 return bytesOut;
111 }
112
113 /**
114 * Resets converter to its initial state.
115 */
116 public void reset() {
117 highHalfZoneCode = 0;
118 charState = G0;
119 charOff = byteOff = 0;
120 }
121
122 /**
123 * Returns true if the given character can be converted to the
124 * target character encoding.
125 */
126 public boolean canConvert(char ch) {
127 int index;
128 int theBytes;
129
130 index = index1[((ch & mask1) >> shift)] + (ch & mask2);
131 if (index < 15000)
132 theBytes = (int)(index2.charAt(index));
133 else
134 theBytes = (int)(index2a.charAt(index-15000));
135
136 if (theBytes != 0)
137 return (true);
138
139 // only return true if input char was unicode null - all others are
140 // undefined
141 return( ch == '\u0000');
142 }
143
144 /**
145 * Character conversion
146 */
147
148 public int convert(char[] input, int inOff, int inEnd,
149 byte[] output, int outOff, int outEnd)
150 throws UnknownCharacterException, MalformedInputException,
151 ConversionBufferFullException
152 {
153 char inputChar;
154 int inputSize;
155
156 charOff = inOff;
157 byteOff = outOff;
158
159 while (charOff < inEnd) {
160
161 if (highHalfZoneCode == 0) {
162 inputChar = input[charOff];
163 inputSize = 1;
164 } else {
165 inputChar = highHalfZoneCode;
166 inputSize = 0;
167 highHalfZoneCode = 0;
168 }
169
170 switch (charState) {
171 case G0:
172
173 l = LBase;
174 v = VBase;
175 t = TBase;
176
177 if ( isLeadingC(inputChar) ) { // Leading Consonant
178 l = inputChar;
179 charState = G1;
180 break;
181 }
182
183 if ( isVowel(inputChar) ) { // Vowel
184 v = inputChar;
185 charState = G2;
186 break;
187 }
188
189 if ( isTrailingC(inputChar) ) { // Trailing Consonant
190 t = inputChar;
191 charState = G3;
192 break;
193 }
194
195 break;
196
197 case G1:
198 if ( isLeadingC(inputChar) ) { // Leading Consonant
199 l = composeLL(l, inputChar);
200 break;
201 }
202
203 if ( isVowel(inputChar) ) { // Vowel
204 v = inputChar;
205 charState = G2;
206 break;
207 }
208
209 if ( isTrailingC(inputChar) ) { // Trailing Consonant
210 t = inputChar;
211 charState = G3;
212 break;
213 }
214
215 unicodeToBuffer(composeHangul(), output, outEnd);
216
217 charState = G0;
218 break;
219
220 case G2:
221 if ( isLeadingC(inputChar) ) { // Leading Consonant
222
223 unicodeToBuffer(composeHangul(), output, outEnd);
224
225 l = inputChar;
226 v = VBase;
227 t = TBase;
228 charState = G1;
229 break;
230 }
231
232 if ( isVowel(inputChar) ) { // Vowel
233 v = composeVV(l, inputChar);
234 charState = G2;
235 break;
236 }
237
238 if ( isTrailingC(inputChar) ) { // Trailing Consonant
239 t = inputChar;
240 charState = G3;
241 break;
242 }
243
244 unicodeToBuffer(composeHangul(), output, outEnd);
245
246 charState = G0;
247
248 break;
249
250 case G3:
251 if ( isTrailingC(inputChar) ) { // Trailing Consonant
252 t = composeTT(t, inputChar);
253 charState = G3;
254 break;
255 }
256
257 unicodeToBuffer(composeHangul(), output, outEnd);
258
259 charState = G0;
260
261 break;
262 }
263
264 if (charState != G0)
265 charOff++;
266 else {
267
268 // Is this a high surrogate?
269 if(inputChar >= '\ud800' && inputChar <= '\udbff') {
270 // Is this the last character of the input?
271 if (charOff + inputSize >= inEnd) {
272 highHalfZoneCode = inputChar;
273 charOff += inputSize;
274 break;
275 }
276
277 // Is there a low surrogate following?
278 inputChar = input[charOff + inputSize];
279 if (inputChar >= '\udc00' && inputChar <= '\udfff') {
280 // We have a valid surrogate pair. Too bad we don't do
281 // surrogates. Is substitution enabled?
282 if (subMode) {
283 if (subBytes.length == 1) {
284 outputByte[0] = 0x00;
285 outputByte[1] = subBytes[0];
286 } else {
287 outputByte[0] = subBytes[0];
288 outputByte[1] = subBytes[1];
289 }
290
291 bytesToBuffer(outputByte, output, outEnd);
292 inputSize++;
293 } else {
294 badInputLength = 2;
295 throw new UnknownCharacterException();
296 }
297 } else {
298 // We have a malformed surrogate pair
299 badInputLength = 1;
300 throw new MalformedInputException();
301 }
302 }
303
304 // Is this an unaccompanied low surrogate?
305 else
306 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
307 badInputLength = 1;
308 throw new MalformedInputException();
309 } else {
310 unicodeToBuffer(inputChar, output, outEnd);
311 }
312
313 charOff += inputSize;
314
315 }
316
317 }
318
319 return byteOff - outOff;
320
321 }
322
323 private char composeHangul() {
324 int lIndex, vIndex, tIndex;
325
326 lIndex = l - LBase;
327 vIndex = v - VBase;
328 tIndex = t - TBase;
329
330 return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
331 }
332
333 private char composeLL(char l1, char l2) {
334 return l2;
335 }
336
337 private char composeVV(char v1, char v2) {
338 return v2;
339 }
340
341 private char composeTT(char t1, char t2) {
342 return t2;
343 }
344
345 private boolean isLeadingC(char c) {
346 return (c >= LBase && c <= '\u1159');
347 }
348
349 private boolean isVowel(char c) {
350 return (c >= VBase && c <= '\u11a2');
351 }
352
353 private boolean isTrailingC(char c) {
354 return (c >= TBase && c <= '\u11f9');
355 }
356
357 /**
358 * returns the maximum number of bytes needed to convert a char
359 */
360 public int getMaxBytesPerChar() {
361 return 2;
362 }
363
364
365 /**
366 * Return the character set ID
367 */
368 public String getCharacterEncoding() {
369 return "Cp949C";
370 }
371
372 /**
373 * private function to add the bytes to the output buffer
374 */
375 private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
376 throws ConversionBufferFullException,
377 UnknownCharacterException {
378
379 int spaceNeeded;
380
381 // ensure sufficient space for the bytes(s)
382
383 if (theBytes[0] == 0x00)
384 spaceNeeded = 1;
385 else
386 spaceNeeded = 2;
387
388 if (byteOff + spaceNeeded > outEnd)
389 throw new ConversionBufferFullException();
390
391 // move the data into the buffer
392
393 if (spaceNeeded == 1)
394 output[byteOff++] = theBytes[1];
395 else {
396 output[byteOff++] = theBytes[0];
397 output[byteOff++] = theBytes[1];
398 }
399
400 }
401
402 /**
403 * private function to add a unicode character to the output buffer
404 */
405 private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
406 throws ConversionBufferFullException,
407 UnknownCharacterException {
408
409 int index;
410 int theBytes;
411
412 // first we convert the unicode to its byte representation
413
414 index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
415 if (index < 15000)
416 theBytes = (int)(index2.charAt(index));
417 else
418 theBytes = (int)(index2a.charAt(index-15000));
419 outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
420 outputByte[1] = (byte)(theBytes & 0x000000ff);
421
422 // if the unicode was not mappable - look for the substitution bytes
423
424 if (outputByte[0] == 0x00 && outputByte[1] == 0x00
425 && unicode != '\u0000') {
426 if (subMode) {
427 if (subBytes.length == 1) {
428 outputByte[0] = 0x00;
429 outputByte[1] = subBytes[0];
430 } else {
431 outputByte[0] = subBytes[0];
432 outputByte[1] = subBytes[1];
433 }
434 } else {
435 badInputLength = 1;
436 throw new UnknownCharacterException();
437 }
438 }
439
440 // now put the bytes in the buffer
441
442 bytesToBuffer(outputByte, output, outEnd);
443
444 }
445}