blob: 47da36bb4c900f78fa26c1dc1b572bb6895af36d [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25package sun.io;
26
27
/**
 * UCS Transformation Format 8 (UTF-8) -> UCS-2 (UTF-16) converter.
 *
 * See CharToByteUTF8.java for a description of the UTF-8 format.
 */
33
34public class ByteToCharUTF8 extends ByteToCharConverter {
35
36 private int savedSize;
37 private byte[] savedBytes;
38
39 public ByteToCharUTF8() {
40 super();
41 savedSize = 0;
42 savedBytes = new byte[5];
43 }
44
45 public int flush(char[] output, int outStart, int outEnd)
46 throws MalformedInputException
47 {
48 if (savedSize != 0) {
49 savedSize = 0;
50 badInputLength = 0;
51 throw new MalformedInputException();
52 }
53 byteOff = charOff = 0;
54 return 0;
55 }
56
57 /**
58 * Character converson
59 */
60 public int convert(byte[] input, int inOff, int inEnd,
61 char[] output, int outOff, int outEnd)
62 throws MalformedInputException, ConversionBufferFullException
63 {
64 int byte1, byte2, byte3, byte4;
65 char[] outputChar = new char[2];
66 int outputSize;
67 int byteOffAdjustment = 0;
68
69 if (savedSize != 0) {
70 byte[] newBuf;
71 newBuf = new byte[inEnd - inOff + savedSize];
72 for (int i = 0; i < savedSize; i++) {
73 newBuf[i] = savedBytes[i];
74 }
75 System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
76 input = newBuf;
77 inOff = 0;
78 inEnd = newBuf.length;
79 byteOffAdjustment = -savedSize;
80 savedSize = 0;
81 }
82
83 charOff = outOff;
84 byteOff = inOff;
85 int startByteOff;
86
87 while(byteOff < inEnd) {
88
89 startByteOff = byteOff;
90 byte1 = input[byteOff++] & 0xff;
91
92 if ((byte1 & 0x80) == 0){
93 outputChar[0] = (char)byte1;
94 outputSize = 1;
95 } else if ((byte1 & 0xe0) == 0xc0) {
96 if (byteOff >= inEnd) {
97 savedSize = 1;
98 savedBytes[0] = (byte)byte1;
99 break;
100 }
101 byte2 = input[byteOff++] & 0xff;
102 if ((byte2 & 0xc0) != 0x80) {
103 badInputLength = 2;
104 byteOff += byteOffAdjustment;
105 throw new MalformedInputException();
106 }
107 outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
108 outputSize = 1;
109 } else if ((byte1 & 0xf0) == 0xe0){
110 if (byteOff + 1 >= inEnd) {
111 savedBytes[0] = (byte)byte1;
112 if (byteOff >= inEnd) {
113 savedSize = 1;
114 } else {
115 savedSize = 2;
116 savedBytes[1] = (byte)input[byteOff++];
117 }
118 break;
119 }
120 byte2 = input[byteOff++] & 0xff;
121 byte3 = input[byteOff++] & 0xff;
122 if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
123 badInputLength = 3;
124 byteOff += byteOffAdjustment;
125 throw new MalformedInputException();
126 }
127 outputChar[0] = (char)(((byte1 & 0x0f) << 12)
128 | ((byte2 & 0x3f) << 6)
129 | (byte3 & 0x3f));
130 outputSize = 1;
131 } else if ((byte1 & 0xf8) == 0xf0) {
132 if (byteOff + 2 >= inEnd) {
133 savedBytes[0] = (byte)byte1;
134 if (byteOff >= inEnd) {
135 savedSize = 1;
136 } else if (byteOff + 1 >= inEnd) {
137 savedSize = 2;
138 savedBytes[1] = (byte)input[byteOff++];
139 } else {
140 savedSize = 3;
141 savedBytes[1] = (byte)input[byteOff++];
142 savedBytes[2] = (byte)input[byteOff++];
143 }
144 break;
145 }
146 byte2 = input[byteOff++] & 0xff;
147 byte3 = input[byteOff++] & 0xff;
148 byte4 = input[byteOff++] & 0xff;
149 if ((byte2 & 0xc0) != 0x80 ||
150 (byte3 & 0xc0) != 0x80 ||
151 (byte4 & 0xc0) != 0x80) {
152 badInputLength = 4;
153 byteOff += byteOffAdjustment;
154 throw new MalformedInputException();
155 }
156 // this byte sequence is UTF16 character
157 int ucs4 = (int)(0x07 & byte1) << 18 |
158 (int)(0x3f & byte2) << 12 |
159 (int)(0x3f & byte3) << 6 |
160 (int)(0x3f & byte4);
161 outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
162 outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
163 outputSize = 2;
164 } else {
165 badInputLength = 1;
166 byteOff += byteOffAdjustment;
167 throw new MalformedInputException();
168 }
169
170 if (charOff + outputSize > outEnd) {
171 byteOff = startByteOff;
172 byteOff += byteOffAdjustment;
173 throw new ConversionBufferFullException();
174 }
175
176 for (int i = 0; i < outputSize; i++) {
177 output[charOff + i] = outputChar[i];
178 }
179 charOff += outputSize;
180 }
181
182 byteOff += byteOffAdjustment;
183 return charOff - outOff;
184 }
185
186 /*
187 * Return the character set id
188 */
189 public String getCharacterEncoding() {
190 return "UTF8";
191 }
192
193 /*
194 * Reset after finding bad input
195 */
196 public void reset() {
197 byteOff = charOff = 0;
198 savedSize = 0;
199 }
200}