blob: 6958753a92d5374f7e8261526a88e54854fed4fb [file] [log] [blame]
Lucas Eckelsdc4699f2012-08-06 15:22:01 -07001/* ================================================================ */
2/*
3File: ConvertUTF7.c
4Author: David B. Goldsmith
5Copyright (C) 1994, 1996 IBM Corporation All rights reserved.
6Revisions: Header update only July, 2001.
7
8This code is copyrighted. Under the copyright laws, this code may not
9be copied, in whole or part, without prior written consent of IBM Corporation.
10
11IBM Corporation grants the right to use this code as long as this ENTIRE
12copyright notice is reproduced in the code. The code is provided
13AS-IS, AND IBM CORPORATION DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
14IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
15MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
16WILL IBM CORPORATION BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
17WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
18INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
19LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
20IF IBM CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
21BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
22LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
23LIMITATION MAY NOT APPLY TO YOU.
24
25RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
26government is subject to restrictions as set forth in subparagraph
27(c)(l)(ii) of the Rights in Technical Data and Computer Software
28clause at DFARS 252.227-7013 and FAR 52.227-19.
29
30This code may be protected by one or more U.S. and International
31Patents.
32
33*/
34
35#include "CVTUTF7.H"
36
/* Base64 alphabet used inside shifted sequences: 6-bit value -> character. */
static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Inverse of base64[]: 7-bit character -> 6-bit value, or -1 when the
   character is not in the alphabet.  Filled in by tabinit(). */
static short invbase64[128];

/* Characters the encoder always writes directly. */
static const char direct[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
/* Characters the encoder writes directly only when its "optional" argument
   is non-zero. */
static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}";
static const char spaces[] = " \011\015\012"; /* space, tab, return, line feed */
/* mustshiftsafe[c] / mustshiftopt[c] are non-zero when the 7-bit character c
   has to be base64-encoded; the "opt" table additionally lets the characters
   in optional[] through.  Both are filled in by tabinit(). */
static char mustshiftsafe[128];
static char mustshiftopt[128];

/* Non-zero until tabinit() has run.  The flag is a plain int and is tested
   and cleared without any synchronisation. */
static int needtables = 1;

#define SHIFT_IN '+'
#define SHIFT_OUT '-'

/*
 * Build the run-time lookup tables (invbase64, mustshiftsafe, mustshiftopt)
 * from the constant strings above and clear needtables.
 *
 * The strings are walked up to their terminating NUL, so no <string.h>
 * routine is needed, and every subscript is converted to unsigned char so a
 * platform with signed plain char can never produce a negative index.
 */
static void
tabinit(void)
{
    const char *p;
    int i;

    /* Start from "everything must be shifted" and "nothing is base64". */
    for (i = 0; i < 128; ++i)
    {
        mustshiftopt[i] = mustshiftsafe[i] = 1;
        invbase64[i] = -1;
    }

    /* Direct characters and white space never need shifting. */
    for (p = direct; *p != '\0'; ++p)
        mustshiftopt[(unsigned char)*p] = mustshiftsafe[(unsigned char)*p] = 0;
    for (p = spaces; *p != '\0'; ++p)
        mustshiftopt[(unsigned char)*p] = mustshiftsafe[(unsigned char)*p] = 0;

    /* Optional characters are exempt only in the "opt" table. */
    for (p = optional; *p != '\0'; ++p)
        mustshiftopt[(unsigned char)*p] = 0;

    /* Invert the base64 alphabet. */
    for (i = 0; base64[i] != '\0'; ++i)
        invbase64[(unsigned char)base64[i]] = (short)i;

    needtables = 0;
}
77
/*
 * Bit-buffer helpers shared by both converters.
 *
 * DECLARE_BIT_BUFFER declares the locals BITbuffer, buffertemp and
 * bufferbits.  The buffer is a 32-bit window kept left-aligned in BITbuffer:
 * the oldest unread bit is bit 31 and bufferbits counts the valid bits.
 *
 * unsigned long may be wider than 32 bits, so every update masks the value
 * back to 32 bits; otherwise bits that were already consumed would survive
 * above bit 31 and be returned again by a later READ_N_BITS.  The mask for
 * the low n bits is built from an unsigned constant (no left shift of a
 * negative value), and a call with n == 0 never evaluates a shift by the
 * full window width.
 *
 * Both macros evaluate n more than once; callers pass side-effect-free
 * expressions.  WRITE_N_BITS yields the new bit count, READ_N_BITS yields
 * the n bits removed from the top of the window.
 */
#define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0
#define BITS_IN_BUFFER bufferbits
#define BIT_BUFFER_MASK 0xFFFFFFFFUL
#define WRITE_N_BITS(x, n) \
    ((void)((n) > 0 \
        ? (BITbuffer |= ((((unsigned long)(x)) & ((1UL << (n)) - 1UL)) << (32 - (n) - bufferbits))) \
        : 0UL), \
     bufferbits += (n))
#define READ_N_BITS(n) \
    ((buffertemp = ((n) > 0 ? ((BITbuffer & BIT_BUFFER_MASK) >> (32 - (n))) : 0UL)), \
     (BITbuffer = (BITbuffer << (n)) & BIT_BUFFER_MASK), \
     (bufferbits -= (n)), \
     buffertemp)
/*
 * Stop converting when the output is full.  This deliberately stays a bare
 * block rather than do { } while (0): its break has to leave the caller's
 * conversion loop.  It relies on the caller's locals target, targetEnd and
 * result.
 */
#define TARGETCHECK {if (target >= targetEnd) {result = targetExhausted; break;}}
83
84ConversionResult ConvertUCS2toUTF7(
85 UCS2** sourceStart, UCS2* sourceEnd,
86 char** targetStart, char* targetEnd,
87 int optional, int verbose)
88{
89 ConversionResult result = ok;
90 DECLARE_BIT_BUFFER;
91 int shifted = 0, needshift = 0, done = 0;
92 register UCS2 *source = *sourceStart;
93 register char *target = *targetStart;
94 char *mustshift;
95
96 if (needtables)
97 tabinit();
98
99 if (optional)
100 mustshift = mustshiftopt;
101 else
102 mustshift = mustshiftsafe;
103
104 do
105 {
106 register UCS2 r;
107
108 if (!(done = (source >= sourceEnd)))
109 r = *source++;
110 needshift = (!done && ((r > 0x7f) || mustshift[r]));
111
112 if (needshift && !shifted)
113 {
114 TARGETCHECK;
115 *target++ = SHIFT_IN;
116 /* Special case handling of the SHIFT_IN character */
117 if (r == (UCS2)SHIFT_IN) {
118 TARGETCHECK;
119 *target++ = SHIFT_OUT;
120 }
121 else
122 shifted = 1;
123 }
124
125 if (shifted)
126 {
127 /* Either write the character to the bit buffer, or pad
128 the bit buffer out to a full base64 character.
129 */
130 if (needshift)
131 WRITE_N_BITS(r, 16);
132 else
133 WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6);
134
135 /* Flush out as many full base64 characters as possible
136 from the bit buffer.
137 */
138 while ((target < targetEnd) && BITS_IN_BUFFER >= 6)
139 {
140 *target++ = base64[READ_N_BITS(6)];
141 }
142
143 if (BITS_IN_BUFFER >= 6)
144 TARGETCHECK;
145
146 if (!needshift)
147 {
148 /* Write the explicit shift out character if
149 1) The caller has requested we always do it, or
150 2) The directly encoded character is in the
151 base64 set, or
152 3) The directly encoded character is SHIFT_OUT.
153 */
154 if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT)))
155 {
156 TARGETCHECK;
157 *target++ = SHIFT_OUT;
158 }
159 shifted = 0;
160 }
161 }
162
163 /* The character can be directly encoded as ASCII. */
164 if (!needshift && !done)
165 {
166 TARGETCHECK;
167 *target++ = (char) r;
168 }
169
170 }
171 while (!done);
172
173 *sourceStart = source;
174 *targetStart = target;
175 return result;
176}
177
178ConversionResult ConvertUTF7toUCS2(
179 char** sourceStart, char* sourceEnd,
180 UCS2** targetStart, UCS2* targetEnd)
181{
182 ConversionResult result = ok;
183 DECLARE_BIT_BUFFER;
184 int shifted = 0, first = 0, wroteone = 0, base64EOF, base64value, done;
185 unsigned int c, prevc;
186 unsigned long junk;
187 register char *source = *sourceStart;
188 register UCS2 *target = *targetStart;
189
190 if (needtables)
191 tabinit();
192
193 do
194 {
195 /* read an ASCII character c */
196 if (!(done = (source >= sourceEnd)))
197 c = *source++;
198 if (shifted)
199 {
200 /* We're done with a base64 string if we hit EOF, it's not a valid
201 ASCII character, or it's not in the base64 set.
202 */
203 base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0;
204 if (base64EOF)
205 {
206 shifted = 0;
207 /* If the character causing us to drop out was SHIFT_IN or
208 SHIFT_OUT, it may be a special escape for SHIFT_IN. The
209 test for SHIFT_IN is not necessary, but allows an alternate
210 form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This
211 only works for some values of SHIFT_IN.
212 */
213 if (!done && (c == SHIFT_IN || c == SHIFT_OUT))
214 {
215 /* get another character c */
216 prevc = c;
217 if (!(done = (source >= sourceEnd)))
218 c = *source++;
219 /* If no base64 characters were encountered, and the
220 character terminating the shift sequence was
221 SHIFT_OUT, then it's a special escape for SHIFT_IN.
222 */
223 if (first && prevc == SHIFT_OUT)
224 {
225 /* write SHIFT_IN unicode */
226 TARGETCHECK;
227 *target++ = (UCS2)SHIFT_IN;
228 }
229 else if (!wroteone)
230 {
231 result = sourceCorrupt;
232 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
233 }
234 }
235 else if (!wroteone)
236 {
237 result = sourceCorrupt;
238 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
239 }
240 }
241 else
242 {
243 /* Add another 6 bits of base64 to the bit buffer. */
244 WRITE_N_BITS(base64value, 6);
245 first = 0;
246 }
247
248 /* Extract as many full 16 bit characters as possible from the
249 bit buffer.
250 */
251 while (BITS_IN_BUFFER >= 16 && (target < targetEnd))
252 {
253 /* write a unicode */
254 *target++ = READ_N_BITS(16);
255 wroteone = 1;
256 }
257
258 if (BITS_IN_BUFFER >= 16)
259 TARGETCHECK;
260
261 if (base64EOF)
262 {
263 junk = READ_N_BITS(BITS_IN_BUFFER);
264 if (junk)
265 {
266 result = sourceCorrupt;
267 /* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */;
268 }
269 }
270 }
271
272 if (!shifted && !done)
273 {
274 if (c == SHIFT_IN)
275 {
276 shifted = 1;
277 first = 1;
278 wroteone = 0;
279 }
280 else
281 {
282 /* It must be a directly encoded character. */
283 if (c > 0x7f)
284 {
285 result = sourceCorrupt;
286 /* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */;
287 }
288 /* write a unicode */
289 TARGETCHECK;
290 *target++ = c;
291 }
292 }
293 }
294 while (!done);
295
296 *sourceStart = source;
297 *targetStart = target;
298 return result;
299}