Lucas Eckels | dc4699f | 2012-08-06 15:22:01 -0700 | [diff] [blame] | 1 | /* ================================================================ */ |
| 2 | /* |
| 3 | File: ConvertUTF7.c |
| 4 | Author: David B. Goldsmith |
| 5 | Copyright (C) 1994, 1996 IBM Corporation All rights reserved. |
| 6 | Revisions: Header update only July, 2001. |
| 7 | |
| 8 | This code is copyrighted. Under the copyright laws, this code may not |
| 9 | be copied, in whole or part, without prior written consent of IBM Corporation. |
| 10 | |
| 11 | IBM Corporation grants the right to use this code as long as this ENTIRE |
| 12 | copyright notice is reproduced in the code. The code is provided |
| 13 | AS-IS, AND IBM CORPORATION DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR |
| 14 | IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF |
| 15 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT |
| 16 | WILL IBM CORPORATION BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, |
| 17 | WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS |
| 18 | INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY |
| 19 | LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN |
| 20 | IF IBM CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. |
| 21 | BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF |
| 22 | LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE |
| 23 | LIMITATION MAY NOT APPLY TO YOU. |
| 24 | |
| 25 | RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the |
| 26 | government is subject to restrictions as set forth in subparagraph |
| 27 | (c)(l)(ii) of the Rights in Technical Data and Computer Software |
| 28 | clause at DFARS 252.227-7013 and FAR 52.227-19. |
| 29 | |
| 30 | This code may be protected by one or more U.S. and International |
| 31 | Patents. |
| 32 | |
| 33 | */ |
| 34 | |
| 35 | #include "CVTUTF7.H" |
| 36 | |
/* The 64 base64 digits, in value order (index == 6-bit value). */
static const char base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

/* Reverse lookup: ASCII code -> base64 value, or -1 if the code is not a
   base64 digit.  Filled in by tabinit(). */
static short invbase64[128];

/* Characters that are always written as themselves. */
static const char direct[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "'(),-./:?";

/* Characters that are written as themselves only when the caller of
   ConvertUCS2toUTF7() passes a nonzero `optional` argument. */
static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}";

/* space, tab, return, line feed -- also written as themselves */
static const char spaces[] = " \011\015\012";

/* Per-ASCII-code flags, nonzero when the code must be base64 encoded.
   tabinit() clears the entries for `direct` and `spaces` in both tables and
   the entries for `optional` in mustshiftopt only. */
static char mustshiftsafe[128];
static char mustshiftopt[128];

/* Nonzero until tabinit() has populated the lookup tables above. */
static int needtables = 1;

/* Character that opens a base64 run, and the one that explicitly closes it. */
#define SHIFT_IN '+'
#define SHIFT_OUT '-'
| 51 | |
| 52 | static void |
| 53 | tabinit() |
| 54 | { |
| 55 | int i, limit; |
| 56 | |
| 57 | for (i = 0; i < 128; ++i) |
| 58 | { |
| 59 | mustshiftopt[i] = mustshiftsafe[i] = 1; |
| 60 | invbase64[i] = -1; |
| 61 | } |
| 62 | limit = strlen(direct); |
| 63 | for (i = 0; i < limit; ++i) |
| 64 | mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0; |
| 65 | limit = strlen(spaces); |
| 66 | for (i = 0; i < limit; ++i) |
| 67 | mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0; |
| 68 | limit = strlen(optional); |
| 69 | for (i = 0; i < limit; ++i) |
| 70 | mustshiftopt[optional[i]] = 0; |
| 71 | limit = strlen(base64); |
| 72 | for (i = 0; i < limit; ++i) |
| 73 | invbase64[base64[i]] = i; |
| 74 | |
| 75 | needtables = 0; |
| 76 | } |
| 77 | |
/*
 * Bit-buffer helpers used by both converters.
 *
 * The buffer is a 32-bit window held in the low 32 bits of BITbuffer; valid
 * bits are left-justified in that window (the oldest bit sits at bit 31) and
 * BITS_IN_BUFFER counts them.  unsigned long may be wider than 32 bits, so
 * every read and shift is masked back to 32 bits; otherwise bits that were
 * already consumed would linger above bit 31 and leak into later reads.
 *
 * DECLARE_BIT_BUFFER  declares the locals the other macros operate on.
 * WRITE_N_BITS(x, n)  appends the low n bits of x; evaluates to the new bit
 *                     count.  Requires n + BITS_IN_BUFFER <= 32.  n == 0 is
 *                     a no-op.
 * READ_N_BITS(n)      removes and evaluates to the oldest n bits (0 <= n < 32).
 *                     n == 0 yields 0.
 * TARGETCHECK         sets result = targetExhausted and breaks out of the
 *                     enclosing loop when the output is full.  It must stay a
 *                     bare block (not do { } while (0)) so that the break
 *                     reaches the caller's loop.
 *
 * The arguments are evaluated more than once; pass expressions without side
 * effects.
 */
#define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0
#define BITS_IN_BUFFER bufferbits
#define BITBUF_MASK32 0xFFFFFFFFUL
#define BITBUF_LOW_BITS(n) ((n) > 0 ? (BITBUF_MASK32 >> (32-(n))) : 0UL)
#define WRITE_N_BITS(x, n) ((BITbuffer |= ((n) > 0 ? ((((unsigned long)(x)) & BITBUF_LOW_BITS(n)) << (32-(n)-bufferbits)) : 0UL)), bufferbits += (n) )
#define READ_N_BITS(n) ((buffertemp = ((n) > 0 ? ((BITbuffer & BITBUF_MASK32) >> (32-(n))) : 0UL)), (BITbuffer = ((BITbuffer << (n)) & BITBUF_MASK32)), (bufferbits -= (n)), buffertemp)
#define TARGETCHECK {if (target >= targetEnd) {result = targetExhausted; break;}}
| 83 | |
| 84 | ConversionResult ConvertUCS2toUTF7( |
| 85 | UCS2** sourceStart, UCS2* sourceEnd, |
| 86 | char** targetStart, char* targetEnd, |
| 87 | int optional, int verbose) |
| 88 | { |
| 89 | ConversionResult result = ok; |
| 90 | DECLARE_BIT_BUFFER; |
| 91 | int shifted = 0, needshift = 0, done = 0; |
| 92 | register UCS2 *source = *sourceStart; |
| 93 | register char *target = *targetStart; |
| 94 | char *mustshift; |
| 95 | |
| 96 | if (needtables) |
| 97 | tabinit(); |
| 98 | |
| 99 | if (optional) |
| 100 | mustshift = mustshiftopt; |
| 101 | else |
| 102 | mustshift = mustshiftsafe; |
| 103 | |
| 104 | do |
| 105 | { |
| 106 | register UCS2 r; |
| 107 | |
| 108 | if (!(done = (source >= sourceEnd))) |
| 109 | r = *source++; |
| 110 | needshift = (!done && ((r > 0x7f) || mustshift[r])); |
| 111 | |
| 112 | if (needshift && !shifted) |
| 113 | { |
| 114 | TARGETCHECK; |
| 115 | *target++ = SHIFT_IN; |
| 116 | /* Special case handling of the SHIFT_IN character */ |
| 117 | if (r == (UCS2)SHIFT_IN) { |
| 118 | TARGETCHECK; |
| 119 | *target++ = SHIFT_OUT; |
| 120 | } |
| 121 | else |
| 122 | shifted = 1; |
| 123 | } |
| 124 | |
| 125 | if (shifted) |
| 126 | { |
| 127 | /* Either write the character to the bit buffer, or pad |
| 128 | the bit buffer out to a full base64 character. |
| 129 | */ |
| 130 | if (needshift) |
| 131 | WRITE_N_BITS(r, 16); |
| 132 | else |
| 133 | WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6); |
| 134 | |
| 135 | /* Flush out as many full base64 characters as possible |
| 136 | from the bit buffer. |
| 137 | */ |
| 138 | while ((target < targetEnd) && BITS_IN_BUFFER >= 6) |
| 139 | { |
| 140 | *target++ = base64[READ_N_BITS(6)]; |
| 141 | } |
| 142 | |
| 143 | if (BITS_IN_BUFFER >= 6) |
| 144 | TARGETCHECK; |
| 145 | |
| 146 | if (!needshift) |
| 147 | { |
| 148 | /* Write the explicit shift out character if |
| 149 | 1) The caller has requested we always do it, or |
| 150 | 2) The directly encoded character is in the |
| 151 | base64 set, or |
| 152 | 3) The directly encoded character is SHIFT_OUT. |
| 153 | */ |
| 154 | if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT))) |
| 155 | { |
| 156 | TARGETCHECK; |
| 157 | *target++ = SHIFT_OUT; |
| 158 | } |
| 159 | shifted = 0; |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | /* The character can be directly encoded as ASCII. */ |
| 164 | if (!needshift && !done) |
| 165 | { |
| 166 | TARGETCHECK; |
| 167 | *target++ = (char) r; |
| 168 | } |
| 169 | |
| 170 | } |
| 171 | while (!done); |
| 172 | |
| 173 | *sourceStart = source; |
| 174 | *targetStart = target; |
| 175 | return result; |
| 176 | } |
| 177 | |
| 178 | ConversionResult ConvertUTF7toUCS2( |
| 179 | char** sourceStart, char* sourceEnd, |
| 180 | UCS2** targetStart, UCS2* targetEnd) |
| 181 | { |
| 182 | ConversionResult result = ok; |
| 183 | DECLARE_BIT_BUFFER; |
| 184 | int shifted = 0, first = 0, wroteone = 0, base64EOF, base64value, done; |
| 185 | unsigned int c, prevc; |
| 186 | unsigned long junk; |
| 187 | register char *source = *sourceStart; |
| 188 | register UCS2 *target = *targetStart; |
| 189 | |
| 190 | if (needtables) |
| 191 | tabinit(); |
| 192 | |
| 193 | do |
| 194 | { |
| 195 | /* read an ASCII character c */ |
| 196 | if (!(done = (source >= sourceEnd))) |
| 197 | c = *source++; |
| 198 | if (shifted) |
| 199 | { |
| 200 | /* We're done with a base64 string if we hit EOF, it's not a valid |
| 201 | ASCII character, or it's not in the base64 set. |
| 202 | */ |
| 203 | base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0; |
| 204 | if (base64EOF) |
| 205 | { |
| 206 | shifted = 0; |
| 207 | /* If the character causing us to drop out was SHIFT_IN or |
| 208 | SHIFT_OUT, it may be a special escape for SHIFT_IN. The |
| 209 | test for SHIFT_IN is not necessary, but allows an alternate |
| 210 | form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This |
| 211 | only works for some values of SHIFT_IN. |
| 212 | */ |
| 213 | if (!done && (c == SHIFT_IN || c == SHIFT_OUT)) |
| 214 | { |
| 215 | /* get another character c */ |
| 216 | prevc = c; |
| 217 | if (!(done = (source >= sourceEnd))) |
| 218 | c = *source++; |
| 219 | /* If no base64 characters were encountered, and the |
| 220 | character terminating the shift sequence was |
| 221 | SHIFT_OUT, then it's a special escape for SHIFT_IN. |
| 222 | */ |
| 223 | if (first && prevc == SHIFT_OUT) |
| 224 | { |
| 225 | /* write SHIFT_IN unicode */ |
| 226 | TARGETCHECK; |
| 227 | *target++ = (UCS2)SHIFT_IN; |
| 228 | } |
| 229 | else if (!wroteone) |
| 230 | { |
| 231 | result = sourceCorrupt; |
| 232 | /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; |
| 233 | } |
| 234 | } |
| 235 | else if (!wroteone) |
| 236 | { |
| 237 | result = sourceCorrupt; |
| 238 | /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; |
| 239 | } |
| 240 | } |
| 241 | else |
| 242 | { |
| 243 | /* Add another 6 bits of base64 to the bit buffer. */ |
| 244 | WRITE_N_BITS(base64value, 6); |
| 245 | first = 0; |
| 246 | } |
| 247 | |
| 248 | /* Extract as many full 16 bit characters as possible from the |
| 249 | bit buffer. |
| 250 | */ |
| 251 | while (BITS_IN_BUFFER >= 16 && (target < targetEnd)) |
| 252 | { |
| 253 | /* write a unicode */ |
| 254 | *target++ = READ_N_BITS(16); |
| 255 | wroteone = 1; |
| 256 | } |
| 257 | |
| 258 | if (BITS_IN_BUFFER >= 16) |
| 259 | TARGETCHECK; |
| 260 | |
| 261 | if (base64EOF) |
| 262 | { |
| 263 | junk = READ_N_BITS(BITS_IN_BUFFER); |
| 264 | if (junk) |
| 265 | { |
| 266 | result = sourceCorrupt; |
| 267 | /* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */; |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | if (!shifted && !done) |
| 273 | { |
| 274 | if (c == SHIFT_IN) |
| 275 | { |
| 276 | shifted = 1; |
| 277 | first = 1; |
| 278 | wroteone = 0; |
| 279 | } |
| 280 | else |
| 281 | { |
| 282 | /* It must be a directly encoded character. */ |
| 283 | if (c > 0x7f) |
| 284 | { |
| 285 | result = sourceCorrupt; |
| 286 | /* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */; |
| 287 | } |
| 288 | /* write a unicode */ |
| 289 | TARGETCHECK; |
| 290 | *target++ = c; |
| 291 | } |
| 292 | } |
| 293 | } |
| 294 | while (!done); |
| 295 | |
| 296 | *sourceStart = source; |
| 297 | *targetStart = target; |
| 298 | return result; |
| 299 | } |