J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Portions Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | ******************************************************************************* |
| 28 | * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * |
| 29 | * * |
| 30 | * The original version of this source code and documentation is copyrighted * |
| 31 | * and owned by IBM, These materials are provided under terms of a License * |
| 32 | * Agreement between IBM and Sun. This technology is protected by multiple * |
| 33 | * US and International patents. This notice and attribution to IBM may not * |
| 34 | * to removed. * |
| 35 | ******************************************************************************* |
| 36 | */ |
| 37 | |
| 38 | package sun.text.normalizer; |
| 39 | |
/**
 * <p>Standalone utility class providing UTF16 character conversions and
 * indexing conversions.</p>
 * <p>Code that uses strings alone rarely needs modification.
 * By design, UTF-16 does not allow overlap, so searching for strings is a safe
 * operation. Similarly, concatenation is always safe. Substringing is safe if
 * the start and end are both on UTF-32 boundaries. In normal code, the values
 * for start and end are on those boundaries, since they arose from operations
 * like searching. If not, the nearest UTF-32 boundaries can be determined
 * using <code>bounds()</code>.</p>
 * <strong>Examples:</strong>
 * <p>The following examples illustrate use of some of these methods.
 * <pre>
 * // iteration forwards: Original
 * for (int i = 0; i &lt; s.length(); ++i) {
 *     char ch = s.charAt(i);
 *     doSomethingWith(ch);
 * }
 *
 * // iteration forwards: Changes for UTF-32
 * int ch;
 * for (int i = 0; i &lt; s.length(); i += UTF16.getCharCount(ch)) {
 *     ch = UTF16.charAt(s, i);
 *     doSomethingWith(ch);
 * }
 *
 * // iteration backwards: Original
 * for (int i = s.length() - 1; i &gt;= 0; --i) {
 *     char ch = s.charAt(i);
 *     doSomethingWith(ch);
 * }
 *
 * // iteration backwards: Changes for UTF-32
 * // (the condition must be i &gt;= 0, otherwise the first character is skipped)
 * int ch;
 * for (int i = s.length() - 1; i &gt;= 0; i -= UTF16.getCharCount(ch)) {
 *     ch = UTF16.charAt(s, i);
 *     doSomethingWith(ch);
 * }
 * </pre>
 * <strong>Notes:</strong>
 * <ul>
 *   <li>
 *   <strong>Naming:</strong> For clarity, High and Low surrogates are called
 *   <code>Lead</code> and <code>Trail</code> in the API, which gives a better
 *   sense of their ordering in a string. <code>offset16</code> and
 *   <code>offset32</code> are used to distinguish offsets to UTF-16
 *   boundaries vs offsets to UTF-32 boundaries. <code>int char32</code> is
 *   used to contain UTF-32 characters, as opposed to <code>char16</code>,
 *   which is a UTF-16 code unit.
 *   </li>
 *   <li>
 *   <strong>Roundtripping Offsets:</strong> You can always roundtrip from a
 *   UTF-32 offset to a UTF-16 offset and back. Because of the difference in
 *   structure, you can roundtrip from a UTF-16 offset to a UTF-32 offset and
 *   back if and only if <code>bounds(string, offset16) != TRAIL</code>.
 *   </li>
 *   <li>
 *   <strong>Exceptions:</strong> The error checking will throw an exception
 *   if indices are out of bounds. Other than that, all methods will
 *   behave reasonably, even if unmatched surrogates or out-of-bounds UTF-32
 *   values are present. <code>UCharacter.isLegal()</code> can be used to check
 *   for validity if desired.
 *   </li>
 *   <li>
 *   <strong>Unmatched Surrogates:</strong> If the string contains unmatched
 *   surrogates, then these are counted as one UTF-32 value. This matches
 *   their iteration behavior, which is vital. It also matches common display
 *   practice as missing glyphs (see the Unicode Standard Section 5.4, 5.5).
 *   </li>
 *   <li>
 *   <strong>Optimization:</strong> The method implementations may need
 *   optimization if the compiler doesn't fold static final methods. Since
 *   surrogate pairs will form an exceedingly small percentage of all the text
 *   in the world, the singleton case should always be optimized for.
 *   </li>
 * </ul>
 * @author Mark Davis, with help from Markus Scherer
 * @stable ICU 2.1
 */

public final class UTF16
{
    // public variables ---------------------------------------------------

    /**
     * The lowest Unicode code point value.
     * @stable ICU 2.1
     */
    public static final int CODEPOINT_MIN_VALUE = 0;
    /**
     * The highest Unicode code point value (scalar value) according to the
     * Unicode Standard.
     * @stable ICU 2.1
     */
    public static final int CODEPOINT_MAX_VALUE = 0x10ffff;
    /**
     * The minimum value for Supplementary code points
     * @stable ICU 2.1
     */
    public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
    /**
     * Lead surrogate minimum value
     * @stable ICU 2.1
     */
    public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
    /**
     * Trail surrogate minimum value
     * @stable ICU 2.1
     */
    public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
    /**
     * Lead surrogate maximum value
     * @stable ICU 2.1
     */
    public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
    /**
     * Trail surrogate maximum value
     * @stable ICU 2.1
     */
    public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
    /**
     * Surrogate minimum value
     * @stable ICU 2.1
     */
    public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;

    // constructor --------------------------------------------------------

    /**
     * Not instantiable: this class only hosts static utilities.
     */
    private UTF16()
    {
    }

    // public method ------------------------------------------------------

    /**
     * Extract a single UTF-32 value from a string.
     * Used when iterating forwards or backwards (with
     * <code>UTF16.getCharCount()</code>), as well as random access. If a
     * validity check is required, use <code>UCharacter.isLegal()</code> on
     * the return value.
     * If the char retrieved is part of a surrogate pair, its supplementary
     * character will be returned, regardless of whether offset16 points at
     * the lead or at the trail half. If a complete supplementary character is
     * not found the incomplete character (the lone surrogate) is returned.
     * @param source string of UTF-16 chars
     * @param offset16 UTF-16 offset of a code unit of the character.
     * @return UTF-32 value of the code point that contains the char at
     *         offset16.
     * @exception IndexOutOfBoundsException thrown if offset16 is out of
     *            bounds.
     * @stable ICU 2.1
     */
    public static int charAt(String source, int offset16)
    {
        if (offset16 < 0 || offset16 >= source.length()) {
            throw new StringIndexOutOfBoundsException(offset16);
        }

        char single = source.charAt(offset16);
        if (!isSurrogate(single)) {
            // the overwhelmingly common case: a BMP character
            return single;
        }

        // Convert the UTF-16 surrogate pair if necessary.
        // For simplicity in usage, and because the frequency of pairs is
        // low, look both directions.
        if (isLeadSurrogate(single)) {
            int next = offset16 + 1;
            if (next < source.length()) {
                char trail = source.charAt(next);
                if (isTrailSurrogate(trail)) {
                    return Character.toCodePoint(single, trail);
                }
            }
        }
        else {
            // single is a trail surrogate, so look for the lead before it
            int previous = offset16 - 1;
            if (previous >= 0) {
                char lead = source.charAt(previous);
                if (isLeadSurrogate(lead)) {
                    return Character.toCodePoint(lead, single);
                }
            }
        }
        return single; // return unmatched surrogate
    }

    /**
     * Extract a single UTF-32 value from a substring.
     * Used when iterating forwards or backwards (with
     * <code>UTF16.getCharCount()</code>), as well as random access. If a
     * validity check is required, use <code>UCharacter.isLegal()</code> on
     * the return value.
     * If the char retrieved is part of a surrogate pair, its supplementary
     * character will be returned. If a complete supplementary character is
     * not found the incomplete character will be returned. The other half of
     * a pair is only looked for inside <code>[start, limit)</code>; a pair
     * straddling either edge is treated as an unmatched surrogate.
     * @param source array of UTF-16 chars
     * @param start inclusive start offset of the substring in the source array
     * @param limit exclusive end offset of the substring in the source array
     * @param offset16 UTF-16 offset relative to start
     * @return UTF-32 value of the code point that contains the char at
     *         offset16.
     * @exception IndexOutOfBoundsException thrown if offset16 is not within
     *            the range of start and limit. The reported index is the
     *            absolute one (<code>start + offset16</code>).
     * @stable ICU 2.1
     */
    public static int charAt(char source[], int start, int limit,
                             int offset16)
    {
        offset16 += start; // from here on an absolute index into source
        if (offset16 < start || offset16 >= limit) {
            throw new ArrayIndexOutOfBoundsException(offset16);
        }

        char single = source[offset16];
        if (!isSurrogate(single)) {
            return single;
        }

        // Convert the UTF-16 surrogate pair if necessary.
        // For simplicity in usage, and because the frequency of pairs is
        // low, look both directions.
        if (isLeadSurrogate(single)) {
            int next = offset16 + 1;
            if (next < limit) {
                char trail = source[next];
                if (isTrailSurrogate(trail)) {
                    return Character.toCodePoint(single, trail);
                }
            }
        }
        else if (offset16 > start) {
            // single is a trail surrogate, so look for the lead before it
            char lead = source[offset16 - 1];
            if (isLeadSurrogate(lead)) {
                return Character.toCodePoint(lead, single);
            }
        }
        return single; // return unmatched surrogate
    }

    /**
     * Determines how many chars this char32 requires.
     * If a validity check is required, use <code>UCharacter.isLegal()</code>
     * on char32 before calling.
     * @param char32 the input codepoint.
     * @return 2 if is in supplementary space, otherwise 1.
     * @stable ICU 2.1
     */
    public static int getCharCount(int char32)
    {
        return (char32 < SUPPLEMENTARY_MIN_VALUE) ? 1 : 2;
    }

    /**
     * Determines whether the code value is a surrogate.
     * @param char16 the input character.
     * @return true iff the input character is a surrogate.
     * @stable ICU 2.1
     */
    public static boolean isSurrogate(char char16)
    {
        return LEAD_SURROGATE_MIN_VALUE <= char16 &&
               char16 <= TRAIL_SURROGATE_MAX_VALUE;
    }

    /**
     * Determines whether the character is a trail surrogate.
     * @param char16 the input character.
     * @return true iff the input character is a trail surrogate.
     * @stable ICU 2.1
     */
    public static boolean isTrailSurrogate(char char16)
    {
        return TRAIL_SURROGATE_MIN_VALUE <= char16 &&
               char16 <= TRAIL_SURROGATE_MAX_VALUE;
    }

    /**
     * Determines whether the character is a lead surrogate.
     * @param char16 the input character.
     * @return true iff the input character is a lead surrogate
     * @stable ICU 2.1
     */
    public static boolean isLeadSurrogate(char char16)
    {
        return LEAD_SURROGATE_MIN_VALUE <= char16 &&
               char16 <= LEAD_SURROGATE_MAX_VALUE;
    }

    /**
     * Returns the lead surrogate.
     * If a validity check is required, use <code>UCharacter.isLegal()</code>
     * on char32 before calling.
     * @param char32 the input character.
     * @return lead surrogate if the getCharCount(ch) is 2; <br>
     *         and 0 otherwise (note: 0 is not a valid lead surrogate).
     * @stable ICU 2.1
     */
    public static char getLeadSurrogate(int char32)
    {
        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
            return 0;
        }
        return (char)(LEAD_SURROGATE_OFFSET_ +
                      (char32 >> LEAD_SURROGATE_SHIFT_));
    }

    /**
     * Returns the trail surrogate.
     * If a validity check is required, use <code>UCharacter.isLegal()</code>
     * on char32 before calling.
     * @param char32 the input character.
     * @return the trail surrogate if the getCharCount(ch) is 2; <br>otherwise
     *         the character itself
     * @stable ICU 2.1
     */
    public static char getTrailSurrogate(int char32)
    {
        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
            return (char)char32;
        }
        return (char)(TRAIL_SURROGATE_MIN_VALUE +
                      (char32 & TRAIL_SURROGATE_MASK_));
    }

    /**
     * Convenience method corresponding to String.valueOf(char). Returns a one
     * or two char string containing the UTF-32 value in UTF16 format. If a
     * validity check is required, use <code>UCharacter.isLegal()</code>
     * on char32 before calling.
     * @param char32 the input character.
     * @return string value of char32 in UTF16 format
     * @exception IllegalArgumentException thrown if char32 is an invalid
     *            codepoint.
     * @stable ICU 2.1
     */
    public static String valueOf(int char32)
    {
        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
            throw new IllegalArgumentException("Illegal codepoint");
        }
        return toString(char32);
    }

    /**
     * Append a single UTF-32 value to the end of a StringBuffer.
     * If a validity check is required, use <code>UCharacter.isLegal()</code>
     * on char32 before calling.
     * @param target the buffer to append to
     * @param char32 value to append.
     * @return the updated StringBuffer (the same instance as target)
     * @exception IllegalArgumentException thrown when char32 does not lie
     *            within the range of the Unicode codepoints
     * @stable ICU 2.1
     */
    public static StringBuffer append(StringBuffer target, int char32)
    {
        // Check for irregular values
        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
            throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));
        }

        // Write the UTF-16 values
        if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
            target.append(getLeadSurrogate(char32));
            target.append(getTrailSurrogate(char32));
        }
        else {
            target.append((char)char32);
        }
        return target;
    }

    //// for StringPrep
    /**
     * Shifts offset16 by the argument number of codepoints within a subarray.
     * A lead surrogate immediately followed by a trail surrogate (both inside
     * the subarray) counts as one code point; every other code unit,
     * including an unmatched surrogate, counts as one code point as well.
     * @param source char array
     * @param start inclusive start position of the subarray to be performed on
     * @param limit exclusive end position of the subarray to be performed on
     * @param offset16 UTF16 position to shift relative to start
     * @param shift32 number of codepoints to shift; negative values shift
     *        towards start
     * @return new shifted offset16 relative to start
     * @exception IndexOutOfBoundsException if the new offset16 is out of
     *            bounds with respect to the subarray or the subarray bounds
     *            are out of range.
     * @stable ICU 2.1
     */
    public static int moveCodePointOffset(char source[], int start, int limit,
                                          int offset16, int shift32)
    {
        int size = source.length;
        if (start < 0 || limit < start) {
            throw new StringIndexOutOfBoundsException(start);
        }
        if (limit > size) {
            throw new StringIndexOutOfBoundsException(limit);
        }
        // Compare against the subarray length instead of computing
        // start + offset16 first: that sum can overflow for huge offsets.
        if (offset16 < 0 || offset16 > limit - start) {
            throw new StringIndexOutOfBoundsException(offset16);
        }

        int result = start + offset16; // absolute index, start <= result <= limit
        int count;                     // code points still to be skipped
        if (shift32 > 0) {
            // Cheap early rejection (written subtraction-side to avoid
            // overflow); the exact check is the count test below.
            if (shift32 > size - result) {
                throw new StringIndexOutOfBoundsException(result);
            }
            count = shift32;
            while (result < limit && count > 0) {
                char ch = source[result];
                if (isLeadSurrogate(ch) && (result + 1 < limit) &&
                    isTrailSurrogate(source[result + 1])) {
                    result++; // step over the trail half of a pair as well
                }
                count--;
                result++;
            }
        }
        else {
            if (result + shift32 < start) {
                throw new StringIndexOutOfBoundsException(result);
            }
            for (count = -shift32; count > 0; count--) {
                result--;
                if (result < start) {
                    // ran off the front; count stays non-zero and is
                    // reported below
                    break;
                }
                char ch = source[result];
                if (isTrailSurrogate(ch) && result > start &&
                    isLeadSurrogate(source[result - 1])) {
                    result--; // step over the lead half of a pair as well
                }
            }
        }
        if (count != 0) {
            // the subarray held fewer code points than requested
            throw new StringIndexOutOfBoundsException(shift32);
        }
        return result - start;
    }

    // private data members -------------------------------------------------

    /**
     * Shift value for lead surrogate to form a supplementary character.
     */
    private static final int LEAD_SURROGATE_SHIFT_ = 10;

    /**
     * Mask to retrieve the significant value from a trail surrogate.
     */
    private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;

    /**
     * Value that all lead surrogate starts with
     */
    private static final int LEAD_SURROGATE_OFFSET_ =
        LEAD_SURROGATE_MIN_VALUE -
        (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);

    // private methods ------------------------------------------------------

    /**
     * <p>Converts argument code point and returns a String object representing
     * the code point's value in UTF16 format.</p>
     * <p>This method does not check for the validity of the codepoint, the
     * results are not guaranteed if an invalid codepoint is passed as
     * argument.</p>
     * <p>The result is a string whose length is 1 for non-supplementary code
     * points, 2 otherwise.</p>
     * @param ch code point
     * @return string representation of the code point
     */
    private static String toString(int ch)
    {
        if (ch < SUPPLEMENTARY_MIN_VALUE) {
            return String.valueOf((char)ch);
        }
        return new String(new char[] {
            getLeadSurrogate(ch), getTrailSurrogate(ch)
        });
    }
}