/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #include <utils/String16.h> |
| 18 | |
| 19 | #include <utils/Debug.h> |
| 20 | #include <utils/Log.h> |
| 21 | #include <utils/String8.h> |
| 22 | #include <utils/TextOutput.h> |
| 23 | #include <utils/threads.h> |
| 24 | |
| 25 | #include <private/utils/Static.h> |
| 26 | |
// Byte-order conversion helpers (ntohl/htonl/ntohs/htons).
//
// When HAVE_WINSOCK is defined, local macro versions are supplied; otherwise
// the definitions come from <netinet/in.h>.
#ifdef HAVE_WINSOCK
   // Discard any pre-existing macro definitions before redefining them below.
   // (The names must match the macros being defined: ntohl/htonl/ntohs/htons.)
#  undef ntohl
#  undef htonl
#  undef ntohs
#  undef htons

#  ifdef HAVE_LITTLE_ENDIAN
     // Little-endian host: network order is the byte-swapped value.
#    define ntohl(x)    ( ((x) << 24) | (((x) >> 24) & 255) | (((x) << 8) & 0xff0000) | (((x) >> 8) & 0xff00) )
#    define htonl(x)    ntohl(x)
#    define ntohs(x)    ( (((x) << 8) & 0xff00) | (((x) >> 8) & 255) )
#    define htons(x)    ntohs(x)
#  else
     // Otherwise the value is passed through unchanged.
#    define ntohl(x)    (x)
#    define htonl(x)    (x)
#    define ntohs(x)    (x)
#    define htons(x)    (x)
#  endif
#else
#  include <netinet/in.h>
#endif
| 47 | |
| 48 | #include <memory.h> |
| 49 | #include <stdio.h> |
| 50 | #include <ctype.h> |
| 51 | |
| 52 | // --------------------------------------------------------------------------- |
| 53 | |
// Compares two NUL-terminated UTF-16 strings unit by unit.
// Returns 0 when they are equal; otherwise the difference between the first
// pair of units that differ (s1's unit minus s2's unit).
int strcmp16(const char16_t *s1, const char16_t *s2)
{
    for (;; ++s1, ++s2) {
        const int diff = (int)*s1 - (int)*s2;
        // Stop at the first mismatch, or when both strings ended together.
        if (diff != 0 || *s1 == 0) {
            return diff;
        }
    }
}
| 67 | |
// Compares at most n units of two NUL-terminated UTF-16 strings.
// Returns 0 when the compared prefixes are equal (or n is 0); otherwise the
// difference between the first pair of units that differ.
int strncmp16(const char16_t *s1, const char16_t *s2, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        const int diff = (int)s1[i] - (int)s2[i];
        // A mismatch, or a shared terminator, ends the comparison early.
        if (diff != 0 || s1[i] == 0) {
            return diff;
        }
    }
    return 0;
}
| 81 | |
// Copies the NUL-terminated UTF-16 string src (terminator included) into dst
// and returns dst. dst must be large enough to hold the whole string.
char16_t *strcpy16(char16_t *dst, const char16_t *src)
{
    size_t i = 0;
    // The assignment copies the unit; the loop ends right after the
    // terminator itself has been copied.
    while ((dst[i] = src[i]) != 0) {
        ++i;
    }
    return dst;
}
| 94 | |
// Returns the number of UTF-16 units in s before the terminating NUL.
size_t strlen16(const char16_t *s)
{
    size_t count = 0;
    while (s[count] != 0) {
        ++count;
    }
    return count;
}
| 102 | |
| 103 | |
// Copies at most n UTF-16 units from src to dst, stopping after a NUL has
// been copied, and then always writes one more NUL after the last unit
// written. Returns dst.
//
// Because of the unconditional trailing NUL, dst must have room for n+1
// units. Unlike the C library strncpy, the rest of dst is not zero-filled.
char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n)
{
    char16_t *q = dst;
    const char16_t *p = src;

    while (n) {
        n--;
        // Keep the full 16-bit unit: a narrower temporary would truncate the
        // stored value and treat units such as 0x0100 as the terminator.
        const char16_t ch = *p++;
        *q++ = ch;
        if ( !ch )
            break;
    }

    *q = 0;

    return dst;
}
| 121 | |
// Returns the length of s in UTF-16 units, looking at no more than maxlen
// units.
size_t strnlen16(const char16_t *s, size_t maxlen)
{
    size_t count = 0;

    /* Important: the bound is checked before s[count] is read, because the
       unit just past the maximum may not be readable. */
    for (; count < maxlen; ++count) {
        if (s[count] == 0) {
            break;
        }
    }
    return count;
}
| 134 | |
// Compares two UTF-16 buffers given with explicit lengths n1 and n2.
//
// The common prefix (min(n1, n2) units) is compared first and the first
// non-zero difference is returned. If the prefixes match, the result is
// decided by the next unit of the longer buffer: negated when s2 is longer,
// as-is when s1 is longer, and 0 when both lengths are equal.
int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2)
{
    const size_t common = (n1 < n2) ? n1 : n2;

    for (size_t i = 0; i < common; ++i) {
        const int diff = (int)s1[i] - (int)s2[i];
        if (diff != 0) {
            return diff;
        }
    }

    if (n1 < n2) {
        return 0 - (int)s2[common];
    }
    if (n1 > n2) {
        return (int)s1[common] - 0;
    }
    return 0;
}
| 153 | |
// Same comparison as strzcmp16, except that every unit read from s2N is
// passed through ntohs() before it is compared; units of s1H are used as-is.
int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2)
{
    const size_t common = (n1 < n2) ? n1 : n2;

    for (size_t i = 0; i < common; ++i) {
        const char16_t converted = ntohs(s2N[i]);
        const int diff = (int)s1H[i] - (int)converted;
        if (diff != 0) {
            return diff;
        }
    }

    // Prefixes match: the next unit of the longer buffer decides.
    if (n1 < n2) {
        return 0 - (int)ntohs(s2N[common]);
    }
    if (n1 > n2) {
        return (int)s1H[common] - 0;
    }
    return 0;
}
| 174 | |
| 175 | // --------------------------------------------------------------------------- |
| 176 | |
| 177 | namespace android { |
| 178 | |
// Returns the length (1..4) of the UTF-8 sequence introduced by byte ch.
//
// The high nibble of ch selects a 2-bit field in a packed constant; the field
// holds (length - 1). High nibbles 0xC/0xD give 2, 0xE gives 3, 0xF gives 4,
// and every other value (ASCII and continuation bytes) gives 1.
static inline size_t
utf8_char_len(uint8_t ch)
{
    const uint32_t kPackedLengths = 0xe5000000;
    // (ch >> 3) & 0x1e == 2 * (ch >> 4): the bit offset of the 2-bit field.
    const unsigned int fieldShift = (ch >> 3) & 0x1e;
    return ((kPackedLengths >> fieldShift) & 3) + 1;
}
| 184 | |
// Appends the low six payload bits of a UTF-8 continuation byte to unicode.
#define UTF8_SHIFT_AND_MASK(unicode, byte)  (unicode)<<=6; (unicode) |= (0x3f & (byte));

// Decodes one UTF-8 sequence of the given length starting at src.
//
// length 1 returns src[0] unchanged; lengths 2..4 combine the payload bits of
// the lead byte with six bits from each following byte. Any other length
// returns 0xffff. The bytes are not validated.
static inline uint32_t
utf8_to_utf32(const uint8_t *src, size_t length)
{
    if (length == 1) {
        return src[0];
    }
    if (length < 2 || length > 4) {
        return 0xffff;
    }

    // Payload mask of the lead byte for 2-, 3- and 4-byte sequences.
    static const uint8_t kLeadMask[] = { 0x1f, 0x0f, 0x07 };

    uint32_t codePoint = src[0] & kLeadMask[length - 2];
    for (size_t i = 1; i < length; ++i) {
        UTF8_SHIFT_AND_MASK(codePoint, src[i])
    }
    return codePoint;
}
| 217 | |
| 218 | // --------------------------------------------------------------------------- |
| 219 | |
| 220 | static SharedBuffer* gEmptyStringBuf = NULL; |
| 221 | static char16_t* gEmptyString = NULL; |
| 222 | |
| 223 | static inline char16_t* getEmptyString() |
| 224 | { |
| 225 | gEmptyStringBuf->acquire(); |
| 226 | return gEmptyString; |
| 227 | } |
| 228 | |
| 229 | void initialize_string16() |
| 230 | { |
| 231 | SharedBuffer* buf = SharedBuffer::alloc(sizeof(char16_t)); |
| 232 | char16_t* str = (char16_t*)buf->data(); |
| 233 | *str = 0; |
| 234 | gEmptyStringBuf = buf; |
| 235 | gEmptyString = str; |
| 236 | } |
| 237 | |
| 238 | void terminate_string16() |
| 239 | { |
| 240 | SharedBuffer::bufferFromData(gEmptyString)->release(); |
| 241 | gEmptyStringBuf = NULL; |
| 242 | gEmptyString = NULL; |
| 243 | } |
| 244 | |
| 245 | // --------------------------------------------------------------------------- |
| 246 | |
| 247 | // Note: not dealing with generating surrogate pairs. |
| 248 | static char16_t* allocFromUTF8(const char* in, size_t len) |
| 249 | { |
| 250 | if (len == 0) return getEmptyString(); |
| 251 | |
| 252 | size_t chars = 0; |
| 253 | const char* end = in+len; |
| 254 | const char* p = in; |
| 255 | |
| 256 | while (p < end) { |
| 257 | chars++; |
| 258 | p += utf8_char_len(*p); |
| 259 | } |
| 260 | |
| 261 | SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t)); |
| 262 | if (buf) { |
| 263 | p = in; |
| 264 | char16_t* str = (char16_t*)buf->data(); |
| 265 | char16_t* d = str; |
| 266 | while (p < end) { |
| 267 | size_t len = utf8_char_len(*p); |
| 268 | *d++ = (char16_t)utf8_to_utf32((const uint8_t*)p, len); |
| 269 | p += len; |
| 270 | } |
| 271 | *d = 0; |
| 272 | |
| 273 | //printf("Created UTF-16 string from UTF-8 \"%s\":", in); |
| 274 | //printHexData(1, str, buf->size(), 16, 1); |
| 275 | //printf("\n"); |
| 276 | |
| 277 | return str; |
| 278 | } |
| 279 | |
| 280 | return getEmptyString(); |
| 281 | } |
| 282 | |
| 283 | // --------------------------------------------------------------------------- |
| 284 | |
| 285 | String16::String16() |
| 286 | : mString(getEmptyString()) |
| 287 | { |
| 288 | } |
| 289 | |
| 290 | String16::String16(const String16& o) |
| 291 | : mString(o.mString) |
| 292 | { |
| 293 | SharedBuffer::bufferFromData(mString)->acquire(); |
| 294 | } |
| 295 | |
| 296 | String16::String16(const String16& o, size_t len, size_t begin) |
| 297 | : mString(getEmptyString()) |
| 298 | { |
| 299 | setTo(o, len, begin); |
| 300 | } |
| 301 | |
| 302 | String16::String16(const char16_t* o) |
| 303 | { |
| 304 | size_t len = strlen16(o); |
| 305 | SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t)); |
| 306 | LOG_ASSERT(buf, "Unable to allocate shared buffer"); |
| 307 | if (buf) { |
| 308 | char16_t* str = (char16_t*)buf->data(); |
| 309 | strcpy16(str, o); |
| 310 | mString = str; |
| 311 | return; |
| 312 | } |
| 313 | |
| 314 | mString = getEmptyString(); |
| 315 | } |
| 316 | |
| 317 | String16::String16(const char16_t* o, size_t len) |
| 318 | { |
| 319 | SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t)); |
| 320 | LOG_ASSERT(buf, "Unable to allocate shared buffer"); |
| 321 | if (buf) { |
| 322 | char16_t* str = (char16_t*)buf->data(); |
| 323 | memcpy(str, o, len*sizeof(char16_t)); |
| 324 | str[len] = 0; |
| 325 | mString = str; |
| 326 | return; |
| 327 | } |
| 328 | |
| 329 | mString = getEmptyString(); |
| 330 | } |
| 331 | |
| 332 | String16::String16(const String8& o) |
| 333 | : mString(allocFromUTF8(o.string(), o.size())) |
| 334 | { |
| 335 | } |
| 336 | |
| 337 | String16::String16(const char* o) |
| 338 | : mString(allocFromUTF8(o, strlen(o))) |
| 339 | { |
| 340 | } |
| 341 | |
| 342 | String16::String16(const char* o, size_t len) |
| 343 | : mString(allocFromUTF8(o, len)) |
| 344 | { |
| 345 | } |
| 346 | |
| 347 | String16::~String16() |
| 348 | { |
| 349 | SharedBuffer::bufferFromData(mString)->release(); |
| 350 | } |
| 351 | |
| 352 | void String16::setTo(const String16& other) |
| 353 | { |
| 354 | SharedBuffer::bufferFromData(other.mString)->acquire(); |
| 355 | SharedBuffer::bufferFromData(mString)->release(); |
| 356 | mString = other.mString; |
| 357 | } |
| 358 | |
| 359 | status_t String16::setTo(const String16& other, size_t len, size_t begin) |
| 360 | { |
| 361 | const size_t N = other.size(); |
| 362 | if (begin >= N) { |
| 363 | SharedBuffer::bufferFromData(mString)->release(); |
| 364 | mString = getEmptyString(); |
| 365 | return NO_ERROR; |
| 366 | } |
| 367 | if ((begin+len) > N) len = N-begin; |
| 368 | if (begin == 0 && len == N) { |
| 369 | setTo(other); |
| 370 | return NO_ERROR; |
| 371 | } |
| 372 | |
| 373 | if (&other == this) { |
| 374 | LOG_ALWAYS_FATAL("Not implemented"); |
| 375 | } |
| 376 | |
| 377 | return setTo(other.string()+begin, len); |
| 378 | } |
| 379 | |
| 380 | status_t String16::setTo(const char16_t* other) |
| 381 | { |
| 382 | return setTo(other, strlen16(other)); |
| 383 | } |
| 384 | |
| 385 | status_t String16::setTo(const char16_t* other, size_t len) |
| 386 | { |
| 387 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 388 | ->editResize((len+1)*sizeof(char16_t)); |
| 389 | if (buf) { |
| 390 | char16_t* str = (char16_t*)buf->data(); |
| 391 | memcpy(str, other, len*sizeof(char16_t)); |
| 392 | str[len] = 0; |
| 393 | mString = str; |
| 394 | return NO_ERROR; |
| 395 | } |
| 396 | return NO_MEMORY; |
| 397 | } |
| 398 | |
| 399 | status_t String16::append(const String16& other) |
| 400 | { |
| 401 | const size_t myLen = size(); |
| 402 | const size_t otherLen = other.size(); |
| 403 | if (myLen == 0) { |
| 404 | setTo(other); |
| 405 | return NO_ERROR; |
| 406 | } else if (otherLen == 0) { |
| 407 | return NO_ERROR; |
| 408 | } |
| 409 | |
| 410 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 411 | ->editResize((myLen+otherLen+1)*sizeof(char16_t)); |
| 412 | if (buf) { |
| 413 | char16_t* str = (char16_t*)buf->data(); |
| 414 | memcpy(str+myLen, other, (otherLen+1)*sizeof(char16_t)); |
| 415 | mString = str; |
| 416 | return NO_ERROR; |
| 417 | } |
| 418 | return NO_MEMORY; |
| 419 | } |
| 420 | |
| 421 | status_t String16::append(const char16_t* chrs, size_t otherLen) |
| 422 | { |
| 423 | const size_t myLen = size(); |
| 424 | if (myLen == 0) { |
| 425 | setTo(chrs, otherLen); |
| 426 | return NO_ERROR; |
| 427 | } else if (otherLen == 0) { |
| 428 | return NO_ERROR; |
| 429 | } |
| 430 | |
| 431 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 432 | ->editResize((myLen+otherLen+1)*sizeof(char16_t)); |
| 433 | if (buf) { |
| 434 | char16_t* str = (char16_t*)buf->data(); |
| 435 | memcpy(str+myLen, chrs, otherLen*sizeof(char16_t)); |
| 436 | str[myLen+otherLen] = 0; |
| 437 | mString = str; |
| 438 | return NO_ERROR; |
| 439 | } |
| 440 | return NO_MEMORY; |
| 441 | } |
| 442 | |
| 443 | status_t String16::insert(size_t pos, const char16_t* chrs) |
| 444 | { |
| 445 | return insert(pos, chrs, strlen16(chrs)); |
| 446 | } |
| 447 | |
| 448 | status_t String16::insert(size_t pos, const char16_t* chrs, size_t len) |
| 449 | { |
| 450 | const size_t myLen = size(); |
| 451 | if (myLen == 0) { |
| 452 | return setTo(chrs, len); |
| 453 | return NO_ERROR; |
| 454 | } else if (len == 0) { |
| 455 | return NO_ERROR; |
| 456 | } |
| 457 | |
| 458 | if (pos > myLen) pos = myLen; |
| 459 | |
| 460 | #if 0 |
| 461 | printf("Insert in to %s: pos=%d, len=%d, myLen=%d, chrs=%s\n", |
| 462 | String8(*this).string(), pos, |
| 463 | len, myLen, String8(chrs, len).string()); |
| 464 | #endif |
| 465 | |
| 466 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 467 | ->editResize((myLen+len+1)*sizeof(char16_t)); |
| 468 | if (buf) { |
| 469 | char16_t* str = (char16_t*)buf->data(); |
| 470 | if (pos < myLen) { |
| 471 | memmove(str+pos+len, str+pos, (myLen-pos)*sizeof(char16_t)); |
| 472 | } |
| 473 | memcpy(str+pos, chrs, len*sizeof(char16_t)); |
| 474 | str[myLen+len] = 0; |
| 475 | mString = str; |
| 476 | #if 0 |
| 477 | printf("Result (%d chrs): %s\n", size(), String8(*this).string()); |
| 478 | #endif |
| 479 | return NO_ERROR; |
| 480 | } |
| 481 | return NO_MEMORY; |
| 482 | } |
| 483 | |
| 484 | ssize_t String16::findFirst(char16_t c) const |
| 485 | { |
| 486 | const char16_t* str = string(); |
| 487 | const char16_t* p = str; |
| 488 | const char16_t* e = p + size(); |
| 489 | while (p < e) { |
| 490 | if (*p == c) { |
| 491 | return p-str; |
| 492 | } |
| 493 | p++; |
| 494 | } |
| 495 | return -1; |
| 496 | } |
| 497 | |
| 498 | ssize_t String16::findLast(char16_t c) const |
| 499 | { |
| 500 | const char16_t* str = string(); |
| 501 | const char16_t* p = str; |
| 502 | const char16_t* e = p + size(); |
| 503 | while (p < e) { |
| 504 | e--; |
| 505 | if (*e == c) { |
| 506 | return e-str; |
| 507 | } |
| 508 | } |
| 509 | return -1; |
| 510 | } |
| 511 | |
| 512 | bool String16::startsWith(const String16& prefix) const |
| 513 | { |
| 514 | const size_t ps = prefix.size(); |
| 515 | if (ps > size()) return false; |
| 516 | return strzcmp16(mString, ps, prefix.string(), ps) == 0; |
| 517 | } |
| 518 | |
| 519 | bool String16::startsWith(const char16_t* prefix) const |
| 520 | { |
| 521 | const size_t ps = strlen16(prefix); |
| 522 | if (ps > size()) return false; |
| 523 | return strncmp16(mString, prefix, ps) == 0; |
| 524 | } |
| 525 | |
| 526 | status_t String16::makeLower() |
| 527 | { |
| 528 | const size_t N = size(); |
| 529 | const char16_t* str = string(); |
| 530 | char16_t* edit = NULL; |
| 531 | for (size_t i=0; i<N; i++) { |
| 532 | const char16_t v = str[i]; |
| 533 | if (v >= 'A' && v <= 'Z') { |
| 534 | if (!edit) { |
| 535 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit(); |
| 536 | if (!buf) { |
| 537 | return NO_MEMORY; |
| 538 | } |
| 539 | edit = (char16_t*)buf->data(); |
| 540 | mString = str = edit; |
| 541 | } |
| 542 | edit[i] = tolower((char)v); |
| 543 | } |
| 544 | } |
| 545 | return NO_ERROR; |
| 546 | } |
| 547 | |
| 548 | status_t String16::replaceAll(char16_t replaceThis, char16_t withThis) |
| 549 | { |
| 550 | const size_t N = size(); |
| 551 | const char16_t* str = string(); |
| 552 | char16_t* edit = NULL; |
| 553 | for (size_t i=0; i<N; i++) { |
| 554 | if (str[i] == replaceThis) { |
| 555 | if (!edit) { |
| 556 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit(); |
| 557 | if (!buf) { |
| 558 | return NO_MEMORY; |
| 559 | } |
| 560 | edit = (char16_t*)buf->data(); |
| 561 | mString = str = edit; |
| 562 | } |
| 563 | edit[i] = withThis; |
| 564 | } |
| 565 | } |
| 566 | return NO_ERROR; |
| 567 | } |
| 568 | |
| 569 | status_t String16::remove(size_t len, size_t begin) |
| 570 | { |
| 571 | const size_t N = size(); |
| 572 | if (begin >= N) { |
| 573 | SharedBuffer::bufferFromData(mString)->release(); |
| 574 | mString = getEmptyString(); |
| 575 | return NO_ERROR; |
| 576 | } |
| 577 | if ((begin+len) > N) len = N-begin; |
| 578 | if (begin == 0 && len == N) { |
| 579 | return NO_ERROR; |
| 580 | } |
| 581 | |
| 582 | if (begin > 0) { |
| 583 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 584 | ->editResize((N+1)*sizeof(char16_t)); |
| 585 | if (!buf) { |
| 586 | return NO_MEMORY; |
| 587 | } |
| 588 | char16_t* str = (char16_t*)buf->data(); |
| 589 | memmove(str, str+begin, (N-begin+1)*sizeof(char16_t)); |
| 590 | mString = str; |
| 591 | } |
| 592 | SharedBuffer* buf = SharedBuffer::bufferFromData(mString) |
| 593 | ->editResize((len+1)*sizeof(char16_t)); |
| 594 | if (buf) { |
| 595 | char16_t* str = (char16_t*)buf->data(); |
| 596 | str[len] = 0; |
| 597 | mString = str; |
| 598 | return NO_ERROR; |
| 599 | } |
| 600 | return NO_MEMORY; |
| 601 | } |
| 602 | |
| 603 | TextOutput& operator<<(TextOutput& to, const String16& val) |
| 604 | { |
| 605 | to << String8(val).string(); |
| 606 | return to; |
| 607 | } |
| 608 | |
| 609 | }; // namespace android |