mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "base/utf_offset_string_conversions.h" |
| 6 | |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 7 | #include <algorithm> |
| 8 | |
| 9 | #include "base/scoped_ptr.h" |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 10 | #include "base/string_piece.h" |
| 11 | #include "base/utf_string_conversion_utils.h" |
| 12 | |
| 13 | using base::PrepareForUTF16Or32Output; |
| 14 | using base::ReadUnicodeCharacter; |
| 15 | using base::WriteUnicodeCharacter; |
| 16 | |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 17 | // Converts the given source Unicode character type to the given destination |
| 18 | // Unicode character type as a STL string. The given input buffer and size |
| 19 | // determine the source, and the given output STL string will be replaced by |
| 20 | // the result. |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 21 | bool ConvertUnicode(const char* src, |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 22 | size_t src_len, |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 23 | string16* output, |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 24 | std::vector<size_t>* offsets_for_adjustment) { |
| 25 | if (offsets_for_adjustment) { |
| 26 | std::for_each(offsets_for_adjustment->begin(), |
| 27 | offsets_for_adjustment->end(), |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 28 | LimitOffset<string16>(src_len)); |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 29 | } |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 30 | |
| 31 | // ICU requires 32-bit numbers. |
| 32 | bool success = true; |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 33 | OffsetAdjuster offset_adjuster(offsets_for_adjustment); |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 34 | int32 src_len32 = static_cast<int32>(src_len); |
| 35 | for (int32 i = 0; i < src_len32; i++) { |
| 36 | uint32 code_point; |
| 37 | size_t original_i = i; |
| 38 | size_t chars_written = 0; |
| 39 | if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { |
| 40 | chars_written = WriteUnicodeCharacter(code_point, output); |
| 41 | } else { |
cevans@chromium.org | d0e46a4 | 2010-01-02 07:16:38 +0900 | [diff] [blame] | 42 | chars_written = WriteUnicodeCharacter(0xFFFD, output); |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 43 | success = false; |
| 44 | } |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 45 | if (offsets_for_adjustment) { |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 46 | // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last |
| 47 | // character read, not after it (so that incrementing it in the loop |
| 48 | // increment will place it at the right location), so we need to account |
| 49 | // for that in determining the amount that was read. |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 50 | offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, |
| 51 | i - original_i + 1, chars_written)); |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 52 | } |
| 53 | } |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 54 | return success; |
| 55 | } |
| 56 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 57 | bool UTF8ToUTF16AndAdjustOffset(const char* src, |
| 58 | size_t src_len, |
| 59 | string16* output, |
| 60 | size_t* offset_for_adjustment) { |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 61 | std::vector<size_t> offsets; |
| 62 | if (offset_for_adjustment) |
| 63 | offsets.push_back(*offset_for_adjustment); |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 64 | PrepareForUTF16Or32Output(src, src_len, output); |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 65 | bool ret = ConvertUnicode(src, src_len, output, &offsets); |
| 66 | if (offset_for_adjustment) |
| 67 | *offset_for_adjustment = offsets[0]; |
| 68 | return ret; |
| 69 | } |
| 70 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 71 | bool UTF8ToUTF16AndAdjustOffsets(const char* src, |
| 72 | size_t src_len, |
| 73 | string16* output, |
| 74 | std::vector<size_t>* offsets_for_adjustment) { |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 75 | PrepareForUTF16Or32Output(src, src_len, output); |
| 76 | return ConvertUnicode(src, src_len, output, offsets_for_adjustment); |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 77 | } |
| 78 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 79 | string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8, |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 80 | size_t* offset_for_adjustment) { |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 81 | std::vector<size_t> offsets; |
| 82 | if (offset_for_adjustment) |
| 83 | offsets.push_back(*offset_for_adjustment); |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 84 | string16 result; |
| 85 | UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 86 | &offsets); |
| 87 | if (offset_for_adjustment) |
| 88 | *offset_for_adjustment = offsets[0]; |
| 89 | return result; |
| 90 | } |
| 91 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 92 | string16 UTF8ToUTF16AndAdjustOffsets( |
| 93 | const base::StringPiece& utf8, |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 94 | std::vector<size_t>* offsets_for_adjustment) { |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 95 | string16 result; |
| 96 | UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 97 | offsets_for_adjustment); |
| 98 | return result; |
pkasting@chromium.org | 046cd5a | 2009-11-14 04:27:48 +0900 | [diff] [blame] | 99 | } |
| 100 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 101 | OffsetAdjuster::Adjustment::Adjustment(size_t original_offset, |
| 102 | size_t original_length, |
| 103 | size_t output_length) |
| 104 | : original_offset(original_offset), |
| 105 | original_length(original_length), |
| 106 | output_length(output_length) { |
| 107 | } |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 108 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 109 | OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment) |
| 110 | : offsets_for_adjustment_(offsets_for_adjustment) { |
| 111 | } |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 112 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 113 | OffsetAdjuster::~OffsetAdjuster() { |
| 114 | if (!offsets_for_adjustment_ || adjustments_.empty()) |
| 115 | return; |
| 116 | for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin()); |
| 117 | i != offsets_for_adjustment_->end(); ++i) |
| 118 | AdjustOffset(i); |
| 119 | } |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 120 | |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 121 | void OffsetAdjuster::Add(const Adjustment& adjustment) { |
| 122 | adjustments_.push_back(adjustment); |
| 123 | } |
| 124 | |
| 125 | void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) { |
| 126 | if (*offset == string16::npos) |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 127 | return; |
| 128 | size_t adjustment = 0; |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 129 | for (std::vector<Adjustment>::const_iterator i = adjustments_.begin(); |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 130 | i != adjustments_.end(); ++i) { |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 131 | if (*offset == i->original_offset && i->output_length == 0) { |
| 132 | *offset = string16::npos; |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 133 | return; |
| 134 | } |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 135 | if (*offset <= i->original_offset) |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 136 | break; |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 137 | if (*offset < (i->original_offset + i->original_length)) { |
| 138 | *offset = string16::npos; |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 139 | return; |
| 140 | } |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 141 | adjustment += (i->original_length - i->output_length); |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 142 | } |
pkasting@chromium.org | e6b5c20 | 2011-05-04 05:03:50 +0900 | [diff] [blame^] | 143 | *offset -= adjustment; |
mrossetti@chromium.org | 9422b22 | 2011-04-14 03:43:05 +0900 | [diff] [blame] | 144 | } |