blob: 8405303fd9d95bd5fb8fe3ef0a0bcdb70102cd80 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/utf_offset_string_conversions.h"
6
mrossetti@chromium.org9422b222011-04-14 03:43:05 +09007#include <algorithm>
8
thestig@chromium.orgefd4aaf2011-06-15 13:14:23 +09009#include "base/memory/scoped_ptr.h"
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090010#include "base/string_piece.h"
11#include "base/utf_string_conversion_utils.h"
12
13using base::PrepareForUTF16Or32Output;
kinaba@chromium.orgfb4d5292011-09-08 11:18:10 +090014using base::PrepareForUTF8Output;
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090015using base::ReadUnicodeCharacter;
16using base::WriteUnicodeCharacter;
17
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090018// Converts the given source Unicode character type to the given destination
19// Unicode character type as a STL string. The given input buffer and size
20// determine the source, and the given output STL string will be replaced by
21// the result.
kinaba@chromium.orgfb4d5292011-09-08 11:18:10 +090022template<typename SrcChar, typename DestStdString>
23bool ConvertUnicode(const SrcChar* src,
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090024 size_t src_len,
kinaba@chromium.orgfb4d5292011-09-08 11:18:10 +090025 DestStdString* output,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090026 std::vector<size_t>* offsets_for_adjustment) {
27 if (offsets_for_adjustment) {
28 std::for_each(offsets_for_adjustment->begin(),
29 offsets_for_adjustment->end(),
kinaba@chromium.orgfb4d5292011-09-08 11:18:10 +090030 LimitOffset<DestStdString>(src_len));
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090031 }
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090032
33 // ICU requires 32-bit numbers.
34 bool success = true;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090035 OffsetAdjuster offset_adjuster(offsets_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090036 int32 src_len32 = static_cast<int32>(src_len);
37 for (int32 i = 0; i < src_len32; i++) {
38 uint32 code_point;
39 size_t original_i = i;
40 size_t chars_written = 0;
41 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
42 chars_written = WriteUnicodeCharacter(code_point, output);
43 } else {
cevans@chromium.orgd0e46a42010-01-02 07:16:38 +090044 chars_written = WriteUnicodeCharacter(0xFFFD, output);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090045 success = false;
46 }
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090047 if (offsets_for_adjustment) {
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090048 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
49 // character read, not after it (so that incrementing it in the loop
50 // increment will place it at the right location), so we need to account
51 // for that in determining the amount that was read.
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090052 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
53 i - original_i + 1, chars_written));
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090054 }
55 }
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090056 return success;
57}
58
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090059bool UTF8ToUTF16AndAdjustOffset(const char* src,
60 size_t src_len,
61 string16* output,
62 size_t* offset_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090063 std::vector<size_t> offsets;
64 if (offset_for_adjustment)
65 offsets.push_back(*offset_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090066 PrepareForUTF16Or32Output(src, src_len, output);
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090067 bool ret = ConvertUnicode(src, src_len, output, &offsets);
68 if (offset_for_adjustment)
69 *offset_for_adjustment = offsets[0];
70 return ret;
71}
72
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090073bool UTF8ToUTF16AndAdjustOffsets(const char* src,
74 size_t src_len,
75 string16* output,
76 std::vector<size_t>* offsets_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090077 PrepareForUTF16Or32Output(src, src_len, output);
78 return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090079}
80
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090081string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090082 size_t* offset_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090083 std::vector<size_t> offsets;
84 if (offset_for_adjustment)
85 offsets.push_back(*offset_for_adjustment);
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090086 string16 result;
87 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090088 &offsets);
89 if (offset_for_adjustment)
90 *offset_for_adjustment = offsets[0];
91 return result;
92}
93
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090094string16 UTF8ToUTF16AndAdjustOffsets(
95 const base::StringPiece& utf8,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090096 std::vector<size_t>* offsets_for_adjustment) {
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090097 string16 result;
98 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090099 offsets_for_adjustment);
100 return result;
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +0900101}
102
kinaba@chromium.orgfb4d5292011-09-08 11:18:10 +0900103std::string UTF16ToUTF8AndAdjustOffset(
104 const base::StringPiece16& utf16,
105 size_t* offset_for_adjustment) {
106 std::vector<size_t> offsets;
107 if (offset_for_adjustment)
108 offsets.push_back(*offset_for_adjustment);
109 std::string result = UTF16ToUTF8AndAdjustOffsets(utf16, &offsets);
110 if (offset_for_adjustment)
111 *offset_for_adjustment = offsets[0];
112 return result;
113}
114
115std::string UTF16ToUTF8AndAdjustOffsets(
116 const base::StringPiece16& utf16,
117 std::vector<size_t>* offsets_for_adjustment) {
118 std::string result;
119 PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
120 ConvertUnicode(utf16.data(), utf16.length(), &result, offsets_for_adjustment);
121 return result;
122}
123
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900124OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
125 size_t original_length,
126 size_t output_length)
127 : original_offset(original_offset),
128 original_length(original_length),
129 output_length(output_length) {
130}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900131
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900132OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
133 : offsets_for_adjustment_(offsets_for_adjustment) {
134}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900135
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900136OffsetAdjuster::~OffsetAdjuster() {
137 if (!offsets_for_adjustment_ || adjustments_.empty())
138 return;
139 for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin());
140 i != offsets_for_adjustment_->end(); ++i)
141 AdjustOffset(i);
142}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900143
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900144void OffsetAdjuster::Add(const Adjustment& adjustment) {
145 adjustments_.push_back(adjustment);
146}
147
148void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
149 if (*offset == string16::npos)
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900150 return;
151 size_t adjustment = 0;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900152 for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900153 i != adjustments_.end(); ++i) {
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900154 if (*offset == i->original_offset && i->output_length == 0) {
155 *offset = string16::npos;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900156 return;
157 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900158 if (*offset <= i->original_offset)
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900159 break;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900160 if (*offset < (i->original_offset + i->original_length)) {
161 *offset = string16::npos;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900162 return;
163 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900164 adjustment += (i->original_length - i->output_length);
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900165 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900166 *offset -= adjustment;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900167}