blob: e5f8c81e360cf4fb2bf869864f942528ec94f609 [file] [log] [blame]
mrossetti@chromium.org9422b222011-04-14 03:43:05 +09001// Copyright (c) 2011 The Chromium Authors. All rights reserved.
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +09002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/utf_offset_string_conversions.h"
6
mrossetti@chromium.org9422b222011-04-14 03:43:05 +09007#include <algorithm>
8
9#include "base/scoped_ptr.h"
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090010#include "base/string_piece.h"
11#include "base/utf_string_conversion_utils.h"
12
13using base::PrepareForUTF16Or32Output;
14using base::ReadUnicodeCharacter;
15using base::WriteUnicodeCharacter;
16
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090017// Converts the given source Unicode character type to the given destination
18// Unicode character type as a STL string. The given input buffer and size
19// determine the source, and the given output STL string will be replaced by
20// the result.
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090021bool ConvertUnicode(const char* src,
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090022 size_t src_len,
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090023 string16* output,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090024 std::vector<size_t>* offsets_for_adjustment) {
25 if (offsets_for_adjustment) {
26 std::for_each(offsets_for_adjustment->begin(),
27 offsets_for_adjustment->end(),
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090028 LimitOffset<string16>(src_len));
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090029 }
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090030
31 // ICU requires 32-bit numbers.
32 bool success = true;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090033 OffsetAdjuster offset_adjuster(offsets_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090034 int32 src_len32 = static_cast<int32>(src_len);
35 for (int32 i = 0; i < src_len32; i++) {
36 uint32 code_point;
37 size_t original_i = i;
38 size_t chars_written = 0;
39 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
40 chars_written = WriteUnicodeCharacter(code_point, output);
41 } else {
cevans@chromium.orgd0e46a42010-01-02 07:16:38 +090042 chars_written = WriteUnicodeCharacter(0xFFFD, output);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090043 success = false;
44 }
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090045 if (offsets_for_adjustment) {
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090046 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
47 // character read, not after it (so that incrementing it in the loop
48 // increment will place it at the right location), so we need to account
49 // for that in determining the amount that was read.
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090050 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
51 i - original_i + 1, chars_written));
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090052 }
53 }
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090054 return success;
55}
56
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090057bool UTF8ToUTF16AndAdjustOffset(const char* src,
58 size_t src_len,
59 string16* output,
60 size_t* offset_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090061 std::vector<size_t> offsets;
62 if (offset_for_adjustment)
63 offsets.push_back(*offset_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090064 PrepareForUTF16Or32Output(src, src_len, output);
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090065 bool ret = ConvertUnicode(src, src_len, output, &offsets);
66 if (offset_for_adjustment)
67 *offset_for_adjustment = offsets[0];
68 return ret;
69}
70
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090071bool UTF8ToUTF16AndAdjustOffsets(const char* src,
72 size_t src_len,
73 string16* output,
74 std::vector<size_t>* offsets_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090075 PrepareForUTF16Or32Output(src, src_len, output);
76 return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090077}
78
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090079string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090080 size_t* offset_for_adjustment) {
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090081 std::vector<size_t> offsets;
82 if (offset_for_adjustment)
83 offsets.push_back(*offset_for_adjustment);
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090084 string16 result;
85 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090086 &offsets);
87 if (offset_for_adjustment)
88 *offset_for_adjustment = offsets[0];
89 return result;
90}
91
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090092string16 UTF8ToUTF16AndAdjustOffsets(
93 const base::StringPiece& utf8,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090094 std::vector<size_t>* offsets_for_adjustment) {
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +090095 string16 result;
96 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
mrossetti@chromium.org9422b222011-04-14 03:43:05 +090097 offsets_for_adjustment);
98 return result;
pkasting@chromium.org046cd5a2009-11-14 04:27:48 +090099}
100
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900101OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
102 size_t original_length,
103 size_t output_length)
104 : original_offset(original_offset),
105 original_length(original_length),
106 output_length(output_length) {
107}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900108
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900109OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
110 : offsets_for_adjustment_(offsets_for_adjustment) {
111}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900112
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900113OffsetAdjuster::~OffsetAdjuster() {
114 if (!offsets_for_adjustment_ || adjustments_.empty())
115 return;
116 for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin());
117 i != offsets_for_adjustment_->end(); ++i)
118 AdjustOffset(i);
119}
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900120
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900121void OffsetAdjuster::Add(const Adjustment& adjustment) {
122 adjustments_.push_back(adjustment);
123}
124
125void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
126 if (*offset == string16::npos)
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900127 return;
128 size_t adjustment = 0;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900129 for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900130 i != adjustments_.end(); ++i) {
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900131 if (*offset == i->original_offset && i->output_length == 0) {
132 *offset = string16::npos;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900133 return;
134 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900135 if (*offset <= i->original_offset)
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900136 break;
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900137 if (*offset < (i->original_offset + i->original_length)) {
138 *offset = string16::npos;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900139 return;
140 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900141 adjustment += (i->original_length - i->output_length);
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900142 }
pkasting@chromium.orge6b5c202011-05-04 05:03:50 +0900143 *offset -= adjustment;
mrossetti@chromium.org9422b222011-04-14 03:43:05 +0900144}