Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1 | // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef V8_STRING_BUILDER_H_ |
| 6 | #define V8_STRING_BUILDER_H_ |
| 7 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 8 | #include "src/assert-scope.h" |
| 9 | #include "src/factory.h" |
| 10 | #include "src/handles.h" |
| 11 | #include "src/isolate.h" |
| 12 | #include "src/objects.h" |
| 13 | #include "src/utils.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 14 | |
| 15 | namespace v8 { |
| 16 | namespace internal { |
| 17 | |
| 18 | const int kStringBuilderConcatHelperLengthBits = 11; |
| 19 | const int kStringBuilderConcatHelperPositionBits = 19; |
| 20 | |
| 21 | typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits> |
| 22 | StringBuilderSubstringLength; |
| 23 | typedef BitField<int, kStringBuilderConcatHelperLengthBits, |
| 24 | kStringBuilderConcatHelperPositionBits> |
| 25 | StringBuilderSubstringPosition; |
| 26 | |
| 27 | |
| 28 | template <typename sinkchar> |
| 29 | static inline void StringBuilderConcatHelper(String* special, sinkchar* sink, |
| 30 | FixedArray* fixed_array, |
| 31 | int array_length) { |
| 32 | DisallowHeapAllocation no_gc; |
| 33 | int position = 0; |
| 34 | for (int i = 0; i < array_length; i++) { |
| 35 | Object* element = fixed_array->get(i); |
| 36 | if (element->IsSmi()) { |
| 37 | // Smi encoding of position and length. |
| 38 | int encoded_slice = Smi::cast(element)->value(); |
| 39 | int pos; |
| 40 | int len; |
| 41 | if (encoded_slice > 0) { |
| 42 | // Position and length encoded in one smi. |
| 43 | pos = StringBuilderSubstringPosition::decode(encoded_slice); |
| 44 | len = StringBuilderSubstringLength::decode(encoded_slice); |
| 45 | } else { |
| 46 | // Position and length encoded in two smis. |
| 47 | Object* obj = fixed_array->get(++i); |
| 48 | DCHECK(obj->IsSmi()); |
| 49 | pos = Smi::cast(obj)->value(); |
| 50 | len = -encoded_slice; |
| 51 | } |
| 52 | String::WriteToFlat(special, sink + position, pos, pos + len); |
| 53 | position += len; |
| 54 | } else { |
| 55 | String* string = String::cast(element); |
| 56 | int element_length = string->length(); |
| 57 | String::WriteToFlat(string, sink + position, 0, element_length); |
| 58 | position += element_length; |
| 59 | } |
| 60 | } |
| 61 | } |
| 62 | |
| 63 | |
| 64 | // Returns the result length of the concatenation. |
| 65 | // On illegal argument, -1 is returned. |
| 66 | static inline int StringBuilderConcatLength(int special_length, |
| 67 | FixedArray* fixed_array, |
| 68 | int array_length, bool* one_byte) { |
| 69 | DisallowHeapAllocation no_gc; |
| 70 | int position = 0; |
| 71 | for (int i = 0; i < array_length; i++) { |
| 72 | int increment = 0; |
| 73 | Object* elt = fixed_array->get(i); |
| 74 | if (elt->IsSmi()) { |
| 75 | // Smi encoding of position and length. |
| 76 | int smi_value = Smi::cast(elt)->value(); |
| 77 | int pos; |
| 78 | int len; |
| 79 | if (smi_value > 0) { |
| 80 | // Position and length encoded in one smi. |
| 81 | pos = StringBuilderSubstringPosition::decode(smi_value); |
| 82 | len = StringBuilderSubstringLength::decode(smi_value); |
| 83 | } else { |
| 84 | // Position and length encoded in two smis. |
| 85 | len = -smi_value; |
| 86 | // Get the position and check that it is a positive smi. |
| 87 | i++; |
| 88 | if (i >= array_length) return -1; |
| 89 | Object* next_smi = fixed_array->get(i); |
| 90 | if (!next_smi->IsSmi()) return -1; |
| 91 | pos = Smi::cast(next_smi)->value(); |
| 92 | if (pos < 0) return -1; |
| 93 | } |
| 94 | DCHECK(pos >= 0); |
| 95 | DCHECK(len >= 0); |
| 96 | if (pos > special_length || len > special_length - pos) return -1; |
| 97 | increment = len; |
| 98 | } else if (elt->IsString()) { |
| 99 | String* element = String::cast(elt); |
| 100 | int element_length = element->length(); |
| 101 | increment = element_length; |
| 102 | if (*one_byte && !element->HasOnlyOneByteChars()) { |
| 103 | *one_byte = false; |
| 104 | } |
| 105 | } else { |
| 106 | return -1; |
| 107 | } |
| 108 | if (increment > String::kMaxLength - position) { |
| 109 | return kMaxInt; // Provoke throw on allocation. |
| 110 | } |
| 111 | position += increment; |
| 112 | } |
| 113 | return position; |
| 114 | } |
| 115 | |
| 116 | |
| 117 | class FixedArrayBuilder { |
| 118 | public: |
| 119 | explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity) |
| 120 | : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), |
| 121 | length_(0), |
| 122 | has_non_smi_elements_(false) { |
| 123 | // Require a non-zero initial size. Ensures that doubling the size to |
| 124 | // extend the array will work. |
| 125 | DCHECK(initial_capacity > 0); |
| 126 | } |
| 127 | |
| 128 | explicit FixedArrayBuilder(Handle<FixedArray> backing_store) |
| 129 | : array_(backing_store), length_(0), has_non_smi_elements_(false) { |
| 130 | // Require a non-zero initial size. Ensures that doubling the size to |
| 131 | // extend the array will work. |
| 132 | DCHECK(backing_store->length() > 0); |
| 133 | } |
| 134 | |
| 135 | bool HasCapacity(int elements) { |
| 136 | int length = array_->length(); |
| 137 | int required_length = length_ + elements; |
| 138 | return (length >= required_length); |
| 139 | } |
| 140 | |
| 141 | void EnsureCapacity(int elements) { |
| 142 | int length = array_->length(); |
| 143 | int required_length = length_ + elements; |
| 144 | if (length < required_length) { |
| 145 | int new_length = length; |
| 146 | do { |
| 147 | new_length *= 2; |
| 148 | } while (new_length < required_length); |
| 149 | Handle<FixedArray> extended_array = |
| 150 | array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length); |
| 151 | array_->CopyTo(0, *extended_array, 0, length_); |
| 152 | array_ = extended_array; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | void Add(Object* value) { |
| 157 | DCHECK(!value->IsSmi()); |
| 158 | DCHECK(length_ < capacity()); |
| 159 | array_->set(length_, value); |
| 160 | length_++; |
| 161 | has_non_smi_elements_ = true; |
| 162 | } |
| 163 | |
| 164 | void Add(Smi* value) { |
| 165 | DCHECK(value->IsSmi()); |
| 166 | DCHECK(length_ < capacity()); |
| 167 | array_->set(length_, value); |
| 168 | length_++; |
| 169 | } |
| 170 | |
| 171 | Handle<FixedArray> array() { return array_; } |
| 172 | |
| 173 | int length() { return length_; } |
| 174 | |
| 175 | int capacity() { return array_->length(); } |
| 176 | |
| 177 | Handle<JSArray> ToJSArray(Handle<JSArray> target_array) { |
| 178 | JSArray::SetContent(target_array, array_); |
| 179 | target_array->set_length(Smi::FromInt(length_)); |
| 180 | return target_array; |
| 181 | } |
| 182 | |
| 183 | |
| 184 | private: |
| 185 | Handle<FixedArray> array_; |
| 186 | int length_; |
| 187 | bool has_non_smi_elements_; |
| 188 | }; |
| 189 | |
| 190 | |
| 191 | class ReplacementStringBuilder { |
| 192 | public: |
| 193 | ReplacementStringBuilder(Heap* heap, Handle<String> subject, |
| 194 | int estimated_part_count) |
| 195 | : heap_(heap), |
| 196 | array_builder_(heap->isolate(), estimated_part_count), |
| 197 | subject_(subject), |
| 198 | character_count_(0), |
| 199 | is_one_byte_(subject->IsOneByteRepresentation()) { |
| 200 | // Require a non-zero initial size. Ensures that doubling the size to |
| 201 | // extend the array will work. |
| 202 | DCHECK(estimated_part_count > 0); |
| 203 | } |
| 204 | |
| 205 | static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, |
| 206 | int to) { |
| 207 | DCHECK(from >= 0); |
| 208 | int length = to - from; |
| 209 | DCHECK(length > 0); |
| 210 | if (StringBuilderSubstringLength::is_valid(length) && |
| 211 | StringBuilderSubstringPosition::is_valid(from)) { |
| 212 | int encoded_slice = StringBuilderSubstringLength::encode(length) | |
| 213 | StringBuilderSubstringPosition::encode(from); |
| 214 | builder->Add(Smi::FromInt(encoded_slice)); |
| 215 | } else { |
| 216 | // Otherwise encode as two smis. |
| 217 | builder->Add(Smi::FromInt(-length)); |
| 218 | builder->Add(Smi::FromInt(from)); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | |
| 223 | void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); } |
| 224 | |
| 225 | |
| 226 | void AddSubjectSlice(int from, int to) { |
| 227 | AddSubjectSlice(&array_builder_, from, to); |
| 228 | IncrementCharacterCount(to - from); |
| 229 | } |
| 230 | |
| 231 | |
| 232 | void AddString(Handle<String> string) { |
| 233 | int length = string->length(); |
| 234 | DCHECK(length > 0); |
| 235 | AddElement(*string); |
| 236 | if (!string->IsOneByteRepresentation()) { |
| 237 | is_one_byte_ = false; |
| 238 | } |
| 239 | IncrementCharacterCount(length); |
| 240 | } |
| 241 | |
| 242 | |
| 243 | MaybeHandle<String> ToString(); |
| 244 | |
| 245 | |
| 246 | void IncrementCharacterCount(int by) { |
| 247 | if (character_count_ > String::kMaxLength - by) { |
| 248 | STATIC_ASSERT(String::kMaxLength < kMaxInt); |
| 249 | character_count_ = kMaxInt; |
| 250 | } else { |
| 251 | character_count_ += by; |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | private: |
| 256 | void AddElement(Object* element) { |
| 257 | DCHECK(element->IsSmi() || element->IsString()); |
| 258 | DCHECK(array_builder_.capacity() > array_builder_.length()); |
| 259 | array_builder_.Add(element); |
| 260 | } |
| 261 | |
| 262 | Heap* heap_; |
| 263 | FixedArrayBuilder array_builder_; |
| 264 | Handle<String> subject_; |
| 265 | int character_count_; |
| 266 | bool is_one_byte_; |
| 267 | }; |
| 268 | |
| 269 | |
| 270 | class IncrementalStringBuilder { |
| 271 | public: |
| 272 | explicit IncrementalStringBuilder(Isolate* isolate); |
| 273 | |
| 274 | INLINE(String::Encoding CurrentEncoding()) { return encoding_; } |
| 275 | |
| 276 | template <typename SrcChar, typename DestChar> |
| 277 | INLINE(void Append(SrcChar c)); |
| 278 | |
| 279 | INLINE(void AppendCharacter(uint8_t c)) { |
| 280 | if (encoding_ == String::ONE_BYTE_ENCODING) { |
| 281 | Append<uint8_t, uint8_t>(c); |
| 282 | } else { |
| 283 | Append<uint8_t, uc16>(c); |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | INLINE(void AppendCString(const char* s)) { |
| 288 | const uint8_t* u = reinterpret_cast<const uint8_t*>(s); |
| 289 | if (encoding_ == String::ONE_BYTE_ENCODING) { |
| 290 | while (*u != '\0') Append<uint8_t, uint8_t>(*(u++)); |
| 291 | } else { |
| 292 | while (*u != '\0') Append<uint8_t, uc16>(*(u++)); |
| 293 | } |
| 294 | } |
| 295 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 296 | INLINE(void AppendCString(const uc16* s)) { |
| 297 | if (encoding_ == String::ONE_BYTE_ENCODING) { |
| 298 | while (*s != '\0') Append<uc16, uint8_t>(*(s++)); |
| 299 | } else { |
| 300 | while (*s != '\0') Append<uc16, uc16>(*(s++)); |
| 301 | } |
| 302 | } |
| 303 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 304 | INLINE(bool CurrentPartCanFit(int length)) { |
| 305 | return part_length_ - current_index_ > length; |
| 306 | } |
| 307 | |
| 308 | void AppendString(Handle<String> string); |
| 309 | |
| 310 | MaybeHandle<String> Finish(); |
| 311 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 312 | INLINE(bool HasOverflowed()) const { return overflowed_; } |
| 313 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 314 | // Change encoding to two-byte. |
| 315 | void ChangeEncoding() { |
| 316 | DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); |
| 317 | ShrinkCurrentPart(); |
| 318 | encoding_ = String::TWO_BYTE_ENCODING; |
| 319 | Extend(); |
| 320 | } |
| 321 | |
| 322 | template <typename DestChar> |
| 323 | class NoExtend { |
| 324 | public: |
| 325 | explicit NoExtend(Handle<String> string, int offset) { |
| 326 | DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); |
| 327 | if (sizeof(DestChar) == 1) { |
| 328 | start_ = reinterpret_cast<DestChar*>( |
| 329 | Handle<SeqOneByteString>::cast(string)->GetChars() + offset); |
| 330 | } else { |
| 331 | start_ = reinterpret_cast<DestChar*>( |
| 332 | Handle<SeqTwoByteString>::cast(string)->GetChars() + offset); |
| 333 | } |
| 334 | cursor_ = start_; |
| 335 | } |
| 336 | |
| 337 | INLINE(void Append(DestChar c)) { *(cursor_++) = c; } |
| 338 | INLINE(void AppendCString(const char* s)) { |
| 339 | const uint8_t* u = reinterpret_cast<const uint8_t*>(s); |
| 340 | while (*u != '\0') Append(*(u++)); |
| 341 | } |
| 342 | |
| 343 | int written() { return static_cast<int>(cursor_ - start_); } |
| 344 | |
| 345 | private: |
| 346 | DestChar* start_; |
| 347 | DestChar* cursor_; |
| 348 | DisallowHeapAllocation no_gc_; |
| 349 | }; |
| 350 | |
| 351 | template <typename DestChar> |
| 352 | class NoExtendString : public NoExtend<DestChar> { |
| 353 | public: |
| 354 | NoExtendString(Handle<String> string, int required_length) |
| 355 | : NoExtend<DestChar>(string, 0), string_(string) { |
| 356 | DCHECK(string->length() >= required_length); |
| 357 | } |
| 358 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 359 | Handle<String> Finalize() { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 360 | Handle<SeqString> string = Handle<SeqString>::cast(string_); |
| 361 | int length = NoExtend<DestChar>::written(); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 362 | Handle<String> result = SeqString::Truncate(string, length); |
| 363 | string_ = Handle<String>(); |
| 364 | return result; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 365 | } |
| 366 | |
| 367 | private: |
| 368 | Handle<String> string_; |
| 369 | }; |
| 370 | |
| 371 | template <typename DestChar> |
| 372 | class NoExtendBuilder : public NoExtend<DestChar> { |
| 373 | public: |
| 374 | NoExtendBuilder(IncrementalStringBuilder* builder, int required_length) |
| 375 | : NoExtend<DestChar>(builder->current_part(), builder->current_index_), |
| 376 | builder_(builder) { |
| 377 | DCHECK(builder->CurrentPartCanFit(required_length)); |
| 378 | } |
| 379 | |
| 380 | ~NoExtendBuilder() { |
| 381 | builder_->current_index_ += NoExtend<DestChar>::written(); |
| 382 | } |
| 383 | |
| 384 | private: |
| 385 | IncrementalStringBuilder* builder_; |
| 386 | }; |
| 387 | |
| 388 | private: |
| 389 | Factory* factory() { return isolate_->factory(); } |
| 390 | |
| 391 | INLINE(Handle<String> accumulator()) { return accumulator_; } |
| 392 | |
| 393 | INLINE(void set_accumulator(Handle<String> string)) { |
| 394 | *accumulator_.location() = *string; |
| 395 | } |
| 396 | |
| 397 | INLINE(Handle<String> current_part()) { return current_part_; } |
| 398 | |
| 399 | INLINE(void set_current_part(Handle<String> string)) { |
| 400 | *current_part_.location() = *string; |
| 401 | } |
| 402 | |
| 403 | // Add the current part to the accumulator. |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 404 | void Accumulate(Handle<String> new_part); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 405 | |
| 406 | // Finish the current part and allocate a new part. |
| 407 | void Extend(); |
| 408 | |
| 409 | // Shrink current part to the right size. |
| 410 | void ShrinkCurrentPart() { |
| 411 | DCHECK(current_index_ < part_length_); |
| 412 | set_current_part(SeqString::Truncate( |
| 413 | Handle<SeqString>::cast(current_part()), current_index_)); |
| 414 | } |
| 415 | |
| 416 | static const int kInitialPartLength = 32; |
| 417 | static const int kMaxPartLength = 16 * 1024; |
| 418 | static const int kPartLengthGrowthFactor = 2; |
| 419 | |
| 420 | Isolate* isolate_; |
| 421 | String::Encoding encoding_; |
| 422 | bool overflowed_; |
| 423 | int part_length_; |
| 424 | int current_index_; |
| 425 | Handle<String> accumulator_; |
| 426 | Handle<String> current_part_; |
| 427 | }; |
| 428 | |
| 429 | |
| 430 | template <typename SrcChar, typename DestChar> |
| 431 | void IncrementalStringBuilder::Append(SrcChar c) { |
| 432 | DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); |
| 433 | if (sizeof(DestChar) == 1) { |
| 434 | DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); |
| 435 | SeqOneByteString::cast(*current_part_) |
| 436 | ->SeqOneByteStringSet(current_index_++, c); |
| 437 | } else { |
| 438 | DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); |
| 439 | SeqTwoByteString::cast(*current_part_) |
| 440 | ->SeqTwoByteStringSet(current_index_++, c); |
| 441 | } |
| 442 | if (current_index_ == part_length_) Extend(); |
| 443 | } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 444 | } // namespace internal |
| 445 | } // namespace v8 |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 446 | |
| 447 | #endif // V8_STRING_BUILDER_H_ |