blob: b8b251b02b0b62ce51b048cd2ee33672fc7efaa4 [file] [log] [blame]
Tony Mak336fd9e2020-10-27 17:04:20 +00001// Copyright 2020 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// -----------------------------------------------------------------------------
16// File: cord.h
17// -----------------------------------------------------------------------------
18//
19// This file defines the `absl::Cord` data structure and operations on that data
20// structure. A Cord is a string-like sequence of characters optimized for
21// specific use cases. Unlike a `std::string`, which stores an array of
22// contiguous characters, Cord data is stored in a structure consisting of
23// separate, reference-counted "chunks." (Currently, this implementation is a
24// tree structure, though that implementation may change.)
25//
26// Because a Cord consists of these chunks, data can be added to or removed from
27// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a
28// `std::string`, a Cord can therefore accomodate data that changes over its
29// lifetime, though it's not quite "mutable"; it can change only in the
30// attachment, detachment, or rearrangement of chunks of its constituent data.
31//
32// A Cord provides some benefit over `std::string` under the following (albeit
33// narrow) circumstances:
34//
35// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord
36// provides efficient insertions and deletions at the start and end of the
37// character sequences, avoiding copies in those cases. Static data should
38// generally be stored as strings.
39// * External memory consisting of string-like data can be directly added to
40// a Cord without requiring copies or allocations.
41// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write
42// implementation and cheap sub-Cord operations. Copying a Cord is an O(1)
43// operation.
44//
45// As a consequence to the above, Cord data is generally large. Small data
46// should generally use strings, as construction of a Cord requires some
47// overhead. Small Cords (<= 15 bytes) are represented inline, but most small
48// Cords are expected to grow over their lifetimes.
49//
50// Note that because a Cord is made up of separate chunked data, random access
51// to character data within a Cord is slower than within a `std::string`.
52//
53// Thread Safety
54//
55// Cord has the same thread-safety properties as many other types like
56// std::string, std::vector<>, int, etc -- it is thread-compatible. In
57// particular, if threads do not call non-const methods, then it is safe to call
58// const methods without synchronization. Copying a Cord produces a new instance
59// that can be used concurrently with the original in arbitrary ways.
60
61#ifndef ABSL_STRINGS_CORD_H_
62#define ABSL_STRINGS_CORD_H_
63
64#include <algorithm>
65#include <cstddef>
66#include <cstdint>
67#include <cstring>
68#include <iosfwd>
69#include <iterator>
70#include <string>
71#include <type_traits>
72
73#include "absl/base/internal/endian.h"
74#include "absl/base/internal/per_thread_tls.h"
75#include "absl/base/macros.h"
76#include "absl/base/port.h"
77#include "absl/container/inlined_vector.h"
78#include "absl/functional/function_ref.h"
79#include "absl/meta/type_traits.h"
80#include "absl/strings/internal/cord_internal.h"
81#include "absl/strings/internal/resize_uninitialized.h"
82#include "absl/strings/string_view.h"
83#include "absl/types/optional.h"
84
85namespace absl {
86ABSL_NAMESPACE_BEGIN
87class Cord;
88class CordTestPeer;
89template <typename Releaser>
90Cord MakeCordFromExternal(absl::string_view, Releaser&&);
91void CopyCordToString(const Cord& src, std::string* dst);
92
93// Cord
94//
95// A Cord is a sequence of characters, designed to be more efficient than a
96// `std::string` in certain circumstances: namely, large string data that needs
97// to change over its lifetime or shared, especially when such data is shared
98// across API boundaries.
99//
100// A Cord stores its character data in a structure that allows efficient prepend
101// and append operations. This makes a Cord useful for large string data sent
102// over in a wire format that may need to be prepended or appended at some point
103// during the data exchange (e.g. HTTP, protocol buffers). For example, a
104// Cord is useful for storing an HTTP request, and prepending an HTTP header to
105// such a request.
106//
107// Cords should not be used for storing general string data, however. They
108// require overhead to construct and are slower than strings for random access.
109//
110// The Cord API provides the following common API operations:
111//
112// * Create or assign Cords out of existing string data, memory, or other Cords
113// * Append and prepend data to an existing Cord
114// * Create new Sub-Cords from existing Cord data
115// * Swap Cord data and compare Cord equality
116// * Write out Cord data by constructing a `std::string`
117//
118// Additionally, the API provides iterator utilities to iterate through Cord
119// data via chunks or character bytes.
120//
121class Cord {
122 private:
123 template <typename T>
124 using EnableIfString =
125 absl::enable_if_t<std::is_same<T, std::string>::value, int>;
126
127 public:
128 // Cord::Cord() Constructors.
129
130 // Creates an empty Cord.
131 constexpr Cord() noexcept;
132
133 // Creates a Cord from an existing Cord. Cord is copyable and efficiently
134 // movable. The moved-from state is valid but unspecified.
135 Cord(const Cord& src);
136 Cord(Cord&& src) noexcept;
137 Cord& operator=(const Cord& x);
138 Cord& operator=(Cord&& x) noexcept;
139
140 // Creates a Cord from a `src` string. This constructor is marked explicit to
141 // prevent implicit Cord constructions from arguments convertible to an
142 // `absl::string_view`.
143 explicit Cord(absl::string_view src);
144 Cord& operator=(absl::string_view src);
145
146 // Creates a Cord from a `std::string&&` rvalue. These constructors are
147 // templated to avoid ambiguities for types that are convertible to both
148 // `absl::string_view` and `std::string`, such as `const char*`.
149 template <typename T, EnableIfString<T> = 0>
150 explicit Cord(T&& src);
151 template <typename T, EnableIfString<T> = 0>
152 Cord& operator=(T&& src);
153
154 // Cord::~Cord()
155 //
156 // Destructs the Cord.
157 ~Cord() {
158 if (contents_.is_tree()) DestroyCordSlow();
159 }
160
161 // MakeCordFromExternal()
162 //
163 // Creates a Cord that takes ownership of external string memory. The
164 // contents of `data` are not copied to the Cord; instead, the external
165 // memory is added to the Cord and reference-counted. This data may not be
166 // changed for the life of the Cord, though it may be prepended or appended
167 // to.
168 //
169 // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when
170 // the reference count for `data` reaches zero. As noted above, this data must
171 // remain live until the releaser is invoked. The callable releaser also must:
172 //
173 // * be move constructible
174 // * support `void operator()(absl::string_view) const` or `void operator()`
175 //
176 // Example:
177 //
178 // Cord MakeCord(BlockPool* pool) {
179 // Block* block = pool->NewBlock();
180 // FillBlock(block);
181 // return absl::MakeCordFromExternal(
182 // block->ToStringView(),
183 // [pool, block](absl::string_view v) {
184 // pool->FreeBlock(block, v);
185 // });
186 // }
187 //
188 // WARNING: Because a Cord can be reference-counted, it's likely a bug if your
189 // releaser doesn't do anything. For example, consider the following:
190 //
191 // void Foo(const char* buffer, int len) {
192 // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len),
193 // [](absl::string_view) {});
194 //
195 // // BUG: If Bar() copies its cord for any reason, including keeping a
196 // // substring of it, the lifetime of buffer might be extended beyond
197 // // when Foo() returns.
198 // Bar(c);
199 // }
200 template <typename Releaser>
201 friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser);
202
203 // Cord::Clear()
204 //
205 // Releases the Cord data. Any nodes that share data with other Cords, if
206 // applicable, will have their reference counts reduced by 1.
207 void Clear();
208
209 // Cord::Append()
210 //
211 // Appends data to the Cord, which may come from another Cord or other string
212 // data.
213 void Append(const Cord& src);
214 void Append(Cord&& src);
215 void Append(absl::string_view src);
216 template <typename T, EnableIfString<T> = 0>
217 void Append(T&& src);
218
219 // Cord::Prepend()
220 //
221 // Prepends data to the Cord, which may come from another Cord or other string
222 // data.
223 void Prepend(const Cord& src);
224 void Prepend(absl::string_view src);
225 template <typename T, EnableIfString<T> = 0>
226 void Prepend(T&& src);
227
228 // Cord::RemovePrefix()
229 //
230 // Removes the first `n` bytes of a Cord.
231 void RemovePrefix(size_t n);
232 void RemoveSuffix(size_t n);
233
234 // Cord::Subcord()
235 //
236 // Returns a new Cord representing the subrange [pos, pos + new_size) of
237 // *this. If pos >= size(), the result is empty(). If
238 // (pos + new_size) >= size(), the result is the subrange [pos, size()).
239 Cord Subcord(size_t pos, size_t new_size) const;
240
241 // Cord::swap()
242 //
243 // Swaps the contents of the Cord with `other`.
244 void swap(Cord& other) noexcept;
245
246 // swap()
247 //
248 // Swaps the contents of two Cords.
249 friend void swap(Cord& x, Cord& y) noexcept {
250 x.swap(y);
251 }
252
253 // Cord::size()
254 //
255 // Returns the size of the Cord.
256 size_t size() const;
257
258 // Cord::empty()
259 //
260 // Determines whether the given Cord is empty, returning `true` is so.
261 bool empty() const;
262
263 // Cord::EstimatedMemoryUsage()
264 //
265 // Returns the *approximate* number of bytes held in full or in part by this
266 // Cord (which may not remain the same between invocations). Note that Cords
267 // that share memory could each be "charged" independently for the same shared
268 // memory.
269 size_t EstimatedMemoryUsage() const;
270
271 // Cord::Compare()
272 //
273 // Compares 'this' Cord with rhs. This function and its relatives treat Cords
274 // as sequences of unsigned bytes. The comparison is a straightforward
275 // lexicographic comparison. `Cord::Compare()` returns values as follows:
276 //
277 // -1 'this' Cord is smaller
278 // 0 two Cords are equal
279 // 1 'this' Cord is larger
280 int Compare(absl::string_view rhs) const;
281 int Compare(const Cord& rhs) const;
282
283 // Cord::StartsWith()
284 //
285 // Determines whether the Cord starts with the passed string data `rhs`.
286 bool StartsWith(const Cord& rhs) const;
287 bool StartsWith(absl::string_view rhs) const;
288
289 // Cord::EndsWidth()
290 //
291 // Determines whether the Cord ends with the passed string data `rhs`.
292 bool EndsWith(absl::string_view rhs) const;
293 bool EndsWith(const Cord& rhs) const;
294
295 // Cord::operator std::string()
296 //
297 // Converts a Cord into a `std::string()`. This operator is marked explicit to
298 // prevent unintended Cord usage in functions that take a string.
299 explicit operator std::string() const;
300
301 // CopyCordToString()
302 //
303 // Copies the contents of a `src` Cord into a `*dst` string.
304 //
305 // This function optimizes the case of reusing the destination string since it
306 // can reuse previously allocated capacity. However, this function does not
307 // guarantee that pointers previously returned by `dst->data()` remain valid
308 // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new
309 // object, prefer to simply use the conversion operator to `std::string`.
310 friend void CopyCordToString(const Cord& src, std::string* dst);
311
312 class CharIterator;
313
314 //----------------------------------------------------------------------------
315 // Cord::ChunkIterator
316 //----------------------------------------------------------------------------
317 //
318 // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its
319 // Cord. Such iteration allows you to perform non-const operatons on the data
320 // of a Cord without modifying it.
321 //
322 // Generally, you do not instantiate a `Cord::ChunkIterator` directly;
323 // instead, you create one implicitly through use of the `Cord::Chunks()`
324 // member function.
325 //
326 // The `Cord::ChunkIterator` has the following properties:
327 //
328 // * The iterator is invalidated after any non-const operation on the
329 // Cord object over which it iterates.
330 // * The `string_view` returned by dereferencing a valid, non-`end()`
331 // iterator is guaranteed to be non-empty.
332 // * Two `ChunkIterator` objects can be compared equal if and only if they
333 // remain valid and iterate over the same Cord.
334 // * The iterator in this case is a proxy iterator; the `string_view`
335 // returned by the iterator does not live inside the Cord, and its
336 // lifetime is limited to the lifetime of the iterator itself. To help
337 // prevent lifetime issues, `ChunkIterator::reference` is not a true
338 // reference type and is equivalent to `value_type`.
339 // * The iterator keeps state that can grow for Cords that contain many
340 // nodes and are imbalanced due to sharing. Prefer to pass this type by
341 // const reference instead of by value.
342 class ChunkIterator {
343 public:
344 using iterator_category = std::input_iterator_tag;
345 using value_type = absl::string_view;
346 using difference_type = ptrdiff_t;
347 using pointer = const value_type*;
348 using reference = value_type;
349
350 ChunkIterator() = default;
351
352 ChunkIterator& operator++();
353 ChunkIterator operator++(int);
354 bool operator==(const ChunkIterator& other) const;
355 bool operator!=(const ChunkIterator& other) const;
356 reference operator*() const;
357 pointer operator->() const;
358
359 friend class Cord;
360 friend class CharIterator;
361
362 private:
363 // Constructs a `begin()` iterator from `cord`.
364 explicit ChunkIterator(const Cord* cord);
365
366 // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than
367 // `current_chunk_.size()`.
368 void RemoveChunkPrefix(size_t n);
369 Cord AdvanceAndReadBytes(size_t n);
370 void AdvanceBytes(size_t n);
371 // Iterates `n` bytes, where `n` is expected to be greater than or equal to
372 // `current_chunk_.size()`.
373 void AdvanceBytesSlowPath(size_t n);
374
375 // A view into bytes of the current `CordRep`. It may only be a view to a
376 // suffix of bytes if this is being used by `CharIterator`.
377 absl::string_view current_chunk_;
378 // The current leaf, or `nullptr` if the iterator points to short data.
379 // If the current chunk is a substring node, current_leaf_ points to the
380 // underlying flat or external node.
381 absl::cord_internal::CordRep* current_leaf_ = nullptr;
382 // The number of bytes left in the `Cord` over which we are iterating.
383 size_t bytes_remaining_ = 0;
384 absl::InlinedVector<absl::cord_internal::CordRep*, 4>
385 stack_of_right_children_;
386 };
387
388 // Cord::ChunkIterator::chunk_begin()
389 //
390 // Returns an iterator to the first chunk of the `Cord`.
391 //
392 // Generally, prefer using `Cord::Chunks()` within a range-based for loop for
393 // iterating over the chunks of a Cord. This method may be useful for getting
394 // a `ChunkIterator` where range-based for-loops are not useful.
395 //
396 // Example:
397 //
398 // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c,
399 // absl::string_view s) {
400 // return std::find(c.chunk_begin(), c.chunk_end(), s);
401 // }
402 ChunkIterator chunk_begin() const;
403
404 // Cord::ChunkItertator::chunk_end()
405 //
406 // Returns an iterator one increment past the last chunk of the `Cord`.
407 //
408 // Generally, prefer using `Cord::Chunks()` within a range-based for loop for
409 // iterating over the chunks of a Cord. This method may be useful for getting
410 // a `ChunkIterator` where range-based for-loops may not be available.
411 ChunkIterator chunk_end() const;
412
413 //----------------------------------------------------------------------------
414 // Cord::ChunkIterator::ChunkRange
415 //----------------------------------------------------------------------------
416 //
417 // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`,
418 // producing an iterator which can be used within a range-based for loop.
419 // Construction of a `ChunkRange` will return an iterator pointing to the
420 // first chunk of the Cord. Generally, do not construct a `ChunkRange`
421 // directly; instead, prefer to use the `Cord::Chunks()` method.
422 //
423 // Implementation note: `ChunkRange` is simply a convenience wrapper over
424 // `Cord::chunk_begin()` and `Cord::chunk_end()`.
425 class ChunkRange {
426 public:
427 explicit ChunkRange(const Cord* cord) : cord_(cord) {}
428
429 ChunkIterator begin() const;
430 ChunkIterator end() const;
431
432 private:
433 const Cord* cord_;
434 };
435
436 // Cord::Chunks()
437 //
438 // Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks
439 // of a `Cord` with a range-based for-loop. For most iteration tasks on a
440 // Cord, use `Cord::Chunks()` to retrieve this iterator.
441 //
442 // Example:
443 //
444 // void ProcessChunks(const Cord& cord) {
445 // for (absl::string_view chunk : cord.Chunks()) { ... }
446 // }
447 //
448 // Note that the ordinary caveats of temporary lifetime extension apply:
449 //
450 // void Process() {
451 // for (absl::string_view chunk : CordFactory().Chunks()) {
452 // // The temporary Cord returned by CordFactory has been destroyed!
453 // }
454 // }
455 ChunkRange Chunks() const;
456
457 //----------------------------------------------------------------------------
458 // Cord::CharIterator
459 //----------------------------------------------------------------------------
460 //
461 // A `Cord::CharIterator` allows iteration over the constituent characters of
462 // a `Cord`.
463 //
464 // Generally, you do not instantiate a `Cord::CharIterator` directly; instead,
465 // you create one implicitly through use of the `Cord::Chars()` member
466 // function.
467 //
468 // A `Cord::CharIterator` has the following properties:
469 //
470 // * The iterator is invalidated after any non-const operation on the
471 // Cord object over which it iterates.
472 // * Two `CharIterator` objects can be compared equal if and only if they
473 // remain valid and iterate over the same Cord.
474 // * The iterator keeps state that can grow for Cords that contain many
475 // nodes and are imbalanced due to sharing. Prefer to pass this type by
476 // const reference instead of by value.
477 // * This type cannot act as a forward iterator because a `Cord` can reuse
478 // sections of memory. This fact violates the requirement for forward
479 // iterators to compare equal if dereferencing them returns the same
480 // object.
481 class CharIterator {
482 public:
483 using iterator_category = std::input_iterator_tag;
484 using value_type = char;
485 using difference_type = ptrdiff_t;
486 using pointer = const char*;
487 using reference = const char&;
488
489 CharIterator() = default;
490
491 CharIterator& operator++();
492 CharIterator operator++(int);
493 bool operator==(const CharIterator& other) const;
494 bool operator!=(const CharIterator& other) const;
495 reference operator*() const;
496 pointer operator->() const;
497
498 friend Cord;
499
500 private:
501 explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {}
502
503 ChunkIterator chunk_iterator_;
504 };
505
506 // Cord::CharIterator::AdvanceAndRead()
507 //
508 // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes
509 // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the
510 // number of bytes within the Cord; otherwise, behavior is undefined. It is
511 // valid to pass `char_end()` and `0`.
512 static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes);
513
514 // Cord::CharIterator::Advance()
515 //
516 // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than
517 // or equal to the number of bytes remaining within the Cord; otherwise,
518 // behavior is undefined. It is valid to pass `char_end()` and `0`.
519 static void Advance(CharIterator* it, size_t n_bytes);
520
521 // Cord::CharIterator::ChunkRemaining()
522 //
523 // Returns the longest contiguous view starting at the iterator's position.
524 //
525 // `it` must be dereferenceable.
526 static absl::string_view ChunkRemaining(const CharIterator& it);
527
528 // Cord::CharIterator::char_begin()
529 //
530 // Returns an iterator to the first character of the `Cord`.
531 //
532 // Generally, prefer using `Cord::Chars()` within a range-based for loop for
533 // iterating over the chunks of a Cord. This method may be useful for getting
534 // a `CharIterator` where range-based for-loops may not be available.
535 CharIterator char_begin() const;
536
537 // Cord::CharIterator::char_end()
538 //
539 // Returns an iterator to one past the last character of the `Cord`.
540 //
541 // Generally, prefer using `Cord::Chars()` within a range-based for loop for
542 // iterating over the chunks of a Cord. This method may be useful for getting
543 // a `CharIterator` where range-based for-loops are not useful.
544 CharIterator char_end() const;
545
546 // Cord::CharIterator::CharRange
547 //
548 // `CharRange` is a helper class for iterating over the characters of a
549 // producing an iterator which can be used within a range-based for loop.
550 // Construction of a `CharRange` will return an iterator pointing to the first
551 // character of the Cord. Generally, do not construct a `CharRange` directly;
552 // instead, prefer to use the `Cord::Chars()` method show below.
553 //
554 // Implementation note: `CharRange` is simply a convenience wrapper over
555 // `Cord::char_begin()` and `Cord::char_end()`.
556 class CharRange {
557 public:
558 explicit CharRange(const Cord* cord) : cord_(cord) {}
559
560 CharIterator begin() const;
561 CharIterator end() const;
562
563 private:
564 const Cord* cord_;
565 };
566
567 // Cord::CharIterator::Chars()
568 //
569 // Returns a `Cord::CharIterator` for iterating over the characters of a
570 // `Cord` with a range-based for-loop. For most character-based iteration
571 // tasks on a Cord, use `Cord::Chars()` to retrieve this iterator.
572 //
573 // Example:
574 //
575 // void ProcessCord(const Cord& cord) {
576 // for (char c : cord.Chars()) { ... }
577 // }
578 //
579 // Note that the ordinary caveats of temporary lifetime extension apply:
580 //
581 // void Process() {
582 // for (char c : CordFactory().Chars()) {
583 // // The temporary Cord returned by CordFactory has been destroyed!
584 // }
585 // }
586 CharRange Chars() const;
587
588 // Cord::operator[]
589 //
590 // Gets the "i"th character of the Cord and returns it, provided that
591 // 0 <= i < Cord.size().
592 //
593 // NOTE: This routine is reasonably efficient. It is roughly
594 // logarithmic based on the number of chunks that make up the cord. Still,
595 // if you need to iterate over the contents of a cord, you should
596 // use a CharIterator/ChunkIterator rather than call operator[] or Get()
597 // repeatedly in a loop.
598 char operator[](size_t i) const;
599
600 // Cord::TryFlat()
601 //
602 // If this cord's representation is a single flat array, returns a
603 // string_view referencing that array. Otherwise returns nullopt.
604 absl::optional<absl::string_view> TryFlat() const;
605
606 // Cord::Flatten()
607 //
608 // Flattens the cord into a single array and returns a view of the data.
609 //
610 // If the cord was already flat, the contents are not modified.
611 absl::string_view Flatten();
612
613 // Supports absl::Cord as a sink object for absl::Format().
614 friend void AbslFormatFlush(absl::Cord* cord, absl::string_view part) {
615 cord->Append(part);
616 }
617
618 template <typename H>
619 friend H AbslHashValue(H hash_state, const absl::Cord& c) {
620 absl::optional<absl::string_view> maybe_flat = c.TryFlat();
621 if (maybe_flat.has_value()) {
622 return H::combine(std::move(hash_state), *maybe_flat);
623 }
624 return c.HashFragmented(std::move(hash_state));
625 }
626
627 private:
628 friend class CordTestPeer;
629 friend bool operator==(const Cord& lhs, const Cord& rhs);
630 friend bool operator==(const Cord& lhs, absl::string_view rhs);
631
632 // Calls the provided function once for each cord chunk, in order. Unlike
633 // Chunks(), this API will not allocate memory.
634 void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const;
635
636 // Allocates new contiguous storage for the contents of the cord. This is
637 // called by Flatten() when the cord was not already flat.
638 absl::string_view FlattenSlowPath();
639
640 // Actual cord contents are hidden inside the following simple
641 // class so that we can isolate the bulk of cord.cc from changes
642 // to the representation.
643 //
644 // InlineRep holds either a tree pointer, or an array of kMaxInline bytes.
645 class InlineRep {
646 public:
647 static constexpr unsigned char kMaxInline = 15;
648 static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), "");
649 // Tag byte & kMaxInline means we are storing a pointer.
650 static constexpr unsigned char kTreeFlag = 1 << 4;
651 // Tag byte & kProfiledFlag means we are profiling the Cord.
652 static constexpr unsigned char kProfiledFlag = 1 << 5;
653
654 constexpr InlineRep() : data_{} {}
655 InlineRep(const InlineRep& src);
656 InlineRep(InlineRep&& src);
657 InlineRep& operator=(const InlineRep& src);
658 InlineRep& operator=(InlineRep&& src) noexcept;
659
660 void Swap(InlineRep* rhs);
661 bool empty() const;
662 size_t size() const;
663 const char* data() const; // Returns nullptr if holding pointer
664 void set_data(const char* data, size_t n,
665 bool nullify_tail); // Discards pointer, if any
666 char* set_data(size_t n); // Write data to the result
667 // Returns nullptr if holding bytes
668 absl::cord_internal::CordRep* tree() const;
669 // Discards old pointer, if any
670 void set_tree(absl::cord_internal::CordRep* rep);
671 // Replaces a tree with a new root. This is faster than set_tree, but it
672 // should only be used when it's clear that the old rep was a tree.
673 void replace_tree(absl::cord_internal::CordRep* rep);
674 // Returns non-null iff was holding a pointer
675 absl::cord_internal::CordRep* clear();
676 // Converts to pointer if necessary.
677 absl::cord_internal::CordRep* force_tree(size_t extra_hint);
678 void reduce_size(size_t n); // REQUIRES: holding data
679 void remove_prefix(size_t n); // REQUIRES: holding data
680 void AppendArray(const char* src_data, size_t src_size);
681 absl::string_view FindFlatStartPiece() const;
682 void AppendTree(absl::cord_internal::CordRep* tree);
683 void PrependTree(absl::cord_internal::CordRep* tree);
684 void GetAppendRegion(char** region, size_t* size, size_t max_length);
685 void GetAppendRegion(char** region, size_t* size);
686 bool IsSame(const InlineRep& other) const {
687 return memcmp(data_, other.data_, sizeof(data_)) == 0;
688 }
689 int BitwiseCompare(const InlineRep& other) const {
690 uint64_t x, y;
691 // Use memcpy to avoid anti-aliasing issues.
692 memcpy(&x, data_, sizeof(x));
693 memcpy(&y, other.data_, sizeof(y));
694 if (x == y) {
695 memcpy(&x, data_ + 8, sizeof(x));
696 memcpy(&y, other.data_ + 8, sizeof(y));
697 if (x == y) return 0;
698 }
699 return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y)
700 ? -1
701 : 1;
702 }
703 void CopyTo(std::string* dst) const {
704 // memcpy is much faster when operating on a known size. On most supported
705 // platforms, the small string optimization is large enough that resizing
706 // to 15 bytes does not cause a memory allocation.
707 absl::strings_internal::STLStringResizeUninitialized(dst,
708 sizeof(data_) - 1);
709 memcpy(&(*dst)[0], data_, sizeof(data_) - 1);
710 // erase is faster than resize because the logic for memory allocation is
711 // not needed.
712 dst->erase(data_[kMaxInline]);
713 }
714
715 // Copies the inline contents into `dst`. Assumes the cord is not empty.
716 void CopyToArray(char* dst) const;
717
718 bool is_tree() const { return data_[kMaxInline] > kMaxInline; }
719
720 private:
721 friend class Cord;
722
723 void AssignSlow(const InlineRep& src);
724 // Unrefs the tree, stops profiling, and zeroes the contents
725 void ClearSlow();
726
727 // If the data has length <= kMaxInline, we store it in data_[0..len-1],
728 // and store the length in data_[kMaxInline]. Else we store it in a tree
729 // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1].
730 alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1];
731 };
732 InlineRep contents_;
733
734 // Helper for MemoryUsage().
735 static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep);
736
737 // Helper for GetFlat() and TryFlat().
738 static bool GetFlatAux(absl::cord_internal::CordRep* rep,
739 absl::string_view* fragment);
740
741 // Helper for ForEachChunk().
742 static void ForEachChunkAux(
743 absl::cord_internal::CordRep* rep,
744 absl::FunctionRef<void(absl::string_view)> callback);
745
746 // The destructor for non-empty Cords.
747 void DestroyCordSlow();
748
749 // Out-of-line implementation of slower parts of logic.
750 void CopyToArraySlowPath(char* dst) const;
751 int CompareSlowPath(absl::string_view rhs, size_t compared_size,
752 size_t size_to_compare) const;
753 int CompareSlowPath(const Cord& rhs, size_t compared_size,
754 size_t size_to_compare) const;
755 bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const;
756 bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const;
757 int CompareImpl(const Cord& rhs) const;
758
759 template <typename ResultType, typename RHS>
760 friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
761 size_t size_to_compare);
762 static absl::string_view GetFirstChunk(const Cord& c);
763 static absl::string_view GetFirstChunk(absl::string_view sv);
764
765 // Returns a new reference to contents_.tree(), or steals an existing
766 // reference if called on an rvalue.
767 absl::cord_internal::CordRep* TakeRep() const&;
768 absl::cord_internal::CordRep* TakeRep() &&;
769
770 // Helper for Append().
771 template <typename C>
772 void AppendImpl(C&& src);
773
774 // Helper for AbslHashValue().
775 template <typename H>
776 H HashFragmented(H hash_state) const {
777 typename H::AbslInternalPiecewiseCombiner combiner;
778 ForEachChunk([&combiner, &hash_state](absl::string_view chunk) {
779 hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(),
780 chunk.size());
781 });
782 return H::combine(combiner.finalize(std::move(hash_state)), size());
783 }
784};
785
786ABSL_NAMESPACE_END
787} // namespace absl
788
789namespace absl {
790ABSL_NAMESPACE_BEGIN
791
792// allow a Cord to be logged
793extern std::ostream& operator<<(std::ostream& out, const Cord& cord);
794
795// ------------------------------------------------------------------
796// Internal details follow. Clients should ignore.
797
798namespace cord_internal {
799
800// Fast implementation of memmove for up to 15 bytes. This implementation is
801// safe for overlapping regions. If nullify_tail is true, the destination is
802// padded with '\0' up to 16 bytes.
803inline void SmallMemmove(char* dst, const char* src, size_t n,
804 bool nullify_tail = false) {
805 if (n >= 8) {
806 assert(n <= 16);
807 uint64_t buf1;
808 uint64_t buf2;
809 memcpy(&buf1, src, 8);
810 memcpy(&buf2, src + n - 8, 8);
811 if (nullify_tail) {
812 memset(dst + 8, 0, 8);
813 }
814 memcpy(dst, &buf1, 8);
815 memcpy(dst + n - 8, &buf2, 8);
816 } else if (n >= 4) {
817 uint32_t buf1;
818 uint32_t buf2;
819 memcpy(&buf1, src, 4);
820 memcpy(&buf2, src + n - 4, 4);
821 if (nullify_tail) {
822 memset(dst + 4, 0, 4);
823 memset(dst + 8, 0, 8);
824 }
825 memcpy(dst, &buf1, 4);
826 memcpy(dst + n - 4, &buf2, 4);
827 } else {
828 if (n != 0) {
829 dst[0] = src[0];
830 dst[n / 2] = src[n / 2];
831 dst[n - 1] = src[n - 1];
832 }
833 if (nullify_tail) {
834 memset(dst + 8, 0, 8);
835 memset(dst + n, 0, 8);
836 }
837 }
838}
839
840// Does non-template-specific `CordRepExternal` initialization.
841// Expects `data` to be non-empty.
842void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep);
843
844// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer
845// to it, or `nullptr` if `data` was empty.
846template <typename Releaser>
847// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
848CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) {
849 using ReleaserType = absl::decay_t<Releaser>;
850 if (data.empty()) {
851 // Never create empty external nodes.
852 InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)),
853 data);
854 return nullptr;
855 }
856
857 CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>(
858 std::forward<Releaser>(releaser), 0);
859 InitializeCordRepExternal(data, rep);
860 return rep;
861}
862
863// Overload for function reference types that dispatches using a function
864// pointer because there are no `alignof()` or `sizeof()` a function reference.
865// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
866inline CordRep* NewExternalRep(absl::string_view data,
867 void (&releaser)(absl::string_view)) {
868 return NewExternalRep(data, &releaser);
869}
870
871} // namespace cord_internal
872
873template <typename Releaser>
874Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) {
875 Cord cord;
876 cord.contents_.set_tree(::absl::cord_internal::NewExternalRep(
877 data, std::forward<Releaser>(releaser)));
878 return cord;
879}
880
881inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) {
882 cord_internal::SmallMemmove(data_, src.data_, sizeof(data_));
883}
884
885inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) {
886 memcpy(data_, src.data_, sizeof(data_));
887 memset(src.data_, 0, sizeof(data_));
888}
889
890inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) {
891 if (this == &src) {
892 return *this;
893 }
894 if (!is_tree() && !src.is_tree()) {
895 cord_internal::SmallMemmove(data_, src.data_, sizeof(data_));
896 return *this;
897 }
898 AssignSlow(src);
899 return *this;
900}
901
902inline Cord::InlineRep& Cord::InlineRep::operator=(
903 Cord::InlineRep&& src) noexcept {
904 if (is_tree()) {
905 ClearSlow();
906 }
907 memcpy(data_, src.data_, sizeof(data_));
908 memset(src.data_, 0, sizeof(data_));
909 return *this;
910}
911
912inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) {
913 if (rhs == this) {
914 return;
915 }
916
917 Cord::InlineRep tmp;
918 cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_));
919 cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_));
920 cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_));
921}
922
923inline const char* Cord::InlineRep::data() const {
924 return is_tree() ? nullptr : data_;
925}
926
927inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const {
928 if (is_tree()) {
929 absl::cord_internal::CordRep* rep;
930 memcpy(&rep, data_, sizeof(rep));
931 return rep;
932 } else {
933 return nullptr;
934 }
935}
936
937inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; }
938
939inline size_t Cord::InlineRep::size() const {
940 const char tag = data_[kMaxInline];
941 if (tag <= kMaxInline) return tag;
942 return static_cast<size_t>(tree()->length);
943}
944
945inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) {
946 if (rep == nullptr) {
947 memset(data_, 0, sizeof(data_));
948 } else {
949 bool was_tree = is_tree();
950 memcpy(data_, &rep, sizeof(rep));
951 memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1);
952 if (!was_tree) {
953 data_[kMaxInline] = kTreeFlag;
954 }
955 }
956}
957
958inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) {
959 ABSL_ASSERT(is_tree());
960 if (ABSL_PREDICT_FALSE(rep == nullptr)) {
961 set_tree(rep);
962 return;
963 }
964 memcpy(data_, &rep, sizeof(rep));
965 memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1);
966}
967
968inline absl::cord_internal::CordRep* Cord::InlineRep::clear() {
969 const char tag = data_[kMaxInline];
970 absl::cord_internal::CordRep* result = nullptr;
971 if (tag > kMaxInline) {
972 memcpy(&result, data_, sizeof(result));
973 }
974 memset(data_, 0, sizeof(data_)); // Clear the cord
975 return result;
976}
977
978inline void Cord::InlineRep::CopyToArray(char* dst) const {
979 assert(!is_tree());
980 size_t n = data_[kMaxInline];
981 assert(n != 0);
982 cord_internal::SmallMemmove(dst, data_, n);
983}
984
985constexpr inline Cord::Cord() noexcept {}
986
987inline Cord& Cord::operator=(const Cord& x) {
988 contents_ = x.contents_;
989 return *this;
990}
991
992inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {}
993
994inline void Cord::swap(Cord& other) noexcept {
995 contents_.Swap(&other.contents_);
996}
997
998inline Cord& Cord::operator=(Cord&& x) noexcept {
999 contents_ = std::move(x.contents_);
1000 return *this;
1001}
1002
1003extern template Cord::Cord(std::string&& src);
1004extern template Cord& Cord::operator=(std::string&& src);
1005
1006inline size_t Cord::size() const {
1007 // Length is 1st field in str.rep_
1008 return contents_.size();
1009}
1010
1011inline bool Cord::empty() const { return contents_.empty(); }
1012
1013inline size_t Cord::EstimatedMemoryUsage() const {
1014 size_t result = sizeof(Cord);
1015 if (const absl::cord_internal::CordRep* rep = contents_.tree()) {
1016 result += MemoryUsageAux(rep);
1017 }
1018 return result;
1019}
1020
1021inline absl::optional<absl::string_view> Cord::TryFlat() const {
1022 absl::cord_internal::CordRep* rep = contents_.tree();
1023 if (rep == nullptr) {
1024 return absl::string_view(contents_.data(), contents_.size());
1025 }
1026 absl::string_view fragment;
1027 if (GetFlatAux(rep, &fragment)) {
1028 return fragment;
1029 }
1030 return absl::nullopt;
1031}
1032
1033inline absl::string_view Cord::Flatten() {
1034 absl::cord_internal::CordRep* rep = contents_.tree();
1035 if (rep == nullptr) {
1036 return absl::string_view(contents_.data(), contents_.size());
1037 } else {
1038 absl::string_view already_flat_contents;
1039 if (GetFlatAux(rep, &already_flat_contents)) {
1040 return already_flat_contents;
1041 }
1042 }
1043 return FlattenSlowPath();
1044}
1045
1046inline void Cord::Append(absl::string_view src) {
1047 contents_.AppendArray(src.data(), src.size());
1048}
1049
1050extern template void Cord::Append(std::string&& src);
1051extern template void Cord::Prepend(std::string&& src);
1052
1053inline int Cord::Compare(const Cord& rhs) const {
1054 if (!contents_.is_tree() && !rhs.contents_.is_tree()) {
1055 return contents_.BitwiseCompare(rhs.contents_);
1056 }
1057
1058 return CompareImpl(rhs);
1059}
1060
1061// Does 'this' cord start/end with rhs
1062inline bool Cord::StartsWith(const Cord& rhs) const {
1063 if (contents_.IsSame(rhs.contents_)) return true;
1064 size_t rhs_size = rhs.size();
1065 if (size() < rhs_size) return false;
1066 return EqualsImpl(rhs, rhs_size);
1067}
1068
1069inline bool Cord::StartsWith(absl::string_view rhs) const {
1070 size_t rhs_size = rhs.size();
1071 if (size() < rhs_size) return false;
1072 return EqualsImpl(rhs, rhs_size);
1073}
1074
1075inline Cord::ChunkIterator::ChunkIterator(const Cord* cord)
1076 : bytes_remaining_(cord->size()) {
1077 if (cord->empty()) return;
1078 if (cord->contents_.is_tree()) {
1079 stack_of_right_children_.push_back(cord->contents_.tree());
1080 operator++();
1081 } else {
1082 current_chunk_ = absl::string_view(cord->contents_.data(), cord->size());
1083 }
1084}
1085
1086inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) {
1087 ChunkIterator tmp(*this);
1088 operator++();
1089 return tmp;
1090}
1091
1092inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const {
1093 return bytes_remaining_ == other.bytes_remaining_;
1094}
1095
1096inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const {
1097 return !(*this == other);
1098}
1099
1100inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const {
1101 ABSL_HARDENING_ASSERT(bytes_remaining_ != 0);
1102 return current_chunk_;
1103}
1104
1105inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const {
1106 ABSL_HARDENING_ASSERT(bytes_remaining_ != 0);
1107 return &current_chunk_;
1108}
1109
1110inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) {
1111 assert(n < current_chunk_.size());
1112 current_chunk_.remove_prefix(n);
1113 bytes_remaining_ -= n;
1114}
1115
1116inline void Cord::ChunkIterator::AdvanceBytes(size_t n) {
1117 if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) {
1118 RemoveChunkPrefix(n);
1119 } else if (n != 0) {
1120 AdvanceBytesSlowPath(n);
1121 }
1122}
1123
1124inline Cord::ChunkIterator Cord::chunk_begin() const {
1125 return ChunkIterator(this);
1126}
1127
1128inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); }
1129
1130inline Cord::ChunkIterator Cord::ChunkRange::begin() const {
1131 return cord_->chunk_begin();
1132}
1133
1134inline Cord::ChunkIterator Cord::ChunkRange::end() const {
1135 return cord_->chunk_end();
1136}
1137
1138inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); }
1139
1140inline Cord::CharIterator& Cord::CharIterator::operator++() {
1141 if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) {
1142 chunk_iterator_.RemoveChunkPrefix(1);
1143 } else {
1144 ++chunk_iterator_;
1145 }
1146 return *this;
1147}
1148
1149inline Cord::CharIterator Cord::CharIterator::operator++(int) {
1150 CharIterator tmp(*this);
1151 operator++();
1152 return tmp;
1153}
1154
1155inline bool Cord::CharIterator::operator==(const CharIterator& other) const {
1156 return chunk_iterator_ == other.chunk_iterator_;
1157}
1158
1159inline bool Cord::CharIterator::operator!=(const CharIterator& other) const {
1160 return !(*this == other);
1161}
1162
1163inline Cord::CharIterator::reference Cord::CharIterator::operator*() const {
1164 return *chunk_iterator_->data();
1165}
1166
1167inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const {
1168 return chunk_iterator_->data();
1169}
1170
1171inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) {
1172 assert(it != nullptr);
1173 return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes);
1174}
1175
1176inline void Cord::Advance(CharIterator* it, size_t n_bytes) {
1177 assert(it != nullptr);
1178 it->chunk_iterator_.AdvanceBytes(n_bytes);
1179}
1180
1181inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) {
1182 return *it.chunk_iterator_;
1183}
1184
1185inline Cord::CharIterator Cord::char_begin() const {
1186 return CharIterator(this);
1187}
1188
1189inline Cord::CharIterator Cord::char_end() const { return CharIterator(); }
1190
1191inline Cord::CharIterator Cord::CharRange::begin() const {
1192 return cord_->char_begin();
1193}
1194
1195inline Cord::CharIterator Cord::CharRange::end() const {
1196 return cord_->char_end();
1197}
1198
1199inline Cord::CharRange Cord::Chars() const { return CharRange(this); }
1200
1201inline void Cord::ForEachChunk(
1202 absl::FunctionRef<void(absl::string_view)> callback) const {
1203 absl::cord_internal::CordRep* rep = contents_.tree();
1204 if (rep == nullptr) {
1205 callback(absl::string_view(contents_.data(), contents_.size()));
1206 } else {
1207 return ForEachChunkAux(rep, callback);
1208 }
1209}
1210
1211// Nonmember Cord-to-Cord relational operarators.
1212inline bool operator==(const Cord& lhs, const Cord& rhs) {
1213 if (lhs.contents_.IsSame(rhs.contents_)) return true;
1214 size_t rhs_size = rhs.size();
1215 if (lhs.size() != rhs_size) return false;
1216 return lhs.EqualsImpl(rhs, rhs_size);
1217}
1218
1219inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); }
1220inline bool operator<(const Cord& x, const Cord& y) {
1221 return x.Compare(y) < 0;
1222}
1223inline bool operator>(const Cord& x, const Cord& y) {
1224 return x.Compare(y) > 0;
1225}
1226inline bool operator<=(const Cord& x, const Cord& y) {
1227 return x.Compare(y) <= 0;
1228}
1229inline bool operator>=(const Cord& x, const Cord& y) {
1230 return x.Compare(y) >= 0;
1231}
1232
1233// Nonmember Cord-to-absl::string_view relational operators.
1234//
1235// Due to implicit conversions, these also enable comparisons of Cord with
1236// with std::string, ::string, and const char*.
1237inline bool operator==(const Cord& lhs, absl::string_view rhs) {
1238 size_t lhs_size = lhs.size();
1239 size_t rhs_size = rhs.size();
1240 if (lhs_size != rhs_size) return false;
1241 return lhs.EqualsImpl(rhs, rhs_size);
1242}
1243
1244inline bool operator==(absl::string_view x, const Cord& y) { return y == x; }
1245inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); }
1246inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); }
1247inline bool operator<(const Cord& x, absl::string_view y) {
1248 return x.Compare(y) < 0;
1249}
1250inline bool operator<(absl::string_view x, const Cord& y) {
1251 return y.Compare(x) > 0;
1252}
1253inline bool operator>(const Cord& x, absl::string_view y) { return y < x; }
1254inline bool operator>(absl::string_view x, const Cord& y) { return y < x; }
1255inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); }
1256inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); }
1257inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); }
1258inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); }
1259
1260// Some internals exposed to test code.
1261namespace strings_internal {
1262class CordTestAccess {
1263 public:
1264 static size_t FlatOverhead();
1265 static size_t MaxFlatLength();
1266 static size_t SizeofCordRepConcat();
1267 static size_t SizeofCordRepExternal();
1268 static size_t SizeofCordRepSubstring();
1269 static size_t FlatTagToLength(uint8_t tag);
1270 static uint8_t LengthToTag(size_t s);
1271};
1272} // namespace strings_internal
1273ABSL_NAMESPACE_END
1274} // namespace absl
1275
1276#endif // ABSL_STRINGS_CORD_H_