blob: bdfc39f7ec826661735ac4b5be559dea4d7c8abc [file] [log] [blame]
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_
#define SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_
#include <stddef.h>
#include <stdint.h>
#include <unordered_map>
#include <vector>
#include "perfetto/ext/base/optional.h"
#include "perfetto/ext/base/paged_memory.h"
#include "perfetto/protozero/proto_utils.h"
#include "src/trace_processor/containers/null_term_string_view.h"
namespace perfetto {
namespace trace_processor {
// On 64-bit platforms, the string pool is implemented as a mmaped buffer
// of 4GB with the id being equal ot the offset into this buffer of the string.
// On 32-bit platforms instead, the implementation allocates 32MB blocks of
// mmaped memory with the pointer being directly converted to the id.
constexpr size_t kDefaultBlockSize =
sizeof(void*) == 8
? static_cast<size_t>(4ull * 1024ull * 1024ull * 1024ull) /* 4GB */
: 32ull * 1024ull * 1024ull /* 32MB */;
// Interns strings in a string pool and hands out compact StringIds which can
// be used to retrieve the string in O(1).
class StringPool {
public:
struct Id {
Id() = default;
constexpr Id(uint32_t i) : id(i) {}
bool operator==(const Id& other) const { return other.id == id; }
bool operator!=(const Id& other) const { return !(other == *this); }
bool operator<(const Id& other) const { return id < other.id; }
bool is_null() const { return id == 0u; }
uint32_t id;
};
// Iterator over the strings in the pool.
class Iterator {
public:
Iterator(const StringPool*);
explicit operator bool() const;
Iterator& operator++();
NullTermStringView StringView();
Id StringId();
private:
const StringPool* pool_ = nullptr;
uint32_t block_id_ = 0;
uint32_t block_offset_ = 0;
};
StringPool(size_t block_size_bytes = kDefaultBlockSize);
~StringPool();
// Allow std::move().
StringPool(StringPool&&) noexcept;
StringPool& operator=(StringPool&&);
// Disable implicit copy.
StringPool(const StringPool&) = delete;
StringPool& operator=(const StringPool&) = delete;
Id InternString(base::StringView str) {
if (str.data() == nullptr)
return Id(0);
auto hash = str.Hash();
auto id_it = string_index_.find(hash);
if (id_it != string_index_.end()) {
PERFETTO_DCHECK(Get(id_it->second) == str);
return id_it->second;
}
return InsertString(str, hash);
}
base::Optional<Id> GetId(base::StringView str) const {
if (str.data() == nullptr)
return Id(0u);
auto hash = str.Hash();
auto id_it = string_index_.find(hash);
if (id_it != string_index_.end()) {
PERFETTO_DCHECK(Get(id_it->second) == str);
return id_it->second;
}
return base::nullopt;
}
NullTermStringView Get(Id id) const {
if (id.id == 0)
return NullTermStringView();
return GetFromPtr(IdToPtr(id));
}
Iterator CreateIterator() const { return Iterator(this); }
size_t size() const { return string_index_.size(); }
private:
using StringHash = uint64_t;
struct Block {
explicit Block(size_t size)
: mem_(base::PagedMemory::Allocate(size,
base::PagedMemory::kDontCommit)),
size_(size) {}
~Block() = default;
// Allow std::move().
Block(Block&&) noexcept = default;
Block& operator=(Block&&) = default;
// Disable implicit copy.
Block(const Block&) = delete;
Block& operator=(const Block&) = delete;
uint8_t* Get(uint32_t offset) const {
return static_cast<uint8_t*>(mem_.Get()) + offset;
}
const uint8_t* TryInsert(base::StringView str);
uint32_t OffsetOf(const uint8_t* ptr) const {
PERFETTO_DCHECK(Get(0) < ptr &&
ptr < Get(static_cast<uint32_t>(size_ - 1)));
return static_cast<uint32_t>(ptr - Get(0));
}
uint32_t pos() const { return pos_; }
private:
base::PagedMemory mem_;
uint32_t pos_ = 0;
size_t size_ = 0;
};
friend class Iterator;
// Number of bytes to reserve for size and null terminator.
// This is the upper limit on metadata size: 5 bytes for max uint32,
// plus 1 byte for null terminator. The actual size may be lower.
static constexpr uint8_t kMaxMetadataSize = 6;
// Inserts the string with the given hash into the pool
Id InsertString(base::StringView, uint64_t hash);
// |ptr| should point to the start of the string metadata (i.e. the first byte
// of the size).
Id PtrToId(const uint8_t* ptr) const {
// For a 64 bit architecture, the id is the offset of the pointer inside
// the one and only 4GB block.
if (sizeof(void*) == 8) {
PERFETTO_DCHECK(blocks_.size() == 1);
return Id(blocks_.back().OffsetOf(ptr));
}
// On 32 bit architectures, the size of the pointer is 32-bit so we simply
// use the pointer itself as the id.
// Double cast needed because, on 64 archs, the compiler complains that we
// are losing information.
return Id(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr)));
}
// The returned pointer points to the start of the string metadata (i.e. the
// first byte of the size).
const uint8_t* IdToPtr(Id id) const {
// For a 64 bit architecture, the pointer is simply the found by taking
// the base of the 4GB block and adding the offset given by |id|.
if (sizeof(void*) == 8) {
PERFETTO_DCHECK(blocks_.size() == 1);
return blocks_.back().Get(id.id);
}
// On a 32 bit architecture, the pointer is the same as the id.
return reinterpret_cast<uint8_t*>(id.id);
}
// |ptr| should point to the start of the string metadata (i.e. the first byte
// of the size).
// Returns pointer to the start of the string.
static const uint8_t* ReadSize(const uint8_t* ptr, uint32_t* size) {
uint64_t value = 0;
const uint8_t* str_ptr = protozero::proto_utils::ParseVarInt(
ptr, ptr + kMaxMetadataSize, &value);
PERFETTO_DCHECK(str_ptr != ptr);
PERFETTO_DCHECK(value < std::numeric_limits<uint32_t>::max());
*size = static_cast<uint32_t>(value);
return str_ptr;
}
// |ptr| should point to the start of the string metadata (i.e. the first byte
// of the size).
static NullTermStringView GetFromPtr(const uint8_t* ptr) {
uint32_t size = 0;
const uint8_t* str_ptr = ReadSize(ptr, &size);
return NullTermStringView(reinterpret_cast<const char*>(str_ptr), size);
}
// The minimum size of a new block. A larger block may be created if a string
// is added that is larger than this size.
size_t block_size_bytes_;
// The actual memory storing the strings.
std::vector<Block> blocks_;
// Maps hashes of strings to the Id in the string pool.
// TODO(lalitm): At some point we should benchmark just using a static
// hashtable of 1M elements, we can afford paying a fixed 8MB here
std::unordered_map<StringHash, Id> string_index_;
};
} // namespace trace_processor
} // namespace perfetto
namespace std {
template <>
struct hash< ::perfetto::trace_processor::StringPool::Id> {
using argument_type = ::perfetto::trace_processor::StringPool::Id;
using result_type = size_t;
result_type operator()(const argument_type& r) const {
return std::hash<uint32_t>{}(r.id);
}
};
} // namespace std
#endif // SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_