/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
#include "perfetto/tracing/core/shared_memory_abi.h"
#include <sys/mman.h>
#include "perfetto/base/utils.h"
#include "perfetto/tracing/core/basic_types.h"
namespace perfetto {
namespace {
// Returns the largest 4-byte aligned chunk size <= |page_size| / |divider|
// for each divider in PageLayout.
constexpr size_t GetChunkSize(size_t page_size, size_t divider) {
return ((page_size - sizeof(SharedMemoryABI::PageHeader)) / divider) & ~3UL;
}
// Initializer for the const |chunk_sizes_| array.
std::array<size_t, SharedMemoryABI::kNumPageLayouts> InitChunkSizes(
size_t page_size) {
static_assert(SharedMemoryABI::kNumPageLayouts ==
base::ArraySize(SharedMemoryABI::kNumChunksForLayout),
"kNumPageLayouts out of date");
std::array<size_t, SharedMemoryABI::kNumPageLayouts> res = {};
for (size_t i = 0; i < SharedMemoryABI::kNumPageLayouts; i++) {
size_t num_chunks = SharedMemoryABI::kNumChunksForLayout[i];
res[i] = num_chunks == 0 ? 0 : GetChunkSize(page_size, num_chunks);
}
return res;
}
} // namespace
// static
constexpr size_t SharedMemoryABI::kNumChunksForLayout[];
constexpr const char* SharedMemoryABI::kChunkStateStr[];
constexpr const size_t SharedMemoryABI::kInvalidPageIdx;
SharedMemoryABI::SharedMemoryABI(uint8_t* start, size_t size, size_t page_size)
: start_(start),
size_(size),
page_size_(page_size),
num_pages_(size / page_size),
chunk_sizes_(InitChunkSizes(page_size)) {
static_assert(sizeof(PageHeader) == 8, "PageHeader size");
static_assert(sizeof(ChunkHeader) == 8, "ChunkHeader size");
static_assert(sizeof(ChunkHeader::PacketsState) == 4, "PacketsState size");
static_assert(alignof(ChunkHeader) == kChunkAlignment,
"ChunkHeader alignment");
// In theory std::atomic does not guarantee that the underlying type
// consists only of the actual atomic word. Theoretically it could have
// locks or other state. In practice most implementations just implement
// them without extra state. The code below overlays the atomic into the
// SMB, hence relies on this implementation detail. This should be fine
// pragmatically (Chrome's base makes the same assumption), but let's have a
// check for this.
static_assert(sizeof(std::atomic<uint32_t>) == sizeof(uint32_t) &&
sizeof(std::atomic<uint16_t>) == sizeof(uint16_t),
"Incompatible STL <atomic> implementation");
  // Check that kAllChunksComplete and kAllChunksFree are consistent with the
  // ChunkState enum values. kChunkFree and kAllChunksFree must be zero because
  // we rely on zero-initialized memory being interpreted as "free".
  static_assert(kChunkFree == 0 && kAllChunksFree == 0,
                "kChunkFree and kAllChunksFree must be 0");
static_assert((kAllChunksComplete & kChunkMask) == kChunkComplete,
"kAllChunksComplete out of sync with kChunkComplete");
// Sanity check the consistency of the kMax... constants.
ChunkHeader::Identifier chunk_id = {};
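  // Underflowing the zero-initialized |writer_id| field wraps it around to its
  // largest representable value, which must match kMaxWriterID.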
PERFETTO_CHECK((chunk_id.writer_id -= 1) == kMaxWriterID);
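  // Similarly, storing -1 into |target_buffer| wraps to the largest value its
  // underlying type can hold, which must be able to represent
  // kMaxTraceBuffers - 1.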
PageHeader phdr;
phdr.target_buffer.store(-1);
PERFETTO_CHECK(phdr.target_buffer.load() >= kMaxTraceBuffers - 1);
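  // The buffer must start on a 4KB boundary and the ABI page size must be a
  // multiple of 4KB that evenly divides the buffer size.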
PERFETTO_CHECK(page_size >= 4096);
PERFETTO_CHECK(page_size % 4096 == 0);
PERFETTO_CHECK(reinterpret_cast<uintptr_t>(start) % 4096 == 0);
PERFETTO_CHECK(size % page_size == 0);
}
SharedMemoryABI::Chunk SharedMemoryABI::GetChunkUnchecked(size_t page_idx,
uint32_t page_layout,
size_t chunk_idx) {
const size_t num_chunks = GetNumChunksForLayout(page_layout);
PERFETTO_DCHECK(chunk_idx < num_chunks);
  // Compute the chunk virtual address and return a Chunk pointing at it.
const size_t chunk_size = GetChunkSizeForLayout(page_layout);
size_t chunk_offset_in_page = sizeof(PageHeader) + chunk_idx * chunk_size;
Chunk chunk(page_start(page_idx) + chunk_offset_in_page, chunk_size);
PERFETTO_DCHECK(chunk.end() <= end());
return chunk;
}
SharedMemoryABI::Chunk SharedMemoryABI::TryAcquireChunk(
size_t page_idx,
size_t chunk_idx,
size_t expected_target_buffer,
ChunkState desired_chunk_state,
const ChunkHeader* header) {
PERFETTO_DCHECK(desired_chunk_state == kChunkBeingRead ||
desired_chunk_state == kChunkBeingWritten);
PageHeader* phdr = page_header(page_idx);
uint32_t layout;
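  // If another writer is in the middle of TryPartitionPage(), the page is in
  // the transitional kPageBeingPartitioned state. Spin (up to |attempts|
  // iterations) until the final layout is published.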
uint32_t attempts = 1000;
do {
layout = phdr->layout.load(std::memory_order_acquire);
if (__builtin_expect(
(layout & kLayoutMask) >> kLayoutShift != kPageBeingPartitioned,
true)) {
break;
}
std::this_thread::yield();
} while (--attempts);
  // If |attempts| == 0 the page is still being partitioned: |num_chunks| below
  // will be 0 and this function will fail, returning an invalid Chunk.
const size_t num_chunks = GetNumChunksForLayout(layout);
// The page layout has changed (or the page is free).
if (chunk_idx >= num_chunks)
return Chunk();
// The page has been acquired by a writer that is targeting a different
// buffer. The caller has to try with another page.
if (phdr->target_buffer.load(std::memory_order_relaxed) !=
expected_target_buffer) {
return Chunk();
}
// Verify that the chunk is still in a state that allows the transition to
// |desired_chunk_state|. The only allowed transitions are:
// 1. kChunkFree -> kChunkBeingWritten (Producer).
// 2. kChunkComplete -> kChunkBeingRead (Service).
ChunkState expected_chunk_state =
desired_chunk_state == kChunkBeingWritten ? kChunkFree : kChunkComplete;
auto cur_chunk_state = (layout >> (chunk_idx * kChunkShift)) & kChunkMask;
if (cur_chunk_state != expected_chunk_state)
return Chunk();
uint32_t next_layout = layout;
next_layout &= ~(kChunkMask << (chunk_idx * kChunkShift));
next_layout |= (desired_chunk_state << (chunk_idx * kChunkShift));
if (!phdr->layout.compare_exchange_strong(layout, next_layout,
std::memory_order_acq_rel)) {
// TODO: returning here is too pessimistic. We should look at the returned
// |layout| to figure out if some other writer thread took the same chunk
// (in which case we should immediately return false) or if they took
// another chunk in the same page (in which case we should just retry).
return Chunk();
}
  // Compute the virtual address of the chunk we just acquired.
Chunk chunk = GetChunkUnchecked(page_idx, layout, chunk_idx);
if (desired_chunk_state == kChunkBeingWritten) {
PERFETTO_DCHECK(header);
ChunkHeader* new_header = chunk.header();
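    // Copy the caller-provided ChunkHeader into the chunk. |identifier| is
    // stored last with release semantics so that, paired with an acquire load
    // of |identifier|, the |packets_state| store above is visible as well.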
new_header->packets_state.store(header->packets_state,
std::memory_order_relaxed);
new_header->identifier.store(header->identifier, std::memory_order_release);
}
return chunk;
}
bool SharedMemoryABI::TryPartitionPage(size_t page_idx,
PageLayout layout,
size_t target_buffer) {
PERFETTO_DCHECK(target_buffer < kMaxTraceBuffers);
PERFETTO_DCHECK(layout >= kPageDiv1 && layout <= kPageDiv14);
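  // Partitioning is a two-step protocol: first atomically move the page from
  // the free state (layout == 0) to kPageBeingPartitioned, then publish the
  // final layout with a release store. Writers racing in TryAcquireChunk()
  // spin until the transitional state is gone.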
uint32_t expected_layout = 0; // Free page.
uint32_t next_layout = (kPageBeingPartitioned << kLayoutShift) & kLayoutMask;
PageHeader* phdr = page_header(page_idx);
if (!phdr->layout.compare_exchange_strong(expected_layout, next_layout,
std::memory_order_acq_rel)) {
return false;
}
// Store any page flag before storing the final |layout|. |layout| is read
// with acquire semantics.
phdr->target_buffer.store(static_cast<uint16_t>(target_buffer),
std::memory_order_relaxed);
phdr->layout.store((layout << kLayoutShift) & kLayoutMask,
std::memory_order_release);
return true;
}
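// Returns a bitmap in which bit N is set if the N-th chunk of the page is in
// the kChunkFree state.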
size_t SharedMemoryABI::GetFreeChunks(size_t page_idx) {
uint32_t layout =
page_header(page_idx)->layout.load(std::memory_order_relaxed);
const size_t num_chunks = GetNumChunksForLayout(layout);
size_t res = 0;
for (size_t i = 0; i < num_chunks; i++) {
res |= ((layout & kChunkMask) == kChunkFree) ? (1 << i) : 0;
layout >>= kChunkShift;
}
return res;
}
size_t SharedMemoryABI::ReleaseChunk(Chunk chunk,
ChunkState desired_chunk_state) {
PERFETTO_DCHECK(desired_chunk_state == kChunkComplete ||
desired_chunk_state == kChunkFree);
size_t page_idx;
size_t chunk_idx;
std::tie(page_idx, chunk_idx) = GetPageAndChunkIndex(chunk);
for (int attempt = 0; attempt < 64; attempt++) {
PageHeader* phdr = page_header(page_idx);
uint32_t layout = phdr->layout.load(std::memory_order_relaxed);
const size_t page_chunk_size = GetChunkSizeForLayout(layout);
PERFETTO_CHECK(chunk.size() == page_chunk_size);
const uint32_t chunk_state =
((layout >> (chunk_idx * kChunkShift)) & kChunkMask);
// Verify that the chunk is still in a state that allows the transition to
// |desired_chunk_state|. The only allowed transitions are:
// 1. kChunkBeingWritten -> kChunkComplete (Producer).
// 2. kChunkBeingRead -> kChunkFree (Service).
ChunkState expected_chunk_state;
uint32_t all_chunks_state;
if (desired_chunk_state == kChunkComplete) {
expected_chunk_state = kChunkBeingWritten;
all_chunks_state = kAllChunksComplete;
} else {
expected_chunk_state = kChunkBeingRead;
all_chunks_state = kAllChunksFree;
}
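    // Restrict the all-chunks pattern to the chunks that actually exist in
    // this page's layout.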
const size_t num_chunks = GetNumChunksForLayout(layout);
all_chunks_state &= (1 << (num_chunks * kChunkShift)) - 1;
PERFETTO_CHECK(chunk_state == expected_chunk_state);
uint32_t next_layout = layout;
next_layout &= ~(kChunkMask << (chunk_idx * kChunkShift));
next_layout |= (desired_chunk_state << (chunk_idx * kChunkShift));
    // If we are freeing a chunk and all the other chunks in the page are free,
    // we should de-partition the page and mark it as free again.
    // TODO: maybe even madvise() it?
if ((next_layout & kAllChunksMask) == kAllChunksFree)
next_layout = 0;
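    // Publish the new layout. If the CAS succeeds, report |page_idx| back to
    // the caller only when every chunk in the page has reached the target
    // state (all complete when completing, all free when freeing); otherwise
    // return kInvalidPageIdx. If the CAS fails, another thread raced us: retry.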
if (phdr->layout.compare_exchange_strong(layout, next_layout,
std::memory_order_acq_rel)) {
return (next_layout & kAllChunksMask) == all_chunks_state
? page_idx
: kInvalidPageIdx;
}
std::this_thread::yield();
}
// Too much contention on this page. Give up. This page will be left pending
// forever but there isn't much more we can do at this point.
PERFETTO_DCHECK(false);
return kInvalidPageIdx;
}
bool SharedMemoryABI::TryAcquireAllChunksForReading(size_t page_idx) {
PageHeader* phdr = page_header(page_idx);
uint32_t layout = phdr->layout.load(std::memory_order_relaxed);
const size_t num_chunks = GetNumChunksForLayout(layout);
if (num_chunks == 0)
return false;
uint32_t next_layout = layout & kLayoutMask;
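  // Rebuild the chunk-state bits on top of the preserved layout bits: chunks
  // that are complete (or already being read) are marked as kChunkBeingRead,
  // free chunks stay free, and any chunk still being written aborts the whole
  // operation.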
for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
const uint32_t chunk_state =
((layout >> (chunk_idx * kChunkShift)) & kChunkMask);
switch (chunk_state) {
case kChunkBeingWritten:
return false;
case kChunkBeingRead:
case kChunkComplete:
next_layout |= kChunkBeingRead << (chunk_idx * kChunkShift);
break;
case kChunkFree:
next_layout |= kChunkFree << (chunk_idx * kChunkShift);
break;
}
}
return phdr->layout.compare_exchange_strong(layout, next_layout,
std::memory_order_acq_rel);
}
void SharedMemoryABI::ReleaseAllChunksAsFree(size_t page_idx) {
PageHeader* phdr = page_header(page_idx);
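  // A layout of 0 means "page free and not partitioned", consistently with the
  // zero-initialization assumption asserted in the constructor.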
phdr->layout.store(0, std::memory_order_release);
uint8_t* page_start = start_ + page_idx * page_size_;
// TODO: On Linux/Android this should be MADV_REMOVE if we use
// memfd_create() and tmpfs supports hole punching (need to consult kernel
// sources).
  int ret = madvise(page_start, page_size_, MADV_DONTNEED);
PERFETTO_DCHECK(ret == 0);
}
SharedMemoryABI::Chunk::Chunk() = default;
SharedMemoryABI::Chunk::Chunk(uint8_t* begin, size_t size)
: begin_(begin), end_(begin + size) {
PERFETTO_CHECK(reinterpret_cast<uintptr_t>(begin) % kChunkAlignment == 0);
PERFETTO_CHECK(end_ >= begin_);
}
std::pair<size_t, size_t> SharedMemoryABI::GetPageAndChunkIndex(
const Chunk& chunk) {
PERFETTO_DCHECK(chunk.is_valid());
PERFETTO_DCHECK(chunk.begin() >= start_);
PERFETTO_DCHECK(chunk.end() <= start_ + size_);
// TODO(primiano): The divisions below could be avoided if we cached
// |page_shift_|.
const uintptr_t rel_addr = chunk.begin() - start_;
const size_t page_idx = rel_addr / page_size_;
const size_t offset = rel_addr % page_size_;
PERFETTO_DCHECK(offset >= sizeof(PageHeader));
PERFETTO_DCHECK(offset % kChunkAlignment == 0);
PERFETTO_DCHECK((offset - sizeof(PageHeader)) % chunk.size() == 0);
const size_t chunk_idx = (offset - sizeof(PageHeader)) / chunk.size();
PERFETTO_DCHECK(chunk_idx < kMaxChunksPerPage);
PERFETTO_DCHECK(chunk_idx < GetNumChunksForLayout(page_layout_dbg(page_idx)));
return std::make_pair(page_idx, chunk_idx);
}
} // namespace perfetto