blob: 43e2451eae8dbb7aac0e4bf045bfec530ca1502c [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.
// http://code.google.com/p/protobuf/
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: kenton@google.com (Kenton Varda)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// DynamicMessage is implemented by constructing a data structure which
// has roughly the same memory layout as a generated message would have.
// Then, we use GeneratedMessageReflection to implement our reflection
// interface. All the other operations we need to implement (e.g.
// parsing, copying, etc.) are already implemented in terms of
// Message::Reflection, so the rest is easy.
//
// The up side of this strategy is that it's very efficient. We don't
// need to use hash_maps or generic representations of fields. The
// down side is that this is a low-level memory management hack which
// can be tricky to get right.
//
// As mentioned in the header, we only expose a DynamicMessageFactory
// publicly, not the DynamicMessage class itself. This is because
// GenericMessageReflection wants to have a pointer to a "default"
// copy of the class, with all fields initialized to their default
// values. We only want to construct one of these per message type,
// so DynamicMessageFactory stores a cache of default messages for
// each type it sees (each unique Descriptor pointer). The code
// refers to the "default" copy of the class as the "prototype".
//
// Note on memory allocation: This module often calls "operator new()"
// to allocate untyped memory, rather than calling something like
// "new uint8[]". This is because "operator new()" means "Give me some
// space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.". In practice, the later may return
// a pointer that is not aligned correctly for general use. I believe
// Item 8 of "More Effective C++" discusses this in more detail, though
// I don't have the book on me right now so I'm not sure.
#include <algorithm>
#include <google/protobuf/stubs/hash.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/dynamic_message.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_reflection.h>
#include <google/protobuf/reflection_ops.h>
#include <google/protobuf/repeated_field.h>
#include <google/protobuf/extension_set.h>
#include <google/protobuf/wire_format.h>
namespace google {
namespace protobuf {
using internal::WireFormat;
using internal::ExtensionSet;
using internal::GeneratedMessageReflection;
using internal::GenericRepeatedField;
// ===================================================================
// Some helper tables and functions...
namespace {
// Compute the byte size of the in-memory representation of the field.
int FieldSpaceUsed(const FieldDescriptor* field) {
typedef FieldDescriptor FD; // avoid line wrapping
if (field->label() == FD::LABEL_REPEATED) {
switch (field->cpp_type()) {
case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >);
case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >);
case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >);
case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >);
case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >);
case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >);
case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >);
case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >);
case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
case FD::CPPTYPE_STRING:
return sizeof(RepeatedPtrField<string>);
break;
}
} else {
switch (field->cpp_type()) {
case FD::CPPTYPE_INT32 : return sizeof(int32 );
case FD::CPPTYPE_INT64 : return sizeof(int64 );
case FD::CPPTYPE_UINT32 : return sizeof(uint32 );
case FD::CPPTYPE_UINT64 : return sizeof(uint64 );
case FD::CPPTYPE_DOUBLE : return sizeof(double );
case FD::CPPTYPE_FLOAT : return sizeof(float );
case FD::CPPTYPE_BOOL : return sizeof(bool );
case FD::CPPTYPE_ENUM : return sizeof(int );
case FD::CPPTYPE_MESSAGE: return sizeof(Message*);
case FD::CPPTYPE_STRING:
return sizeof(string*);
break;
}
}
GOOGLE_LOG(DFATAL) << "Can't get here.";
return 0;
}
struct DescendingFieldSizeOrder {
inline bool operator()(const FieldDescriptor* a,
const FieldDescriptor* b) {
// All repeated fields come first.
if (a->is_repeated()) {
if (b->is_repeated()) {
// Repeated fields and are not ordered with respect to each other.
return false;
} else {
return true;
}
} else if (b->is_repeated()) {
return false;
} else {
// Remaining fields in descending order by size.
return FieldSpaceUsed(a) > FieldSpaceUsed(b);
}
}
};
inline int DivideRoundingUp(int i, int j) {
return (i + (j - 1)) / j;
}
#define bitsizeof(T) (sizeof(T) * 8)
} // namespace
// ===================================================================
class DynamicMessage : public Message {
public:
DynamicMessage(const Descriptor* descriptor,
uint8* base, const uint8* prototype_base,
int size, const int offsets[],
const DescriptorPool* pool, DynamicMessageFactory* factory);
~DynamicMessage();
// Called on the prototype after construction to initialize message fields.
void CrossLinkPrototypes(DynamicMessageFactory* factory);
// implements Message ----------------------------------------------
Message* New() const;
int GetCachedSize() const;
void SetCachedSize(int size) const;
const Descriptor* GetDescriptor() const;
const Reflection* GetReflection() const;
Reflection* GetReflection();
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
inline bool is_prototype() { return base_ == prototype_base_; }
const Descriptor* descriptor_;
const DescriptorPool* descriptor_pool_;
DynamicMessageFactory* factory_;
scoped_ptr<ExtensionSet> extensions_;
GeneratedMessageReflection reflection_;
uint8* base_;
const uint8* prototype_base_;
const int* offsets_;
int size_;
// TODO(kenton): Make this an atomic<int> when C++ supports it.
mutable int cached_byte_size_;
};
DynamicMessage::DynamicMessage(const Descriptor* descriptor,
uint8* base, const uint8* prototype_base,
int size, const int offsets[],
const DescriptorPool* pool,
DynamicMessageFactory* factory)
: descriptor_(descriptor),
descriptor_pool_((pool == NULL) ? descriptor->file()->pool() : pool),
factory_(factory),
extensions_(descriptor->extension_range_count() > 0 ?
new ExtensionSet(descriptor, descriptor_pool_, factory_) :
NULL),
reflection_(descriptor, base, prototype_base, offsets,
// has_bits
reinterpret_cast<uint32*>(base + size) -
DivideRoundingUp(descriptor->field_count(), bitsizeof(uint32)),
extensions_.get()),
base_(base),
prototype_base_(prototype_base),
offsets_(offsets),
size_(size),
cached_byte_size_(0) {
// We need to call constructors for various fields manually and set
// default values where appropriate. We use placement new to call
// constructors. If you haven't heard of placement new, I suggest Googling
// it now. We use placement new even for primitive types that don't have
// constructors for consistency. (In theory, placement new should be used
// any time you are trying to convert untyped memory to typed memory, though
// in practice that's not strictly necessary for types that don't have a
// constructor.)
for (int i = 0; i < descriptor->field_count(); i++) {
const FieldDescriptor* field = descriptor->field(i);
void* field_ptr = base + offsets[i];
switch (field->cpp_type()) {
#define HANDLE_TYPE(CPPTYPE, TYPE) \
case FieldDescriptor::CPPTYPE_##CPPTYPE: \
if (!field->is_repeated()) { \
new(field_ptr) TYPE(field->default_value_##TYPE()); \
} else { \
new(field_ptr) RepeatedField<TYPE>(); \
} \
break;
HANDLE_TYPE(INT32 , int32 );
HANDLE_TYPE(INT64 , int64 );
HANDLE_TYPE(UINT32, uint32);
HANDLE_TYPE(UINT64, uint64);
HANDLE_TYPE(DOUBLE, double);
HANDLE_TYPE(FLOAT , float );
HANDLE_TYPE(BOOL , bool );
#undef HANDLE_TYPE
case FieldDescriptor::CPPTYPE_ENUM:
if (!field->is_repeated()) {
new(field_ptr) int(field->default_value_enum()->number());
} else {
new(field_ptr) RepeatedField<int>();
}
break;
case FieldDescriptor::CPPTYPE_STRING:
if (!field->is_repeated()) {
if (is_prototype()) {
new(field_ptr) const string*(&field->default_value_string());
} else {
string* default_value =
*reinterpret_cast<string* const*>(
prototype_base + offsets[i]);
new(field_ptr) string*(default_value);
}
} else {
new(field_ptr) RepeatedPtrField<string>();
}
break;
case FieldDescriptor::CPPTYPE_MESSAGE: {
// If this object is the prototype, its CPPTYPE_MESSAGE fields
// must be initialized later, in CrossLinkPrototypes(), so we don't
// initialize them here.
if (!is_prototype()) {
if (!field->is_repeated()) {
new(field_ptr) Message*(NULL);
} else {
const RepeatedPtrField<Message>* prototype_field =
reinterpret_cast<const RepeatedPtrField<Message>*>(
prototype_base + offsets[i]);
new(field_ptr) RepeatedPtrField<Message>(
prototype_field->prototype());
}
}
break;
}
}
}
}
DynamicMessage::~DynamicMessage() {
// We need to manually run the destructors for repeated fields and strings,
// just as we ran their constructors in the the DynamicMessage constructor.
// Additionally, if any singular embedded messages have been allocated, we
// need to delete them, UNLESS we are the prototype message of this type,
// in which case any embedded messages are other prototypes and shouldn't
// be touched.
const Descriptor* descriptor = GetDescriptor();
for (int i = 0; i < descriptor->field_count(); i++) {
const FieldDescriptor* field = descriptor->field(i);
void* field_ptr = base_ + offsets_[i];
if (field->is_repeated()) {
GenericRepeatedField* field =
reinterpret_cast<GenericRepeatedField*>(field_ptr);
field->~GenericRepeatedField();
} else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
string* ptr = *reinterpret_cast<string**>(field_ptr);
if (ptr != &field->default_value_string()) {
delete ptr;
}
} else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
!is_prototype()) {
Message* message = *reinterpret_cast<Message**>(field_ptr);
if (message != NULL) {
delete message;
}
}
}
// OK, now we can delete our base pointer.
operator delete(base_);
// When the prototype is deleted, we also want to free the offsets table.
// (The prototype is only deleted when the factory that created it is
// deleted.)
if (is_prototype()) {
delete [] offsets_;
}
}
void DynamicMessage::CrossLinkPrototypes(DynamicMessageFactory* factory) {
// This should only be called on the prototype message.
GOOGLE_CHECK(is_prototype());
// Cross-link default messages.
for (int i = 0; i < descriptor_->field_count(); i++) {
const FieldDescriptor* field = descriptor_->field(i);
void* field_ptr = base_ + offsets_[i];
if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
// For fields with message types, we need to cross-link with the
// prototype for the field's type.
const Message* field_prototype =
factory->GetPrototype(field->message_type());
if (field->is_repeated()) {
// For repeated fields, we actually construct the RepeatedPtrField
// here, but only for fields with message types. All other repeated
// fields are constructed in DynamicMessage's constructor.
new(field_ptr) RepeatedPtrField<Message>(field_prototype);
} else {
// For singular fields, the field is just a pointer which should
// point to the prototype. (OK to const_cast here because the
// prototype itself will only be available const to the outside
// world.)
new(field_ptr) Message*(const_cast<Message*>(field_prototype));
}
}
}
}
Message* DynamicMessage::New() const {
uint8* new_base = reinterpret_cast<uint8*>(operator new(size_));
memset(new_base, 0, size_);
return new DynamicMessage(GetDescriptor(), new_base, prototype_base_,
size_, offsets_, descriptor_pool_, factory_);
}
int DynamicMessage::GetCachedSize() const {
return cached_byte_size_;
}
void DynamicMessage::SetCachedSize(int size) const {
// This is theoretically not thread-compatible, but in practice it works
// because if multiple threads write this simultaneously, they will be
// writing the exact same value.
cached_byte_size_ = size;
}
const Descriptor* DynamicMessage::GetDescriptor() const {
return descriptor_;
}
const Message::Reflection* DynamicMessage::GetReflection() const {
return &reflection_;
}
Message::Reflection* DynamicMessage::GetReflection() {
return &reflection_;
}
// ===================================================================
struct DynamicMessageFactory::PrototypeMap {
typedef hash_map<const Descriptor*, const Message*> Map;
Map map_;
};
DynamicMessageFactory::DynamicMessageFactory()
: pool_(NULL), prototypes_(new PrototypeMap) {
}
DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
: pool_(pool), prototypes_(new PrototypeMap) {
}
DynamicMessageFactory::~DynamicMessageFactory() {
for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
iter != prototypes_->map_.end(); ++iter) {
delete iter->second;
}
}
const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
const Message** target = &prototypes_->map_[type];
if (*target != NULL) {
// Already exists.
return *target;
}
// We need to construct all the structures passed to
// GeneratedMessageReflection's constructor. This includes:
// - A block of memory that contains space for all the message's fields.
// - An array of integers indicating the byte offset of each field within
// this block.
// - A big bitfield containing a bit for each field indicating whether
// or not that field is set.
// Compute size and offsets.
int* offsets = new int[type->field_count()];
// Sort the fields of this message in descending order by size. We
// assume that if we then pack the fields tightly in this order, all fields
// will end up properly-aligned, since all field sizes are powers of two or
// are multiples of the system word size.
scoped_array<const FieldDescriptor*> ordered_fields(
new const FieldDescriptor*[type->field_count()]);
for (int i = 0; i < type->field_count(); i++) {
ordered_fields[i] = type->field(i);
}
stable_sort(&ordered_fields[0], &ordered_fields[type->field_count()],
DescendingFieldSizeOrder());
// Decide all field offsets by packing in order.
int current_offset = 0;
for (int i = 0; i < type->field_count(); i++) {
offsets[ordered_fields[i]->index()] = current_offset;
current_offset += FieldSpaceUsed(ordered_fields[i]);
}
// Allocate space for all fields plus has_bits. We'll stick has_bits on
// the end.
int size = current_offset +
DivideRoundingUp(type->field_count(), bitsizeof(uint32)) * sizeof(uint32);
// Round size up to the nearest 64-bit boundary just to make sure no
// clever allocators think that alignment is not necessary. This also
// insures that has_bits is properly-aligned, since we'll always align
// has_bits with the end of the structure.
size = DivideRoundingUp(size, sizeof(uint64)) * sizeof(uint64);
uint8* base = reinterpret_cast<uint8*>(operator new(size));
memset(base, 0, size);
// Construct message.
DynamicMessage* result =
new DynamicMessage(type, base, base, size, offsets, pool_, this);
*target = result;
result->CrossLinkPrototypes(this);
return result;
}
} // namespace protobuf
} // namespace google