| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. |
| // http://code.google.com/p/protobuf/ |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Author: kenton@google.com (Kenton Varda) |
| // Based on original Protocol Buffers design by |
| // Sanjay Ghemawat, Jeff Dean, and others. |
| // |
| // DynamicMessage is implemented by constructing a data structure which |
| // has roughly the same memory layout as a generated message would have. |
| // Then, we use GeneratedMessageReflection to implement our reflection |
| // interface. All the other operations we need to implement (e.g. |
| // parsing, copying, etc.) are already implemented in terms of |
| // Message::Reflection, so the rest is easy. |
| // |
| // The up side of this strategy is that it's very efficient. We don't |
| // need to use hash_maps or generic representations of fields. The |
| // down side is that this is a low-level memory management hack which |
| // can be tricky to get right. |
| // |
| // As mentioned in the header, we only expose a DynamicMessageFactory |
| // publicly, not the DynamicMessage class itself. This is because |
| // GenericMessageReflection wants to have a pointer to a "default" |
| // copy of the class, with all fields initialized to their default |
| // values. We only want to construct one of these per message type, |
| // so DynamicMessageFactory stores a cache of default messages for |
| // each type it sees (each unique Descriptor pointer). The code |
| // refers to the "default" copy of the class as the "prototype". |
| // |
| // Note on memory allocation: This module often calls "operator new()" |
| // to allocate untyped memory, rather than calling something like |
| // "new uint8[]". This is because "operator new()" means "Give me some |
| // space which I can use as I please." while "new uint8[]" means "Give |
| // me an array of 8-bit integers.". In practice, the later may return |
| // a pointer that is not aligned correctly for general use. I believe |
| // Item 8 of "More Effective C++" discusses this in more detail, though |
| // I don't have the book on me right now so I'm not sure. |
| |
| #include <algorithm> |
| #include <google/protobuf/stubs/hash.h> |
| |
| #include <google/protobuf/stubs/common.h> |
| |
| #include <google/protobuf/dynamic_message.h> |
| #include <google/protobuf/descriptor.h> |
| #include <google/protobuf/descriptor.pb.h> |
| #include <google/protobuf/generated_message_reflection.h> |
| #include <google/protobuf/reflection_ops.h> |
| #include <google/protobuf/repeated_field.h> |
| #include <google/protobuf/extension_set.h> |
| #include <google/protobuf/wire_format.h> |
| |
| namespace google { |
| namespace protobuf { |
| |
| using internal::WireFormat; |
| using internal::ExtensionSet; |
| using internal::GeneratedMessageReflection; |
| using internal::GenericRepeatedField; |
| |
| |
| // =================================================================== |
| // Some helper tables and functions... |
| |
| namespace { |
| |
| // Compute the byte size of the in-memory representation of the field. |
| int FieldSpaceUsed(const FieldDescriptor* field) { |
| typedef FieldDescriptor FD; // avoid line wrapping |
| if (field->label() == FD::LABEL_REPEATED) { |
| switch (field->cpp_type()) { |
| case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >); |
| case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >); |
| case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >); |
| case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >); |
| case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >); |
| case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >); |
| case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >); |
| case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >); |
| case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>); |
| |
| case FD::CPPTYPE_STRING: |
| return sizeof(RepeatedPtrField<string>); |
| break; |
| } |
| } else { |
| switch (field->cpp_type()) { |
| case FD::CPPTYPE_INT32 : return sizeof(int32 ); |
| case FD::CPPTYPE_INT64 : return sizeof(int64 ); |
| case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); |
| case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); |
| case FD::CPPTYPE_DOUBLE : return sizeof(double ); |
| case FD::CPPTYPE_FLOAT : return sizeof(float ); |
| case FD::CPPTYPE_BOOL : return sizeof(bool ); |
| case FD::CPPTYPE_ENUM : return sizeof(int ); |
| case FD::CPPTYPE_MESSAGE: return sizeof(Message*); |
| |
| case FD::CPPTYPE_STRING: |
| return sizeof(string*); |
| break; |
| } |
| } |
| |
| GOOGLE_LOG(DFATAL) << "Can't get here."; |
| return 0; |
| } |
| |
| struct DescendingFieldSizeOrder { |
| inline bool operator()(const FieldDescriptor* a, |
| const FieldDescriptor* b) { |
| // All repeated fields come first. |
| if (a->is_repeated()) { |
| if (b->is_repeated()) { |
| // Repeated fields and are not ordered with respect to each other. |
| return false; |
| } else { |
| return true; |
| } |
| } else if (b->is_repeated()) { |
| return false; |
| } else { |
| // Remaining fields in descending order by size. |
| return FieldSpaceUsed(a) > FieldSpaceUsed(b); |
| } |
| } |
| }; |
| |
| inline int DivideRoundingUp(int i, int j) { |
| return (i + (j - 1)) / j; |
| } |
| |
| #define bitsizeof(T) (sizeof(T) * 8) |
| |
| } // namespace |
| |
| // =================================================================== |
| |
| class DynamicMessage : public Message { |
| public: |
| DynamicMessage(const Descriptor* descriptor, |
| uint8* base, const uint8* prototype_base, |
| int size, const int offsets[], |
| const DescriptorPool* pool, DynamicMessageFactory* factory); |
| ~DynamicMessage(); |
| |
| // Called on the prototype after construction to initialize message fields. |
| void CrossLinkPrototypes(DynamicMessageFactory* factory); |
| |
| // implements Message ---------------------------------------------- |
| |
| Message* New() const; |
| |
| int GetCachedSize() const; |
| void SetCachedSize(int size) const; |
| |
| const Descriptor* GetDescriptor() const; |
| const Reflection* GetReflection() const; |
| Reflection* GetReflection(); |
| |
| private: |
| GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); |
| |
| inline bool is_prototype() { return base_ == prototype_base_; } |
| |
| const Descriptor* descriptor_; |
| const DescriptorPool* descriptor_pool_; |
| DynamicMessageFactory* factory_; |
| scoped_ptr<ExtensionSet> extensions_; |
| GeneratedMessageReflection reflection_; |
| uint8* base_; |
| const uint8* prototype_base_; |
| const int* offsets_; |
| int size_; |
| |
| // TODO(kenton): Make this an atomic<int> when C++ supports it. |
| mutable int cached_byte_size_; |
| }; |
| |
| DynamicMessage::DynamicMessage(const Descriptor* descriptor, |
| uint8* base, const uint8* prototype_base, |
| int size, const int offsets[], |
| const DescriptorPool* pool, |
| DynamicMessageFactory* factory) |
| : descriptor_(descriptor), |
| descriptor_pool_((pool == NULL) ? descriptor->file()->pool() : pool), |
| factory_(factory), |
| extensions_(descriptor->extension_range_count() > 0 ? |
| new ExtensionSet(descriptor, descriptor_pool_, factory_) : |
| NULL), |
| reflection_(descriptor, base, prototype_base, offsets, |
| // has_bits |
| reinterpret_cast<uint32*>(base + size) - |
| DivideRoundingUp(descriptor->field_count(), bitsizeof(uint32)), |
| extensions_.get()), |
| base_(base), |
| prototype_base_(prototype_base), |
| offsets_(offsets), |
| size_(size), |
| cached_byte_size_(0) { |
| // We need to call constructors for various fields manually and set |
| // default values where appropriate. We use placement new to call |
| // constructors. If you haven't heard of placement new, I suggest Googling |
| // it now. We use placement new even for primitive types that don't have |
| // constructors for consistency. (In theory, placement new should be used |
| // any time you are trying to convert untyped memory to typed memory, though |
| // in practice that's not strictly necessary for types that don't have a |
| // constructor.) |
| for (int i = 0; i < descriptor->field_count(); i++) { |
| const FieldDescriptor* field = descriptor->field(i); |
| void* field_ptr = base + offsets[i]; |
| switch (field->cpp_type()) { |
| #define HANDLE_TYPE(CPPTYPE, TYPE) \ |
| case FieldDescriptor::CPPTYPE_##CPPTYPE: \ |
| if (!field->is_repeated()) { \ |
| new(field_ptr) TYPE(field->default_value_##TYPE()); \ |
| } else { \ |
| new(field_ptr) RepeatedField<TYPE>(); \ |
| } \ |
| break; |
| |
| HANDLE_TYPE(INT32 , int32 ); |
| HANDLE_TYPE(INT64 , int64 ); |
| HANDLE_TYPE(UINT32, uint32); |
| HANDLE_TYPE(UINT64, uint64); |
| HANDLE_TYPE(DOUBLE, double); |
| HANDLE_TYPE(FLOAT , float ); |
| HANDLE_TYPE(BOOL , bool ); |
| #undef HANDLE_TYPE |
| |
| case FieldDescriptor::CPPTYPE_ENUM: |
| if (!field->is_repeated()) { |
| new(field_ptr) int(field->default_value_enum()->number()); |
| } else { |
| new(field_ptr) RepeatedField<int>(); |
| } |
| break; |
| |
| case FieldDescriptor::CPPTYPE_STRING: |
| if (!field->is_repeated()) { |
| if (is_prototype()) { |
| new(field_ptr) const string*(&field->default_value_string()); |
| } else { |
| string* default_value = |
| *reinterpret_cast<string* const*>( |
| prototype_base + offsets[i]); |
| new(field_ptr) string*(default_value); |
| } |
| } else { |
| new(field_ptr) RepeatedPtrField<string>(); |
| } |
| break; |
| |
| case FieldDescriptor::CPPTYPE_MESSAGE: { |
| // If this object is the prototype, its CPPTYPE_MESSAGE fields |
| // must be initialized later, in CrossLinkPrototypes(), so we don't |
| // initialize them here. |
| if (!is_prototype()) { |
| if (!field->is_repeated()) { |
| new(field_ptr) Message*(NULL); |
| } else { |
| const RepeatedPtrField<Message>* prototype_field = |
| reinterpret_cast<const RepeatedPtrField<Message>*>( |
| prototype_base + offsets[i]); |
| new(field_ptr) RepeatedPtrField<Message>( |
| prototype_field->prototype()); |
| } |
| } |
| break; |
| } |
| } |
| } |
| } |
| |
| DynamicMessage::~DynamicMessage() { |
| // We need to manually run the destructors for repeated fields and strings, |
| // just as we ran their constructors in the the DynamicMessage constructor. |
| // Additionally, if any singular embedded messages have been allocated, we |
| // need to delete them, UNLESS we are the prototype message of this type, |
| // in which case any embedded messages are other prototypes and shouldn't |
| // be touched. |
| const Descriptor* descriptor = GetDescriptor(); |
| for (int i = 0; i < descriptor->field_count(); i++) { |
| const FieldDescriptor* field = descriptor->field(i); |
| void* field_ptr = base_ + offsets_[i]; |
| |
| if (field->is_repeated()) { |
| GenericRepeatedField* field = |
| reinterpret_cast<GenericRepeatedField*>(field_ptr); |
| field->~GenericRepeatedField(); |
| |
| } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { |
| string* ptr = *reinterpret_cast<string**>(field_ptr); |
| if (ptr != &field->default_value_string()) { |
| delete ptr; |
| } |
| } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) && |
| !is_prototype()) { |
| Message* message = *reinterpret_cast<Message**>(field_ptr); |
| if (message != NULL) { |
| delete message; |
| } |
| } |
| } |
| |
| // OK, now we can delete our base pointer. |
| operator delete(base_); |
| |
| // When the prototype is deleted, we also want to free the offsets table. |
| // (The prototype is only deleted when the factory that created it is |
| // deleted.) |
| if (is_prototype()) { |
| delete [] offsets_; |
| } |
| } |
| |
| void DynamicMessage::CrossLinkPrototypes(DynamicMessageFactory* factory) { |
| // This should only be called on the prototype message. |
| GOOGLE_CHECK(is_prototype()); |
| |
| // Cross-link default messages. |
| for (int i = 0; i < descriptor_->field_count(); i++) { |
| const FieldDescriptor* field = descriptor_->field(i); |
| void* field_ptr = base_ + offsets_[i]; |
| |
| if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { |
| // For fields with message types, we need to cross-link with the |
| // prototype for the field's type. |
| const Message* field_prototype = |
| factory->GetPrototype(field->message_type()); |
| |
| if (field->is_repeated()) { |
| // For repeated fields, we actually construct the RepeatedPtrField |
| // here, but only for fields with message types. All other repeated |
| // fields are constructed in DynamicMessage's constructor. |
| new(field_ptr) RepeatedPtrField<Message>(field_prototype); |
| } else { |
| // For singular fields, the field is just a pointer which should |
| // point to the prototype. (OK to const_cast here because the |
| // prototype itself will only be available const to the outside |
| // world.) |
| new(field_ptr) Message*(const_cast<Message*>(field_prototype)); |
| } |
| } |
| } |
| } |
| |
| Message* DynamicMessage::New() const { |
| uint8* new_base = reinterpret_cast<uint8*>(operator new(size_)); |
| memset(new_base, 0, size_); |
| |
| return new DynamicMessage(GetDescriptor(), new_base, prototype_base_, |
| size_, offsets_, descriptor_pool_, factory_); |
| } |
| |
| int DynamicMessage::GetCachedSize() const { |
| return cached_byte_size_; |
| } |
| |
| void DynamicMessage::SetCachedSize(int size) const { |
| // This is theoretically not thread-compatible, but in practice it works |
| // because if multiple threads write this simultaneously, they will be |
| // writing the exact same value. |
| cached_byte_size_ = size; |
| } |
| |
| const Descriptor* DynamicMessage::GetDescriptor() const { |
| return descriptor_; |
| } |
| |
| const Message::Reflection* DynamicMessage::GetReflection() const { |
| return &reflection_; |
| } |
| |
| Message::Reflection* DynamicMessage::GetReflection() { |
| return &reflection_; |
| } |
| |
| // =================================================================== |
| |
| struct DynamicMessageFactory::PrototypeMap { |
| typedef hash_map<const Descriptor*, const Message*> Map; |
| Map map_; |
| }; |
| |
| DynamicMessageFactory::DynamicMessageFactory() |
| : pool_(NULL), prototypes_(new PrototypeMap) { |
| } |
| |
| DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) |
| : pool_(pool), prototypes_(new PrototypeMap) { |
| } |
| |
| DynamicMessageFactory::~DynamicMessageFactory() { |
| for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); |
| iter != prototypes_->map_.end(); ++iter) { |
| delete iter->second; |
| } |
| } |
| |
| |
| const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { |
| const Message** target = &prototypes_->map_[type]; |
| if (*target != NULL) { |
| // Already exists. |
| return *target; |
| } |
| |
| // We need to construct all the structures passed to |
| // GeneratedMessageReflection's constructor. This includes: |
| // - A block of memory that contains space for all the message's fields. |
| // - An array of integers indicating the byte offset of each field within |
| // this block. |
| // - A big bitfield containing a bit for each field indicating whether |
| // or not that field is set. |
| |
| // Compute size and offsets. |
| int* offsets = new int[type->field_count()]; |
| |
| // Sort the fields of this message in descending order by size. We |
| // assume that if we then pack the fields tightly in this order, all fields |
| // will end up properly-aligned, since all field sizes are powers of two or |
| // are multiples of the system word size. |
| scoped_array<const FieldDescriptor*> ordered_fields( |
| new const FieldDescriptor*[type->field_count()]); |
| for (int i = 0; i < type->field_count(); i++) { |
| ordered_fields[i] = type->field(i); |
| } |
| stable_sort(&ordered_fields[0], &ordered_fields[type->field_count()], |
| DescendingFieldSizeOrder()); |
| |
| // Decide all field offsets by packing in order. |
| int current_offset = 0; |
| |
| for (int i = 0; i < type->field_count(); i++) { |
| offsets[ordered_fields[i]->index()] = current_offset; |
| current_offset += FieldSpaceUsed(ordered_fields[i]); |
| } |
| |
| // Allocate space for all fields plus has_bits. We'll stick has_bits on |
| // the end. |
| int size = current_offset + |
| DivideRoundingUp(type->field_count(), bitsizeof(uint32)) * sizeof(uint32); |
| |
| // Round size up to the nearest 64-bit boundary just to make sure no |
| // clever allocators think that alignment is not necessary. This also |
| // insures that has_bits is properly-aligned, since we'll always align |
| // has_bits with the end of the structure. |
| size = DivideRoundingUp(size, sizeof(uint64)) * sizeof(uint64); |
| uint8* base = reinterpret_cast<uint8*>(operator new(size)); |
| memset(base, 0, size); |
| |
| // Construct message. |
| DynamicMessage* result = |
| new DynamicMessage(type, base, base, size, offsets, pool_, this); |
| *target = result; |
| result->CrossLinkPrototypes(this); |
| |
| return result; |
| } |
| |
| } // namespace protobuf |
| |
| } // namespace google |