| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. |
| // http://code.google.com/p/protobuf/ |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Author: kenton@google.com (Kenton Varda) |
| // Based on original Protocol Buffers design by |
| // Sanjay Ghemawat, Jeff Dean, and others. |
| // |
| // This file contains the CodedInputStream and CodedOutputStream classes, |
| // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively, |
| // and allow you to read or write individual pieces of data in various |
| // formats. In particular, these implement the varint encoding for |
| // integers, a simple variable-length encoding in which smaller numbers |
| // take fewer bytes. |
| // |
| // Typically these classes will only be used internally by the protocol |
| // buffer library in order to encode and decode protocol buffers. Clients |
| // of the library only need to know about these classes if they wish to write |
| // custom message parsing or serialization procedures. |
| // |
| // CodedOutputStream example: |
| // // Write some data to "myfile". First we write a 4-byte "magic number" |
| // // to identify the file type, then write a length-delimited string. The |
| // // string is composed of a varint giving the length followed by the raw |
| // // bytes. |
| // int fd = open("myfile", O_WRONLY); |
| // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd); |
| // CodedOutputStream* coded_output = new CodedOutputStream(raw_output); |
| // |
| // int magic_number = 1234; |
| // char text[] = "Hello world!"; |
| // coded_output->WriteLittleEndian32(magic_number); |
| // coded_output->WriteVarint32(strlen(text)); |
| // coded_output->WriteRaw(text, strlen(text)); |
| // |
| // delete coded_output; |
| // delete raw_output; |
| // close(fd); |
| // |
| // CodedInputStream example: |
| // // Read a file created by the above code. |
| // int fd = open("myfile", O_RDONLY); |
| // ZeroCopyInputStream* raw_input = new FileInputStream(fd); |
| // CodedInputStream* coded_input = new CodedInputStream(raw_input); |
| // |
| // uint32 magic_number; |
| // coded_input->ReadLittleEndian32(&magic_number); |
| // if (magic_number != 1234) { |
| // cerr << "File not in expected format." << endl; |
| // return; |
| // } |
| // |
| // uint32 size; |
| // coded_input->ReadVarint32(&size); |
| // |
| // char* text = new char[size + 1]; |
| // coded_input->ReadRaw(text, size); |
| // text[size] = '\0'; |
| // |
| // delete coded_input; |
| // delete raw_input; |
| // close(fd); |
| // |
| // cout << "Text is: " << text << endl; |
| // delete [] text; |
| // |
| // For those who are interested, varint encoding is defined as follows: |
| // |
| // The encoding operates on unsigned integers of up to 64 bits in length. |
| // Each byte of the encoded value has the format: |
| // * bits 0-6: Seven bits of the number being encoded. |
| // * bit 7: Zero if this is the last byte in the encoding (in which |
| // case all remaining bits of the number are zero) or 1 if |
| // more bytes follow. |
| // The first byte contains the least-significant 7 bits of the number, the |
| // second byte (if present) contains the next-least-significant 7 bits, |
| // and so on. So, the binary number 1011000101011 would be encoded in two |
| // bytes as "10101011 00101100". |
| // |
| // In theory, varint could be used to encode integers of any length. |
| // However, for practicality we set a limit at 64 bits. The maximum encoded |
| // length of a number is thus 10 bytes. |
| |
| #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ |
| #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ |
| |
| #include <string> |
| #include <google/protobuf/stubs/common.h> |
| |
| namespace google { |
| |
| namespace protobuf { |
| namespace io { |
| |
| // Forward declarations of the two stream classes defined later in this file. |
| class CodedInputStream; |
| class CodedOutputStream; |
| |
| // Forward declarations of the raw streams that the classes above wrap; these are defined in other files. |
| class ZeroCopyInputStream; // declared in zero_copy_stream.h |
| class ZeroCopyOutputStream; // declared in zero_copy_stream.h |
| |
| // Reads and decodes binary data that is composed of varint-encoded |
| // integers and fixed-width pieces. Wraps a ZeroCopyInputStream. |
| // Most users will not need to deal with CodedInputStream directly. |
| // |
| // Error contract: most methods of CodedInputStream that return a bool return |
| // false if an underlying I/O error occurs or if the data is malformed. Once |
| // such a failure occurs, the CodedInputStream is broken and no longer useful. |
| class LIBPROTOBUF_EXPORT CodedInputStream { |
| public: |
| // Creates a CodedInputStream that reads from the given ZeroCopyInputStream. |
| explicit CodedInputStream(ZeroCopyInputStream* input); |
| |
| // Destroys the CodedInputStream and positions the underlying |
| // ZeroCopyInputStream at the first unread byte. If an error occurred while |
| // reading (causing a method to return false), then the exact position of |
| // the input stream may be anywhere between the last value that was read |
| // successfully and the stream's byte limit. |
| ~CodedInputStream(); |
| |
| |
| // Skips the next `count` bytes of input. Returns false if an underlying |
| // read error occurs. |
| bool Skip(int count); |
| |
| // Reads `size` raw bytes, copying them into the given buffer. |
| bool ReadRaw(void* buffer, int size); |
| |
| // Like ReadRaw(), but reads `size` bytes into a string. |
| // |
| // Implementation Note: ReadString() grows the string gradually as it |
| // reads in the data, rather than allocating the entire requested size |
| // upfront. This prevents denial-of-service attacks in which a client |
| // could claim that a string is going to be MAX_INT bytes long in order to |
| // crash the server because it can't allocate this much space at once. |
| bool ReadString(string* buffer, int size); |
| |
| |
| // Reads a 32-bit little-endian integer into *value. |
| bool ReadLittleEndian32(uint32* value); |
| // Reads a 64-bit little-endian integer into *value. |
| bool ReadLittleEndian64(uint64* value); |
| |
| // Reads an unsigned integer with Varint encoding, truncating to 32 bits. |
| // Reading a 32-bit value is equivalent to reading a 64-bit one and casting |
| // it to uint32, but may be more efficient. |
| bool ReadVarint32(uint32* value); |
| // Reads an unsigned integer with Varint encoding into *value. |
| bool ReadVarint64(uint64* value); |
| |
| // Reads a tag. This calls ReadVarint32() and returns the result, or returns |
| // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates |
| // the last tag value, which can be checked with LastTagWas(). |
| // Always inline because this is only called in one place per parse loop |
| // but it is called for every iteration of said loop, so it should be fast. |
| // GCC doesn't want to inline this by default. |
| uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; |
| |
| // Usually returns true if calling ReadVarint32() now would produce the given |
| // value. Will always return false if ReadVarint32() would not return the |
| // given value. If ExpectTag() returns true, it also advances past |
| // the varint. For best performance, use a compile-time constant as the |
| // parameter. Values of (1 << 14) or more are never matched (returns false). |
| // Always inline because this collapses to a small number of instructions |
| // when given a constant parameter, but GCC doesn't want to inline by default. |
| bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; |
| |
| // Usually returns true if no more bytes can be read. Always returns false |
| // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent |
| // call to LastTagWas() will act as if ReadTag() had been called and returned |
| // zero, and ConsumedEntireMessage() will return true. |
| bool ExpectAtEnd(); |
| |
| // If the last call to ReadTag() returned the given value, returns true. |
| // Otherwise, returns false. |
| // |
| // This is needed because parsers for some types of embedded messages |
| // (with field type TYPE_GROUP) don't actually know that they've reached the |
| // end of a message until they see an ENDGROUP tag, which was actually part |
| // of the enclosing message. The enclosing message would like to check that |
| // tag to make sure it had the right number, so it calls LastTagWas() on |
| // return from the embedded parser to check. |
| bool LastTagWas(uint32 expected); |
| |
| // When parsing a message (but NOT a group), this method must be called |
| // immediately after MergeFromCodedStream() returns (if it returns true) |
| // to further verify that the message ended in a legitimate way. For |
| // example, this verifies that parsing did not end on an end-group tag. |
| // It also checks for some cases where, due to optimizations, |
| // MergeFromCodedStream() can incorrectly return true. |
| bool ConsumedEntireMessage(); |
| |
| // Limits ---------------------------------------------------------- |
| // Limits are used when parsing length-delimited embedded messages. |
| // After the message's length is read, PushLimit() is used to prevent |
| // the CodedInputStream from reading beyond that length. Once the |
| // embedded message has been parsed, PopLimit() is called to undo the |
| // limit. |
| |
| // Opaque type used with PushLimit() and PopLimit(). Do not modify |
| // values of this type yourself. The only reason that this isn't a |
| // struct with private internals is for efficiency. |
| typedef int Limit; |
| |
| // Places a limit on the number of bytes that the stream may read, |
| // starting from the current position. Once the stream hits this limit, |
| // it will act like the end of the input has been reached until PopLimit() |
| // is called. |
| // |
| // As the names imply, the stream conceptually has a stack of limits. The |
| // shortest limit on the stack is always enforced, even if it is not the |
| // top limit. |
| // |
| // The value returned by PushLimit() is opaque to the caller, and must |
| // be passed unchanged to the corresponding call to PopLimit(). |
| Limit PushLimit(int byte_limit); |
| |
| // Pops the last limit pushed by PushLimit(). The input must be the value |
| // returned by that call to PushLimit(). |
| void PopLimit(Limit limit); |
| |
| // Returns the number of bytes left until the nearest limit on the |
| // stack is hit, or -1 if no limits are in place. |
| int BytesUntilLimit(); |
| |
| // Total Bytes Limit ----------------------------------------------- |
| // To prevent malicious users from sending excessively large messages |
| // and causing integer overflows or memory exhaustion, CodedInputStream |
| // imposes a hard limit on the total number of bytes it will read. |
| |
| // Sets the maximum number of bytes that this CodedInputStream will read |
| // before refusing to continue. To prevent integer overflows in the |
| // protocol buffers implementation, as well as to prevent servers from |
| // allocating enormous amounts of memory to hold parsed messages, the |
| // maximum message length should be limited to the shortest length that |
| // will not harm usability. The theoretical shortest message that could |
| // cause integer overflows is 512MB. The default limit is 64MB. Apps |
| // should set shorter limits if possible. If warning_threshold is not -1, |
| // a warning will be printed to stderr after warning_threshold bytes are |
| // read. An error will always be printed to stderr if the limit is |
| // reached. |
| // |
| // This is unrelated to PushLimit()/PopLimit(). |
| // |
| // Hint: If you are reading this because your program is printing a |
| // warning about dangerously large protocol messages, you may be |
| // confused about what to do next. The best option is to change your |
| // design such that excessively large messages are not necessary. |
| // For example, try to design file formats to consist of many small |
| // messages rather than a single large one. If this is infeasible, |
| // you will need to increase the limit. Chances are, though, that |
| // your code never constructs a CodedInputStream on which the limit |
| // can be set. You probably parse messages by calling things like |
| // Message::ParseFromString(). In this case, you will need to change |
| // your code to instead construct some sort of ZeroCopyInputStream |
| // (e.g. an ArrayInputStream), construct a CodedInputStream around |
| // that, then call Message::ParseFromCodedStream() instead. Then |
| // you can adjust the limit. Yes, it's more work, but you're doing |
| // something unusual. |
| void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); |
| |
| // Recursion Limit ------------------------------------------------- |
| // To prevent corrupt or malicious messages from causing stack overflows, |
| // we must keep track of the depth of recursion when parsing embedded |
| // messages and groups. CodedInputStream keeps track of this because it |
| // is the only object that is passed down the stack during parsing. |
| |
| // Sets the maximum recursion depth. The default is 64. |
| void SetRecursionLimit(int limit); |
| |
| // Increments the current recursion depth. Returns true if the new depth is |
| // still within the limit (depth <= limit), false if it has gone over. |
| bool IncrementRecursionDepth(); |
| |
| // Decrements the recursion depth; a depth of zero is left unchanged. |
| void DecrementRecursionDepth(); |
| |
| private: |
| GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); |
| |
| ZeroCopyInputStream* input_; |
| const uint8* buffer_; |
| int buffer_size_; // number of unread bytes left in the current buffer |
| int total_bytes_read_; // total bytes read from input_, including |
| // the current buffer |
| |
| // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here |
| // so that we can BackUp() on destruction. |
| int overflow_bytes_; |
| |
| // State consulted by LastTagWas(). |
| uint32 last_tag_; // result of last ReadTag(); ExpectAtEnd() resets it to 0 |
| |
| // This is set true by ReadVarint32Fallback() if it is called when exactly |
| // at EOF, or by ExpectAtEnd() when it returns true. This happens when we |
| // reach the end of a message and attempt to read another tag. |
| bool legitimate_message_end_; |
| |
| // See EnableAliasing(). NOTE(review): no EnableAliasing() is declared in this class as shown -- confirm. |
| bool aliasing_enabled_; |
| |
| // Limits |
| Limit current_limit_; // position of the current limit; if -1, no limit is applied |
| |
| // For simplicity, if the current buffer crosses a limit (either a normal |
| // limit created by PushLimit() or the total bytes limit), buffer_size_ |
| // only tracks the number of bytes before that limit. This field |
| // contains the number of bytes after it. Note that this implies that if |
| // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've |
| // hit a limit. However, if both are zero, it doesn't necessarily mean |
| // we aren't at a limit -- the buffer may have ended exactly at the limit. |
| int buffer_size_after_limit_; |
| |
| // Maximum number of bytes to read, period. This is unrelated to |
| // current_limit_. Both fields are set using SetTotalBytesLimit(). |
| int total_bytes_limit_; |
| int total_bytes_warning_threshold_; |
| |
| // Current recursion depth, controlled by IncrementRecursionDepth() and |
| // DecrementRecursionDepth(). |
| int recursion_depth_; |
| // Recursion depth limit, set by SetRecursionLimit(). |
| int recursion_limit_; |
| |
| // Advances the buffer by `amount` bytes: moves buffer_ forward and shrinks buffer_size_ to match. |
| void Advance(int amount); |
| |
| // Recomputes the value of buffer_size_after_limit_. Must be called after |
| // current_limit_ or total_bytes_limit_ changes. |
| void RecomputeBufferLimits(); |
| |
| // Writes an error message saying that we hit total_bytes_limit_. |
| void PrintTotalBytesLimitError(); |
| |
| // Called when the buffer runs out to request more data. Implies an |
| // Advance(buffer_size_). |
| bool Refresh(); |
| |
| bool ReadVarint32Fallback(uint32* value); |
| }; |
| |
| // Encodes and writes binary data that is composed of varint-encoded |
| // integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. |
| // Most users will not need to deal with CodedOutputStream directly. |
| // |
| // Error contract: most methods of CodedOutputStream which return a bool |
| // return false if an underlying I/O error occurs. Once such a failure |
| // occurs, the CodedOutputStream is broken and is no longer useful. |
| class LIBPROTOBUF_EXPORT CodedOutputStream { |
| public: |
| // Creates a CodedOutputStream that writes to the given ZeroCopyOutputStream. |
| explicit CodedOutputStream(ZeroCopyOutputStream* output); |
| |
| // Destroys the CodedOutputStream and positions the underlying |
| // ZeroCopyOutputStream immediately after the last byte written. |
| ~CodedOutputStream(); |
| |
| // Writes `size` raw bytes, copying them from the given buffer. |
| bool WriteRaw(const void* buffer, int size); |
| |
| // Equivalent to WriteRaw(str.data(), str.size()). |
| bool WriteString(const string& str); |
| |
| |
| // Writes a 32-bit little-endian integer. |
| bool WriteLittleEndian32(uint32 value); |
| // Writes a 64-bit little-endian integer. |
| bool WriteLittleEndian64(uint64 value); |
| |
| // Writes an unsigned integer with Varint encoding. Writing a 32-bit value |
| // is equivalent to casting it to uint64 and writing it as a 64-bit value, |
| // but may be more efficient. |
| bool WriteVarint32(uint32 value); |
| // Writes an unsigned integer with Varint encoding. |
| bool WriteVarint64(uint64 value); |
| |
| // Equivalent to WriteVarint32() except when the value is negative, |
| // in which case it must be sign-extended to a full 10 bytes. |
| bool WriteVarint32SignExtended(int32 value); |
| |
| // This is identical to WriteVarint32(), but optimized for writing tags. |
| // In particular, if the input is a compile-time constant, this method |
| // compiles down to a couple of instructions. |
| // Always inline because otherwise the aforementioned optimization can't work, |
| // but GCC by default doesn't want to inline this. |
| bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; |
| |
| // Returns the number of bytes needed to encode the given 32-bit value as a varint. |
| static int VarintSize32(uint32 value); |
| // Returns the number of bytes needed to encode the given 64-bit value as a varint. |
| static int VarintSize64(uint64 value); |
| |
| // If negative, 10 bytes. Otherwise, same as VarintSize32(). |
| static int VarintSize32SignExtended(int32 value); |
| |
| // Returns the total number of bytes written since this object was created. |
| inline int ByteCount() const; |
| |
| private: |
| GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); |
| |
| ZeroCopyOutputStream* output_; |
| uint8* buffer_; |
| int buffer_size_; |
| int total_bytes_; // Sum of sizes of all buffers seen so far. |
| |
| // Advances the buffer by `amount` bytes: moves buffer_ forward and shrinks buffer_size_ to match. |
| void Advance(int amount); |
| |
| // Called when the buffer runs out. NOTE(review): the original wording, "to request more data", |
| // mirrors CodedInputStream; presumably this obtains a fresh buffer to write into -- confirm. Implies an Advance(buffer_size_). |
| bool Refresh(); |
| |
| bool WriteVarint32Fallback(uint32 value); |
| static int VarintSize32Fallback(uint32 value); |
| }; |
| |
| // inline methods ==================================================== |
| // The vast majority of varints are only one byte. These inline |
| // methods optimize for that case. |
| |
| // Reads a varint into *value. A single-byte varint that is already present |
| // in the current buffer is decoded right here; every other situation is |
| // handed to ReadVarint32Fallback(). |
| inline bool CodedInputStream::ReadVarint32(uint32* value) { |
|   const bool one_byte_ready = (buffer_size_ != 0) && (*buffer_ < 0x80); |
|   if (!one_byte_ready) { |
|     return ReadVarint32Fallback(value); |
|   } |
|   *value = *buffer_; |
|   Advance(1); |
|   return true; |
| } |
| |
| // Reads the next tag, remembers it in last_tag_ and returns it. On failure |
| // last_tag_ is reset to zero and zero is returned. |
| inline uint32 CodedInputStream::ReadTag() { |
|   // Fast path: a complete one-byte tag is available. |
|   if (buffer_size_ != 0 && buffer_[0] < 0x80) { |
|     last_tag_ = buffer_[0]; |
|     Advance(1); |
|     return last_tag_; |
|   } |
|   // Fast path: a complete two-byte tag is available. Strip the continuation |
|   // bit from the first byte and splice in the seven bits of the second. |
|   if (buffer_size_ >= 2 && buffer_[1] < 0x80) { |
|     last_tag_ = (buffer_[0] & 0x7f) | (buffer_[1] << 7); |
|     Advance(2); |
|     return last_tag_; |
|   } |
|   // Slow path: let the general varint reader deal with it. |
|   if (!ReadVarint32Fallback(&last_tag_)) { |
|     last_tag_ = 0; |
|   } |
|   return last_tag_; |
| } |
| |
| // Reports whether the most recently recorded tag equals `expected`. |
| inline bool CodedInputStream::LastTagWas(uint32 expected) { |
|   const bool same_tag = (last_tag_ == expected); |
|   return same_tag; |
| } |
| |
| // Returns the current value of the legitimate_message_end_ flag. |
| inline bool CodedInputStream::ConsumedEntireMessage() { |
|   const bool ended_cleanly = legitimate_message_end_; |
|   return ended_cleanly; |
| } |
| |
| // Checks whether the bytes at the front of the buffer are exactly the varint |
| // encoding of `expected` and, if so, consumes them. Only one- and two-byte |
| // encodings are recognised; anything else yields false without consuming. |
| inline bool CodedInputStream::ExpectTag(uint32 expected) { |
|   if (expected >= (1 << 14)) { |
|     // Don't bother optimizing for larger values. |
|     return false; |
|   } |
|   if (expected < (1 << 7)) { |
|     // One-byte encoding: the byte is the value itself. |
|     const bool one_byte_match = buffer_size_ != 0 && buffer_[0] == expected; |
|     if (one_byte_match) Advance(1); |
|     return one_byte_match; |
|   } |
|   // Two-byte encoding: low seven bits plus continuation bit, then the rest. |
|   const bool two_byte_match = |
|       buffer_size_ >= 2 && |
|       buffer_[0] == static_cast<uint8>(expected | 0x80) && |
|       buffer_[1] == static_cast<uint8>(expected >> 7); |
|   if (two_byte_match) Advance(2); |
|   return two_byte_match; |
| } |
| |
| // Returns true only when the buffer is empty because a limit cut it short. |
| // In that case it also leaves the stream in the state ReadTag() would have |
| // produced on hitting EOF. |
| inline bool CodedInputStream::ExpectAtEnd() { |
|   // If we are at a limit we know no more bytes can be read. Otherwise, it's |
|   // hard to say without calling Refresh(), and we'd rather not do that. |
|   const bool stopped_by_limit = |
|       (buffer_size_ == 0) && (buffer_size_after_limit_ != 0); |
|   if (stopped_by_limit) { |
|     last_tag_ = 0;                   // Pretend we called ReadTag()... |
|     legitimate_message_end_ = true;  // ... and it hit EOF. |
|   } |
|   return stopped_by_limit; |
| } |
| |
| // Writes `value` as a varint. A value below 0x80 is stored directly as a |
| // single byte when the buffer has room; everything else goes through |
| // WriteVarint32Fallback(). |
| inline bool CodedOutputStream::WriteVarint32(uint32 value) { |
|   if (value >= 0x80 || buffer_size_ <= 0) { |
|     return WriteVarint32Fallback(value); |
|   } |
|   *buffer_ = value; |
|   Advance(1); |
|   return true; |
| } |
| |
| // Writes a signed 32-bit value as a varint. Negative values are cast to |
| // uint64 and written with WriteVarint64(); non-negative values take the |
| // 32-bit path. |
| inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) { |
|   return (value < 0) ? WriteVarint64(static_cast<uint64>(value)) |
|                      : WriteVarint32(static_cast<uint32>(value)); |
| } |
| |
| // Writes a tag as a varint. Tags that encode to one or two bytes are stored |
| // directly when the buffer has enough room; all remaining cases are handled |
| // by WriteVarint32Fallback(). |
| inline bool CodedOutputStream::WriteTag(uint32 value) { |
|   const bool one_byte = value < (1 << 7); |
|   const bool two_bytes = !one_byte && value < (1 << 14); |
|   if (one_byte && buffer_size_ != 0) { |
|     buffer_[0] = value; |
|     Advance(1); |
|     return true; |
|   } |
|   if (two_bytes && buffer_size_ >= 2) { |
|     // Low seven bits with the continuation bit set, then the remaining bits. |
|     buffer_[0] = static_cast<uint8>(value | 0x80); |
|     buffer_[1] = static_cast<uint8>(value >> 7); |
|     Advance(2); |
|     return true; |
|   } |
|   return WriteVarint32Fallback(value); |
| } |
| |
| // Returns the encoded size of `value`: 1 for values below (1 << 7), |
| // otherwise whatever VarintSize32Fallback() computes. |
| inline int CodedOutputStream::VarintSize32(uint32 value) { |
|   return (value < (1 << 7)) ? 1 : VarintSize32Fallback(value); |
| } |
| |
| // Size counterpart of WriteVarint32SignExtended(): non-negative values are |
| // sized by VarintSize32(), negative values always take ten bytes. |
| inline int CodedOutputStream::VarintSize32SignExtended(int32 value) { |
|   if (value >= 0) { |
|     return VarintSize32(static_cast<uint32>(value)); |
|   } |
|   return 10;  // TODO(kenton): Make this a symbolic constant. |
| } |
| |
| // Writes the bytes of `str` by forwarding its data pointer and size to |
| // WriteRaw(). |
| inline bool CodedOutputStream::WriteString(const string& str) { |
|   const char* const bytes = str.data(); |
|   return WriteRaw(bytes, str.size()); |
| } |
| |
| // Returns total_bytes_ minus the part of the current buffer that is still |
| // unused (buffer_size_). |
| inline int CodedOutputStream::ByteCount() const { |
|   const int unused_in_buffer = buffer_size_; |
|   return total_bytes_ - unused_in_buffer; |
| } |
| |
| // Consumes `amount` bytes from the front of the current buffer: shrinks the |
| // remaining size and moves the read pointer forward by the same amount. |
| inline void CodedInputStream::Advance(int amount) { |
|   buffer_size_ -= amount; |
|   buffer_ += amount; |
| } |
| |
| // Marks `amount` bytes at the front of the current buffer as used: shrinks |
| // the remaining size and moves the write pointer forward by the same amount. |
| inline void CodedOutputStream::Advance(int amount) { |
|   buffer_size_ -= amount; |
|   buffer_ += amount; |
| } |
| |
| // Stores `limit` as the depth that IncrementRecursionDepth() checks against. |
| inline void CodedInputStream::SetRecursionLimit(int limit) { |
|   this->recursion_limit_ = limit; |
| } |
| |
| // Bumps the recursion depth by one and reports whether the new depth is |
| // still less than or equal to the configured limit. |
| inline bool CodedInputStream::IncrementRecursionDepth() { |
|   return ++recursion_depth_ <= recursion_limit_; |
| } |
| |
| // Lowers the recursion depth by one, leaving it untouched when it is not |
| // positive so that it never drops below zero through this call. |
| inline void CodedInputStream::DecrementRecursionDepth() { |
|   if (recursion_depth_ <= 0) { |
|     return; |
|   } |
|   --recursion_depth_; |
| } |
| |
| } // namespace io |
| } // namespace protobuf |
| |
| } // namespace google |
| #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ |