src/google/protobuf/io/coded_stream.h - platform/external/protobuf-javalite - Gitiles

 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.
 // http://code.google.com/p/protobuf/
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Author: kenton@google.com (Kenton Varda)
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.
 //
 // This file contains the CodedInputStream and CodedOutputStream classes,
 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
 // and allow you to read or write individual pieces of data in various
 // formats.  In particular, these implement the varint encoding for
 // integers, a simple variable-length encoding in which smaller numbers
 // take fewer bytes.
 //
 // Typically these classes will only be used internally by the protocol
 // buffer library in order to encode and decode protocol buffers.  Clients
 // of the library only need to know about this class if they wish to write
 // custom message parsing or serialization procedures.
 //
 // CodedOutputStream example:
 //   // Write some data to "myfile".  First we write a 4-byte "magic number"
 //   // to identify the file type, then write a length-delimited string.  The
 //   // string is composed of a varint giving the length followed by the raw
 //   // bytes.
 //   int fd = open("myfile", O_WRONLY);
 //   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
 //
 //   int magic_number = 1234;
 //   char text[] = "Hello world!";
 //   coded_output->WriteLittleEndian32(magic_number);
 //   coded_output->WriteVarint32(strlen(text));
 //   coded_output->WriteRaw(text, strlen(text));
 //
 //   delete coded_output;
 //   delete raw_output;
 //   close(fd);
 //
 // CodedInputStream example:
 //   // Read a file created by the above code.
 //   int fd = open("myfile", O_RDONLY);
 //   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
 //   CodedInputStream coded_input = new CodedInputStream(raw_input);
 //
 //   coded_input->ReadLittleEndian32(&magic_number);
 //   if (magic_number != 1234) {
 //     cerr << "File not in expected format." << endl;
 //     return;
 //   }
 //
 //   uint32 size;
 //   coded_input->ReadVarint32(&size);
 //
 //   char* text = new char[size + 1];
 //   coded_input->ReadRaw(buffer, size);
 //   text[size] = '\0';
 //
 //   delete coded_input;
 //   delete raw_input;
 //   close(fd);
 //
 //   cout << "Text is: " << text << endl;
 //   delete [] text;
 //
 // For those who are interested, varint encoding is defined as follows:
 //
 // The encoding operates on unsigned integers of up to 64 bits in length.
 // Each byte of the encoded value has the format:
 // * bits 0-6: Seven bits of the number being encoded.
 // * bit 7: Zero if this is the last byte in the encoding (in which
 //   case all remaining bits of the number are zero) or 1 if
 //   more bytes follow.
 // The first byte contains the least-significant 7 bits of the number, the
 // second byte (if present) contains the next-least-significant 7 bits,
 // and so on.  So, the binary number 1011000101011 would be encoded in two
 // bytes as "10101011 00101100".
 //
 // In theory, varint could be used to encode integers of any length.
 // However, for practicality we set a limit at 64 bits.  The maximum encoded
 // length of a number is thus 10 bytes.

 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__

 #include <string>
 #include <google/protobuf/stubs/common.h>

 namespace google {

 namespace protobuf {
 namespace io {

 // Defined in this file.
 class CodedInputStream;
 class CodedOutputStream;

 // Defined in other files.
 class ZeroCopyInputStream;           // zero_copy_stream.h
 class ZeroCopyOutputStream;          // zero_copy_stream.h

 // Class which reads and decodes binary data which is composed of varint-
 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
 // Most users will not need to deal with CodedInputStream.
 //
 // Most methods of CodedInputStream that return a bool return false if an
 // underlying I/O error occurs or if the data is malformed.  Once such a
 // failure occurs, the CodedInputStream is broken and is no longer useful.
 class LIBPROTOBUF_EXPORT CodedInputStream {
  public:
   // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
   explicit CodedInputStream(ZeroCopyInputStream* input);

   // Destroy the CodedInputStream and position the underlying
   // ZeroCopyInputStream at the first unread byte.  If an error occurred while
   // reading (causing a method to return false), then the exact position of
   // the input stream may be anywhere between the last value that was read
   // successfully and the stream's byte limit.
   ~CodedInputStream();


   // Skips a number of bytes.  Returns false if an underlying read error
   // occurs.
   bool Skip(int count);

   // Read raw bytes, copying them into the given buffer.
   bool ReadRaw(void* buffer, int size);

   // Like ReadRaw, but reads into a string.
   //
   // Implementation Note:  ReadString() grows the string gradually as it
   // reads in the data, rather than allocating the entire requested size
   // upfront.  This prevents denial-of-service attacks in which a client
   // could claim that a string is going to be MAX_INT bytes long in order to
   // crash the server because it can't allocate this much space at once.
   bool ReadString(string* buffer, int size);


   // Read a 32-bit little-endian integer.
   bool ReadLittleEndian32(uint32* value);
   // Read a 64-bit little-endian integer.
   bool ReadLittleEndian64(uint64* value);

   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
   // it to uint32, but may be more efficient.
   bool ReadVarint32(uint32* value);
   // Read an unsigned integer with Varint encoding.
   bool ReadVarint64(uint64* value);

   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
   // the last tag value, which can be checked with LastTagWas().
   // Always inline because this is only called in once place per parse loop
   // but it is called for every iteration of said loop, so it should be fast.
   // GCC doesn't want to inline this by default.
   uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

   // Usually returns true if calling ReadVarint32() now would produce the given
   // value.  Will always return false if ReadVarint32() would not return the
   // given value.  If ExpectTag() returns true, it also advances past
   // the varint.  For best performance, use a compile-time constant as the
   // parameter.
   // Always inline because this collapses to a small number of instructions
   // when given a constant parameter, but GCC doesn't want to inline by default.
   bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

   // Usually returns true if no more bytes can be read.  Always returns false
   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
   // call to LastTagWas() will act as if ReadTag() had been called and returned
   // zero, and ConsumedEntireMessage() will return true.
   bool ExpectAtEnd();

   // If the last call to ReadTag() returned the given value, returns true.
   // Otherwise, returns false;
   //
   // This is needed because parsers for some types of embedded messages
   // (with field type TYPE_GROUP) don't actually know that they've reached the
   // end of a message until they see an ENDGROUP tag, which was actually part
   // of the enclosing message.  The enclosing message would like to check that
   // tag to make sure it had the right number, so it calls LastTagWas() on
   // return from the embedded parser to check.
   bool LastTagWas(uint32 expected);

   // When parsing message (but NOT a group), this method must be called
   // immediately after MergeFromCodedStream() returns (if it returns true)
   // to further verify that the message ended in a legitimate way.  For
   // example, this verifies that parsing did not end on an end-group tag.
   // It also checks for some cases where, due to optimizations,
   // MergeFromCodedStream() can incorrectly return true.
   bool ConsumedEntireMessage();

   // Limits ----------------------------------------------------------
   // Limits are used when parsing length-delimited embedded messages.
   // After the message's length is read, PushLimit() is used to prevent
   // the CodedInputStream from reading beyond that length.  Once the
   // embedded message has been parsed, PopLimit() is called to undo the
   // limit.

   // Opaque type used with PushLimit() and PopLimit().  Do not modify
   // values of this type yourself.  The only reason that this isn't a
   // struct with private internals is for efficiency.
   typedef int Limit;

   // Places a limit on the number of bytes that the stream may read,
   // starting from the current position.  Once the stream hits this limit,
   // it will act like the end of the input has been reached until PopLimit()
   // is called.
   //
   // As the names imply, the stream conceptually has a stack of limits.  The
   // shortest limit on the stack is always enforced, even if it is not the
   // top limit.
   //
   // The value returned by PushLimit() is opaque to the caller, and must
   // be passed unchanged to the corresponding call to PopLimit().
   Limit PushLimit(int byte_limit);

   // Pops the last limit pushed by PushLimit().  The input must be the value
   // returned by that call to PushLimit().
   void PopLimit(Limit limit);

   // Returns the number of bytes left until the nearest limit on the
   // stack is hit, or -1 if no limits are in place.
   int BytesUntilLimit();

   // Total Bytes Limit -----------------------------------------------
   // To prevent malicious users from sending excessively large messages
   // and causing integer overflows or memory exhaustion, CodedInputStream
   // imposes a hard limit on the total number of bytes it will read.

   // Sets the maximum number of bytes that this CodedInputStream will read
   // before refusing to continue.  To prevent integer overflows in the
   // protocol buffers implementation, as well as to prevent servers from
   // allocating enormous amounts of memory to hold parsed messages, the
   // maximum message length should be limited to the shortest length that
   // will not harm usability.  The theoretical shortest message that could
   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
   // should set shorter limits if possible.  If warning_threshold is not -1,
   // a warning will be printed to stderr after warning_threshold bytes are
   // read.  An error will always be printed to stderr if the limit is
   // reached.
   //
   // This is unrelated to PushLimit()/PopLimit().
   //
   // Hint:  If you are reading this because your program is printing a
   //   warning about dangerously large protocol messages, you may be
   //   confused about what to do next.  The best option is to change your
   //   design such that excessively large messages are not necessary.
   //   For example, try to design file formats to consist of many small
   //   messages rather than a single large one.  If this is infeasible,
   //   you will need to increase the limit.  Chances are, though, that
   //   your code never constructs a CodedInputStream on which the limit
   //   can be set.  You probably parse messages by calling things like
   //   Message::ParseFromString().  In this case, you will need to change
   //   your code to instead construct some sort of ZeroCopyInputStream
   //   (e.g. an ArrayInputStream), construct a CodedInputStream around
   //   that, then call Message::ParseFromCodedStream() instead.  Then
   //   you can adjust the limit.  Yes, it's more work, but you're doing
   //   something unusual.
   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);

   // Recursion Limit -------------------------------------------------
   // To prevent corrupt or malicious messages from causing stack overflows,
   // we must keep track of the depth of recursion when parsing embedded
   // messages and groups.  CodedInputStream keeps track of this because it
   // is the only object that is passed down the stack during parsing.

   // Sets the maximum recursion depth.  The default is 64.
   void SetRecursionLimit(int limit);

   // Increments the current recursion depth.  Returns true if the depth is
   // under the limit, false if it has gone over.
   bool IncrementRecursionDepth();

   // Decrements the recursion depth.
   void DecrementRecursionDepth();

  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);

   ZeroCopyInputStream* input_;
   const uint8* buffer_;
   int buffer_size_;       // size of current buffer
   int total_bytes_read_;  // total bytes read from input_, including
                           // the current buffer

   // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
   // so that we can BackUp() on destruction.
   int overflow_bytes_;

   // LastTagWas() stuff.
   uint32 last_tag_;         // result of last ReadTag().

   // This is set true by ReadVarint32Fallback() if it is called when exactly
   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
   // reach the end of a message and attempt to read another tag.
   bool legitimate_message_end_;

   // See EnableAliasing().
   bool aliasing_enabled_;

   // Limits
   Limit current_limit_;   // if position = -1, no limit is applied

   // For simplicity, if the current buffer crosses a limit (either a normal
   // limit created by PushLimit() or the total bytes limit), buffer_size_
   // only tracks the number of bytes before that limit.  This field
   // contains the number of bytes after it.  Note that this implies that if
   // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
   // hit a limit.  However, if both are zero, it doesn't necessarily mean
   // we aren't at a limit -- the buffer may have ended exactly at the limit.
   int buffer_size_after_limit_;

   // Maximum number of bytes to read, period.  This is unrelated to
   // current_limit_.  Set using SetTotalBytesLimit().
   int total_bytes_limit_;
   int total_bytes_warning_threshold_;

   // Current recursion depth, controlled by IncrementRecursionDepth() and
   // DecrementRecursionDepth().
   int recursion_depth_;
   // Recursion depth limit, set by SetRecursionLimit().
   int recursion_limit_;

   // Advance the buffer by a given number of bytes.
   void Advance(int amount);

   // Recomputes the value of buffer_size_after_limit_.  Must be called after
   // current_limit_ or total_bytes_limit_ changes.
   void RecomputeBufferLimits();

   // Writes an error message saying that we hit total_bytes_limit_.
   void PrintTotalBytesLimitError();

   // Called when the buffer runs out to request more data.  Implies an
   // Advance(buffer_size_).
   bool Refresh();

   bool ReadVarint32Fallback(uint32* value);
 };

 // Class which encodes and writes binary data which is composed of varint-
 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
 // Most users will not need to deal with CodedOutputStream.
 //
 // Most methods of CodedOutputStream which return a bool return false if an
 // underlying I/O error occurs.  Once such a failure occurs, the
 // CodedOutputStream is broken and is no longer useful.
 class LIBPROTOBUF_EXPORT CodedOutputStream {
  public:
   // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
   explicit CodedOutputStream(ZeroCopyOutputStream* output);

   // Destroy the CodedOutputStream and position the underlying
   // ZeroCopyOutputStream immediately after the last byte written.
   ~CodedOutputStream();

   // Write raw bytes, copying them from the given buffer.
   bool WriteRaw(const void* buffer, int size);

   // Equivalent to WriteRaw(str.data(), str.size()).
   bool WriteString(const string& str);


   // Write a 32-bit little-endian integer.
   bool WriteLittleEndian32(uint32 value);
   // Write a 64-bit little-endian integer.
   bool WriteLittleEndian64(uint64 value);

   // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
   // is equivalent to casting it to uint64 and writing it as a 64-bit value,
   // but may be more efficient.
   bool WriteVarint32(uint32 value);
   // Write an unsigned integer with Varint encoding.
   bool WriteVarint64(uint64 value);

   // Equivalent to WriteVarint32() except when the value is negative,
   // in which case it must be sign-extended to a full 10 bytes.
   bool WriteVarint32SignExtended(int32 value);

   // This is identical to WriteVarint32(), but optimized for writing tags.
   // In particular, if the input is a compile-time constant, this method
   // compiles down to a couple instructions.
   // Always inline because otherwise the aformentioned optimization can't work,
   // but GCC by default doesn't want to inline this.
   bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

   // Returns the number of bytes needed to encode the given value as a varint.
   static int VarintSize32(uint32 value);
   // Returns the number of bytes needed to encode the given value as a varint.
   static int VarintSize64(uint64 value);

   // If negative, 10 bytes.  Otheriwse, same as VarintSize32().
   static int VarintSize32SignExtended(int32 value);

   // Returns the total number of bytes written since this object was created.
   inline int ByteCount() const;

  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);

   ZeroCopyOutputStream* output_;
   uint8* buffer_;
   int buffer_size_;
   int total_bytes_;  // Sum of sizes of all buffers seen so far.

   // Advance the buffer by a given number of bytes.
   void Advance(int amount);

   // Called when the buffer runs out to request more data.  Implies an
   // Advance(buffer_size_).
   bool Refresh();

   bool WriteVarint32Fallback(uint32 value);
   static int VarintSize32Fallback(uint32 value);
 };

 // inline methods ====================================================
 // The vast majority of varints are only one byte.  These inline
 // methods optimize for that case.

 inline bool CodedInputStream::ReadVarint32(uint32* value) {
   if (buffer_size_ != 0 && *buffer_ < 0x80) {
     *value = *buffer_;
     Advance(1);
     return true;
   } else {
     return ReadVarint32Fallback(value);
   }
 }

 inline uint32 CodedInputStream::ReadTag() {
   if (buffer_size_ != 0 && buffer_[0] < 0x80) {
     last_tag_ = buffer_[0];
     Advance(1);
     return last_tag_;
   } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
     last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
     Advance(2);
     return last_tag_;
   } else if (ReadVarint32Fallback(&last_tag_)) {
     return last_tag_;
   } else {
     last_tag_ = 0;
     return 0;
   }
 }

 inline bool CodedInputStream::LastTagWas(uint32 expected) {
   return last_tag_ == expected;
 }

 inline bool CodedInputStream::ConsumedEntireMessage() {
   return legitimate_message_end_;
 }

 inline bool CodedInputStream::ExpectTag(uint32 expected) {
   if (expected < (1 << 7)) {
     if (buffer_size_ != 0 && buffer_[0] == expected) {
       Advance(1);
       return true;
     } else {
       return false;
     }
   } else if (expected < (1 << 14)) {
     if (buffer_size_ >= 2 &&
         buffer_[0] == static_cast<uint8>(expected | 0x80) &&
         buffer_[1] == static_cast<uint8>(expected >> 7)) {
       Advance(2);
       return true;
     } else {
       return false;
     }
   } else {
     // Don't bother optimizing for larger values.
     return false;
   }
 }

 inline bool CodedInputStream::ExpectAtEnd() {
   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
   // hard to say without calling Refresh(), and we'd rather not do that.

   if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
     last_tag_ = 0;                   // Pretend we called ReadTag()...
     legitimate_message_end_ = true;  // ... and it hit EOF.
     return true;
   } else {
     return false;
   }
 }

 inline bool CodedOutputStream::WriteVarint32(uint32 value) {
   if (value < 0x80 && buffer_size_ > 0) {
     *buffer_ = value;
     Advance(1);
     return true;
   } else {
     return WriteVarint32Fallback(value);
   }
 }

 inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) {
   if (value < 0) {
     return WriteVarint64(static_cast<uint64>(value));
   } else {
     return WriteVarint32(static_cast<uint32>(value));
   }
 }

 inline bool CodedOutputStream::WriteTag(uint32 value) {
   if (value < (1 << 7)) {
     if (buffer_size_ != 0) {
       buffer_[0] = value;
       Advance(1);
       return true;
     }
   } else if (value < (1 << 14)) {
     if (buffer_size_ >= 2) {
       buffer_[0] = static_cast<uint8>(value | 0x80);
       buffer_[1] = static_cast<uint8>(value >> 7);
       Advance(2);
       return true;
     }
   }
   return WriteVarint32Fallback(value);
 }

 inline int CodedOutputStream::VarintSize32(uint32 value) {
   if (value < (1 << 7)) {
     return 1;
   } else  {
     return VarintSize32Fallback(value);
   }
 }

 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
   if (value < 0) {
     return 10;     // TODO(kenton):  Make this a symbolic constant.
   } else {
     return VarintSize32(static_cast<uint32>(value));
   }
 }

 inline bool CodedOutputStream::WriteString(const string& str) {
   return WriteRaw(str.data(), str.size());
 }

 inline int CodedOutputStream::ByteCount() const {
   return total_bytes_ - buffer_size_;
 }

 inline void CodedInputStream::Advance(int amount) {
   buffer_ += amount;
   buffer_size_ -= amount;
 }

 inline void CodedOutputStream::Advance(int amount) {
   buffer_ += amount;
   buffer_size_ -= amount;
 }

 inline void CodedInputStream::SetRecursionLimit(int limit) {
   recursion_limit_ = limit;
 }

 inline bool CodedInputStream::IncrementRecursionDepth() {
   ++recursion_depth_;
   return recursion_depth_ <= recursion_limit_;
 }

 inline void CodedInputStream::DecrementRecursionDepth() {
   if (recursion_depth_ > 0) --recursion_depth_;
 }

 }  // namespace io
 }  // namespace protobuf

 }  // namespace google
 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__