blob: dea4b6508dfac83e4bd7cc7489137331da2e3fac [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001// Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002// Copyright 2008 Google Inc. All rights reserved.
Feng Xiaoe4288622014-10-01 16:26:23 -07003// https://developers.google.com/protocol-buffers/
temporal40ee5512008-07-10 02:12:20 +00004//
kenton@google.com24bf56f2008-09-24 20:31:01 +00005// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
temporal40ee5512008-07-10 02:12:20 +00008//
kenton@google.com24bf56f2008-09-24 20:31:01 +00009// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018//
kenton@google.com24bf56f2008-09-24 20:31:01 +000019// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
31// Author: kenton@google.com (Kenton Varda)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file contains the CodedInputStream and CodedOutputStream classes,
36// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37// and allow you to read or write individual pieces of data in various
38// formats. In particular, these implement the varint encoding for
39// integers, a simple variable-length encoding in which smaller numbers
40// take fewer bytes.
41//
42// Typically these classes will only be used internally by the protocol
43// buffer library in order to encode and decode protocol buffers. Clients
44// of the library only need to know about this class if they wish to write
45// custom message parsing or serialization procedures.
46//
47// CodedOutputStream example:
48// // Write some data to "myfile". First we write a 4-byte "magic number"
49// // to identify the file type, then write a length-delimited string. The
50// // string is composed of a varint giving the length followed by the raw
51// // bytes.
acidtonic2ba455f2014-11-04 10:10:54 -050052// int fd = open("myfile", O_CREAT | O_WRONLY, 0644);
temporal40ee5512008-07-10 02:12:20 +000053// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54// CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55//
56// int magic_number = 1234;
57// char text[] = "Hello world!";
58// coded_output->WriteLittleEndian32(magic_number);
59// coded_output->WriteVarint32(strlen(text));
60// coded_output->WriteRaw(text, strlen(text));
61//
62// delete coded_output;
63// delete raw_output;
64// close(fd);
65//
66// CodedInputStream example:
67// // Read a file created by the above code.
68// int fd = open("myfile", O_RDONLY);
69// ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70// CodedInputStream* coded_input = new CodedInputStream(raw_input);
71//
72// coded_input->ReadLittleEndian32(&magic_number);
73// if (magic_number != 1234) {
74// cerr << "File not in expected format." << endl;
75// return;
76// }
77//
78// uint32 size;
79// coded_input->ReadVarint32(&size);
80//
81// char* text = new char[size + 1];
82// coded_input->ReadRaw(text, size);
83// text[size] = '\0';
84//
85// delete coded_input;
86// delete raw_input;
87// close(fd);
88//
89// cout << "Text is: " << text << endl;
90// delete [] text;
91//
92// For those who are interested, varint encoding is defined as follows:
93//
94// The encoding operates on unsigned integers of up to 64 bits in length.
95// Each byte of the encoded value has the format:
96// * bits 0-6: Seven bits of the number being encoded.
97// * bit 7: Zero if this is the last byte in the encoding (in which
98// case all remaining bits of the number are zero) or 1 if
99// more bytes follow.
100// The first byte contains the least-significant 7 bits of the number, the
101// second byte (if present) contains the next-least-significant 7 bits,
102// and so on. So, the binary number 1011000101011 would be encoded in two
103// bytes as "10101011 00101100".
104//
105// In theory, varint could be used to encode integers of any length.
106// However, for practicality we set a limit at 64 bits. The maximum encoded
107// length of a number is thus 10 bytes.
108
109#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112#include <string>
Feng Xiao6ef984a2014-11-10 17:34:54 -0800113#include <utility>
kenton@google.comb3f6a152010-04-05 23:19:54 +0000114#ifdef _MSC_VER
115 #if defined(_M_IX86) && \
116 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
117 #define PROTOBUF_LITTLE_ENDIAN 1
118 #endif
119 #if _MSC_VER >= 1300
120 // If MSVC has "/RTCc" set, it will complain about truncating casts at
121 // runtime. This file contains some intentional truncating casts.
122 #pragma runtime_checks("c", off)
123 #endif
124#else
125 #include <sys/param.h> // __BYTE_ORDER
Kal Conleyc3e92802014-12-16 02:30:45 +0100126 #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
127 (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
kenton@google.comb3f6a152010-04-05 23:19:54 +0000128 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
129 #define PROTOBUF_LITTLE_ENDIAN 1
130 #endif
131#endif
temporal40ee5512008-07-10 02:12:20 +0000132#include <google/protobuf/stubs/common.h>
133
liujisi@google.com33165fe2010-11-02 13:14:58 +0000134namespace google {
Feng Xiao6ef984a2014-11-10 17:34:54 -0800135
temporal40ee5512008-07-10 02:12:20 +0000136namespace protobuf {
kenton@google.comfccb1462009-12-18 02:11:36 +0000137
138class DescriptorPool;
139class MessageFactory;
140
temporal40ee5512008-07-10 02:12:20 +0000141namespace io {
142
143// Defined in this file.
144class CodedInputStream;
145class CodedOutputStream;
146
147// Defined in other files.
148class ZeroCopyInputStream; // zero_copy_stream.h
149class ZeroCopyOutputStream; // zero_copy_stream.h
150
151// Class which reads and decodes binary data which is composed of varint-
152// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
153// Most users will not need to deal with CodedInputStream.
154//
155// Most methods of CodedInputStream that return a bool return false if an
156// underlying I/O error occurs or if the data is malformed. Once such a
157// failure occurs, the CodedInputStream is broken and is no longer useful.
158class LIBPROTOBUF_EXPORT CodedInputStream {
159 public:
160 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
161 explicit CodedInputStream(ZeroCopyInputStream* input);
162
kenton@google.com80b1d622009-07-29 01:13:20 +0000163 // Create a CodedInputStream that reads from the given flat array. This is
164 // faster than using an ArrayInputStream. PushLimit(size) is implied by
165 // this constructor.
166 explicit CodedInputStream(const uint8* buffer, int size);
167
temporal40ee5512008-07-10 02:12:20 +0000168 // Destroy the CodedInputStream and position the underlying
169 // ZeroCopyInputStream at the first unread byte. If an error occurred while
170 // reading (causing a method to return false), then the exact position of
171 // the input stream may be anywhere between the last value that was read
172 // successfully and the stream's byte limit.
173 ~CodedInputStream();
174
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000175 // Return true if this CodedInputStream reads from a flat array instead of
176 // a ZeroCopyInputStream.
177 inline bool IsFlat() const;
temporal40ee5512008-07-10 02:12:20 +0000178
179 // Skips a number of bytes. Returns false if an underlying read error
180 // occurs.
181 bool Skip(int count);
182
kenton@google.com2d6daa72009-01-22 01:27:00 +0000183 // Sets *data to point directly at the unread part of the CodedInputStream's
184 // underlying buffer, and *size to the size of that buffer, but does not
185 // advance the stream's current position. This will always either produce
186 // a non-empty buffer or return false. If the caller consumes any of
187 // this data, it should then call Skip() to skip over the consumed bytes.
188 // This may be useful for implementing external fast parsing routines for
189 // types of data not covered by the CodedInputStream interface.
190 bool GetDirectBufferPointer(const void** data, int* size);
191
kenton@google.comfccb1462009-12-18 02:11:36 +0000192 // Like GetDirectBufferPointer, but this method is inlined, and does not
193 // attempt to Refresh() if the buffer is currently empty.
194 inline void GetDirectBufferPointerInline(const void** data,
195 int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
196
temporal40ee5512008-07-10 02:12:20 +0000197 // Read raw bytes, copying them into the given buffer.
198 bool ReadRaw(void* buffer, int size);
199
200 // Like ReadRaw, but reads into a string.
201 //
202 // Implementation Note: ReadString() grows the string gradually as it
203 // reads in the data, rather than allocating the entire requested size
204 // upfront. This prevents denial-of-service attacks in which a client
205 // could claim that a string is going to be MAX_INT bytes long in order to
206 // crash the server because it can't allocate this much space at once.
207 bool ReadString(string* buffer, int size);
kenton@google.comfccb1462009-12-18 02:11:36 +0000208 // Like the above, with inlined optimizations. This should only be used
209 // by the protobuf implementation.
210 inline bool InternalReadStringInline(string* buffer,
211 int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
temporal40ee5512008-07-10 02:12:20 +0000212
213
214 // Read a 32-bit little-endian integer.
215 bool ReadLittleEndian32(uint32* value);
216 // Read a 64-bit little-endian integer.
217 bool ReadLittleEndian64(uint64* value);
218
kenton@google.comfccb1462009-12-18 02:11:36 +0000219 // These methods read from an externally provided buffer. The caller is
220 // responsible for ensuring that the buffer has sufficient space.
221 // Read a 32-bit little-endian integer.
222 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
223 uint32* value);
224 // Read a 64-bit little-endian integer.
225 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
226 uint64* value);
227
temporal40ee5512008-07-10 02:12:20 +0000228 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
229 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
230 // it to uint32, but may be more efficient.
231 bool ReadVarint32(uint32* value);
232 // Read an unsigned integer with Varint encoding.
233 bool ReadVarint64(uint64* value);
234
235 // Read a tag. This calls ReadVarint32() and returns the result, or returns
236 // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
237 // the last tag value, which can be checked with LastTagWas().
jieluo@google.com4de8f552014-07-18 00:47:59 +0000238 // Always inline because this is only called in one place per parse loop
temporal40ee5512008-07-10 02:12:20 +0000239 // but it is called for every iteration of said loop, so it should be fast.
240 // GCC doesn't want to inline this by default.
241 uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
242
jieluo@google.com4de8f552014-07-18 00:47:59 +0000243 // This is usually a faster alternative to ReadTag() when cutoff is a manifest
244 // constant. It does particularly well for cutoff >= 127. The first part
245 // of the return value is the tag that was read, though it can also be 0 in
246 // the cases where ReadTag() would return 0. If the second part is true
247 // then the tag is known to be in [0, cutoff]. If not, the tag either is
248 // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
249 // because that can arise in several ways, and for best performance we want
250 // to avoid an extra "is tag == 0?" check here.)
251 inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
252 GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
253
temporal40ee5512008-07-10 02:12:20 +0000254 // Usually returns true if calling ReadVarint32() now would produce the given
255 // value. Will always return false if ReadVarint32() would not return the
256 // given value. If ExpectTag() returns true, it also advances past
257 // the varint. For best performance, use a compile-time constant as the
258 // parameter.
259 // Always inline because this collapses to a small number of instructions
260 // when given a constant parameter, but GCC doesn't want to inline by default.
261 bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
262
kenton@google.comfccb1462009-12-18 02:11:36 +0000263 // Like above, except this reads from the specified buffer. The caller is
264 // responsible for ensuring that the buffer is large enough to read a varint
265 // of the expected size. For best performance, use a compile-time constant as
266 // the expected tag parameter.
267 //
268 // Returns a pointer beyond the expected tag if it was found, or NULL if it
269 // was not.
270 static const uint8* ExpectTagFromArray(
271 const uint8* buffer,
272 uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
273
temporal40ee5512008-07-10 02:12:20 +0000274 // Usually returns true if no more bytes can be read. Always returns false
275 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
276 // call to LastTagWas() will act as if ReadTag() had been called and returned
277 // zero, and ConsumedEntireMessage() will return true.
278 bool ExpectAtEnd();
279
jieluo@google.com4de8f552014-07-18 00:47:59 +0000280 // If the last call to ReadTag() or ReadTagWithCutoff() returned the
281 // given value, returns true. Otherwise, returns false.
temporal40ee5512008-07-10 02:12:20 +0000282 //
283 // This is needed because parsers for some types of embedded messages
284 // (with field type TYPE_GROUP) don't actually know that they've reached the
285 // end of a message until they see an ENDGROUP tag, which was actually part
286 // of the enclosing message. The enclosing message would like to check that
287 // tag to make sure it had the right number, so it calls LastTagWas() on
288 // return from the embedded parser to check.
289 bool LastTagWas(uint32 expected);
290
291 // When parsing a message (but NOT a group), this method must be called
292 // immediately after MergeFromCodedStream() returns (if it returns true)
293 // to further verify that the message ended in a legitimate way. For
294 // example, this verifies that parsing did not end on an end-group tag.
295 // It also checks for some cases where, due to optimizations,
296 // MergeFromCodedStream() can incorrectly return true.
297 bool ConsumedEntireMessage();
298
299 // Limits ----------------------------------------------------------
300 // Limits are used when parsing length-delimited embedded messages.
301 // After the message's length is read, PushLimit() is used to prevent
302 // the CodedInputStream from reading beyond that length. Once the
303 // embedded message has been parsed, PopLimit() is called to undo the
304 // limit.
305
306 // Opaque type used with PushLimit() and PopLimit(). Do not modify
307 // values of this type yourself. The only reason that this isn't a
308 // struct with private internals is for efficiency.
309 typedef int Limit;
310
311 // Places a limit on the number of bytes that the stream may read,
312 // starting from the current position. Once the stream hits this limit,
313 // it will act like the end of the input has been reached until PopLimit()
314 // is called.
315 //
316 // As the names imply, the stream conceptually has a stack of limits. The
317 // shortest limit on the stack is always enforced, even if it is not the
318 // top limit.
319 //
320 // The value returned by PushLimit() is opaque to the caller, and must
321 // be passed unchanged to the corresponding call to PopLimit().
322 Limit PushLimit(int byte_limit);
323
324 // Pops the last limit pushed by PushLimit(). The input must be the value
325 // returned by that call to PushLimit().
326 void PopLimit(Limit limit);
327
328 // Returns the number of bytes left until the nearest limit on the
329 // stack is hit, or -1 if no limits are in place.
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000330 int BytesUntilLimit() const;
331
332 // Returns current position relative to the beginning of the input stream.
333 int CurrentPosition() const;
temporal40ee5512008-07-10 02:12:20 +0000334
335 // Total Bytes Limit -----------------------------------------------
336 // To prevent malicious users from sending excessively large messages
337 // and causing integer overflows or memory exhaustion, CodedInputStream
338 // imposes a hard limit on the total number of bytes it will read.
339
340 // Sets the maximum number of bytes that this CodedInputStream will read
341 // before refusing to continue. To prevent integer overflows in the
342 // protocol buffers implementation, as well as to prevent servers from
343 // allocating enormous amounts of memory to hold parsed messages, the
344 // maximum message length should be limited to the shortest length that
345 // will not harm usability. The theoretical shortest message that could
346 // cause integer overflows is 512MB. The default limit is 64MB. Apps
347 // should set shorter limits if possible. If warning_threshold is not -1,
348 // a warning will be printed to stderr after warning_threshold bytes are
jieluo@google.com4de8f552014-07-18 00:47:59 +0000349 // read. For backwards compatibility all negative values get squashed to -1,
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000350 // as other negative values might have special internal meanings.
351 // An error will always be printed to stderr if the limit is reached.
temporal40ee5512008-07-10 02:12:20 +0000352 //
353 // This is unrelated to PushLimit()/PopLimit().
354 //
355 // Hint: If you are reading this because your program is printing a
356 // warning about dangerously large protocol messages, you may be
357 // confused about what to do next. The best option is to change your
358 // design such that excessively large messages are not necessary.
359 // For example, try to design file formats to consist of many small
360 // messages rather than a single large one. If this is infeasible,
361 // you will need to increase the limit. Chances are, though, that
362 // your code never constructs a CodedInputStream on which the limit
363 // can be set. You probably parse messages by calling things like
364 // Message::ParseFromString(). In this case, you will need to change
365 // your code to instead construct some sort of ZeroCopyInputStream
366 // (e.g. an ArrayInputStream), construct a CodedInputStream around
367 // that, then call Message::ParseFromCodedStream() instead. Then
368 // you can adjust the limit. Yes, it's more work, but you're doing
369 // something unusual.
370 void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
371
jieluo@google.com4de8f552014-07-18 00:47:59 +0000372 // The Total Bytes Limit minus the Current Position, or -1 if there
373 // is no Total Bytes Limit.
374 int BytesUntilTotalBytesLimit() const;
375
temporal40ee5512008-07-10 02:12:20 +0000376 // Recursion Limit -------------------------------------------------
377 // To prevent corrupt or malicious messages from causing stack overflows,
378 // we must keep track of the depth of recursion when parsing embedded
379 // messages and groups. CodedInputStream keeps track of this because it
380 // is the only object that is passed down the stack during parsing.
381
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000382 // Sets the maximum recursion depth. The default is 100.
temporal40ee5512008-07-10 02:12:20 +0000383 void SetRecursionLimit(int limit);
384
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000385
temporal40ee5512008-07-10 02:12:20 +0000386 // Increments the current recursion depth. Returns true if the depth is
387 // under the limit, false if it has gone over.
388 bool IncrementRecursionDepth();
389
390 // Decrements the recursion depth.
391 void DecrementRecursionDepth();
392
Feng Xiao6ef984a2014-11-10 17:34:54 -0800393 // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
394 // Using this can reduce code size and complexity in some cases. The caller
395 // is expected to check that the second part of the result is non-negative (to
396 // bail out if the depth of recursion is too high) and, if all is well, to
397 // later pass the first part of the result to PopLimit() or similar.
398 std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
399 int byte_limit);
400
401 // Helper that is equivalent to: {
402 // bool result = ConsumedEntireMessage();
403 // PopLimit(limit);
404 // DecrementRecursionDepth();
405 // return result; }
406 // Using this can reduce code size and complexity in some cases.
407 // Do not use unless the current recursion depth is greater than zero.
408 bool DecrementRecursionDepthAndPopLimit(Limit limit);
409
kenton@google.comfccb1462009-12-18 02:11:36 +0000410 // Extension Registry ----------------------------------------------
411 // ADVANCED USAGE: 99.9% of people can ignore this section.
412 //
413 // By default, when parsing extensions, the parser looks for extension
414 // definitions in the pool which owns the outer message's Descriptor.
415 // However, you may call SetExtensionRegistry() to provide an alternative
416 // pool instead. This makes it possible, for example, to parse a message
417 // using a generated class, but represent some extensions using
418 // DynamicMessage.
419
420 // Set the pool used to look up extensions. Most users do not need to call
421 // this as the correct pool will be chosen automatically.
422 //
423 // WARNING: It is very easy to misuse this. Carefully read the requirements
424 // below. Do not use this unless you are sure you need it. Almost no one
425 // does.
426 //
427 // Let's say you are parsing a message into message object m, and you want
428 // to take advantage of SetExtensionRegistry(). You must follow these
429 // requirements:
430 //
431 // The given DescriptorPool must contain m->GetDescriptor(). It is not
432 // sufficient for it to simply contain a descriptor that has the same name
433 // and content -- it must be the *exact object*. In other words:
434 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
435 // m->GetDescriptor());
436 // There are two ways to satisfy this requirement:
437 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
438 // because this is the pool that would be used anyway if you didn't call
439 // SetExtensionRegistry() at all.
440 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
441 // "underlay". Read the documentation for DescriptorPool for more
442 // information about underlays.
443 //
444 // You must also provide a MessageFactory. This factory will be used to
445 // construct Message objects representing extensions. The factory's
446 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
447 // through the provided pool.
448 //
449 // If the provided factory might return instances of protocol-compiler-
450 // generated (i.e. compiled-in) types, or if the outer message object m is
451 // a generated type, then the given factory MUST have this property: If
452 // GetPrototype() is given a Descriptor which resides in
453 // DescriptorPool::generated_pool(), the factory MUST return the same
454 // prototype which MessageFactory::generated_factory() would return. That
455 // is, given a descriptor for a generated type, the factory must return an
456 // instance of the generated class (NOT DynamicMessage). However, when
457 // given a descriptor for a type that is NOT in generated_pool, the factory
458 // is free to return any implementation.
459 //
460 // The reason for this requirement is that generated sub-objects may be
461 // accessed via the standard (non-reflection) extension accessor methods,
462 // and these methods will down-cast the object to the generated class type.
463 // If the object is not actually of that type, the results would be undefined.
464 // On the other hand, if an extension is not compiled in, then there is no
465 // way the code could end up accessing it via the standard accessors -- the
466 // only way to access the extension is via reflection. When using reflection,
467 // DynamicMessage and generated messages are indistinguishable, so it's fine
468 // if these objects are represented using DynamicMessage.
469 //
470 // Using DynamicMessageFactory on which you have called
471 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
472 // above requirement.
473 //
474 // If either pool or factory is NULL, both must be NULL.
475 //
476 // Note that this feature is ignored when parsing "lite" messages as they do
477 // not have descriptors.
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000478 void SetExtensionRegistry(const DescriptorPool* pool,
479 MessageFactory* factory);
kenton@google.comfccb1462009-12-18 02:11:36 +0000480
481 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
482 // has been provided.
483 const DescriptorPool* GetExtensionPool();
484
485 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
486 // factory has been provided.
487 MessageFactory* GetExtensionFactory();
488
temporal40ee5512008-07-10 02:12:20 +0000489 private:
490 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
491
 // NOTE(review): presumably buffer_ points at the next unread byte of the
 // current buffer (the start of the range ending at buffer_end_) -- confirm
 // against the implementation.
temporal40ee5512008-07-10 02:12:20 +0000492 const uint8* buffer_;
kenton@google.comfccb1462009-12-18 02:11:36 +0000493 const uint8* buffer_end_; // pointer to the end of the buffer.
 // The ZeroCopyInputStream being wrapped.
 // NOTE(review): presumably NULL when constructed from a flat array (see
 // IsFlat()) -- confirm against the implementation.
Feng Xiao6ef984a2014-11-10 17:34:54 -0800494 ZeroCopyInputStream* input_;
temporal40ee5512008-07-10 02:12:20 +0000495 int total_bytes_read_; // total bytes read from input_, including
496 // the current buffer
497
498 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
499 // so that we can BackUp() on destruction.
500 int overflow_bytes_;
501
502 // LastTagWas() stuff.
jieluo@google.com4de8f552014-07-18 00:47:59 +0000503 uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
temporal40ee5512008-07-10 02:12:20 +0000504
kenton@google.comfccb1462009-12-18 02:11:36 +0000505 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
temporal40ee5512008-07-10 02:12:20 +0000506 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
507 // reach the end of a message and attempt to read another tag.
508 bool legitimate_message_end_;
509
510 // See EnableAliasing().
 // NOTE(review): no EnableAliasing() method is declared in this class as
 // shown here -- confirm where (or whether) this flag is ever set.
511 bool aliasing_enabled_;
512
513 // Limits
514 Limit current_limit_; // if position = -1, no limit is applied
515
516 // For simplicity, if the current buffer crosses a limit (either a normal
517 // limit created by PushLimit() or the total bytes limit), buffer_size_
518 // only tracks the number of bytes before that limit. This field
519 // contains the number of bytes after it. Note that this implies that if
520 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
521 // hit a limit. However, if both are zero, it doesn't necessarily mean
522 // we aren't at a limit -- the buffer may have ended exactly at the limit.
 // NOTE(review): buffer_size_ is not a member of this class as declared
 // here; presumably it now corresponds to the value returned by BufferSize()
 // (i.e. the span between buffer_ and buffer_end_) -- confirm and update the
 // wording above.
523 int buffer_size_after_limit_;
524
525 // Maximum number of bytes to read, period. This is unrelated to
526 // current_limit_. Set using SetTotalBytesLimit().
527 int total_bytes_limit_;
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000528
529 // If positive/0: Limit for bytes read after which a warning due to size
530 // should be logged.
531 // If -1: Printing of warning disabled. Can be set by client.
532 // If -2: Internal: Limit has been reached, print full size when destructing.
temporal40ee5512008-07-10 02:12:20 +0000533 int total_bytes_warning_threshold_;
534
Feng Xiao6ef984a2014-11-10 17:34:54 -0800535 // Current recursion budget, controlled by IncrementRecursionDepth() and
536 // similar. Starts at recursion_limit_ and goes down: if this reaches
537 // -1 we are over budget.
538 int recursion_budget_;
temporal40ee5512008-07-10 02:12:20 +0000539 // Recursion depth limit, set by SetRecursionLimit().
540 int recursion_limit_;
541
kenton@google.comfccb1462009-12-18 02:11:36 +0000542 // See SetExtensionRegistry().
543 const DescriptorPool* extension_pool_;
544 MessageFactory* extension_factory_;
545
546 // Private member functions.
547
temporal40ee5512008-07-10 02:12:20 +0000548 // Advance the buffer by a given number of bytes.
549 void Advance(int amount);
550
kenton@google.com80b1d622009-07-29 01:13:20 +0000551 // Back up input_ to the current buffer position.
552 void BackUpInputToCurrentPosition();
553
temporal40ee5512008-07-10 02:12:20 +0000554 // Recomputes the value of buffer_size_after_limit_. Must be called after
555 // current_limit_ or total_bytes_limit_ changes.
556 void RecomputeBufferLimits();
557
558 // Writes an error message saying that we hit total_bytes_limit_.
559 void PrintTotalBytesLimitError();
560
561 // Called when the buffer runs out to request more data. Implies an
kenton@google.comfccb1462009-12-18 02:11:36 +0000562 // Advance(BufferSize()).
temporal40ee5512008-07-10 02:12:20 +0000563 bool Refresh();
564
kenton@google.comfccb1462009-12-18 02:11:36 +0000565 // When parsing varints, we optimize for the common case of small values, and
566 // then optimize for the case when the varint fits within the current buffer
567 // piece. The Fallback method is used when we can't use the one-byte
568 // optimization. The Slow method is yet another fallback when the buffer is
569 // not large enough. Making the slow path out-of-line speeds up the common
570 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
571 // message crosses multiple buffers.
temporal40ee5512008-07-10 02:12:20 +0000572 bool ReadVarint32Fallback(uint32* value);
kenton@google.comfccb1462009-12-18 02:11:36 +0000573 bool ReadVarint64Fallback(uint64* value);
574 bool ReadVarint32Slow(uint32* value);
575 bool ReadVarint64Slow(uint64* value);
576 bool ReadLittleEndian32Fallback(uint32* value);
577 bool ReadLittleEndian64Fallback(uint64* value);
578 // Fallback/slow methods for reading tags. These do not update last_tag_,
579 // but will set legitimate_message_end_ if we are at the end of the input
580 // stream.
581 uint32 ReadTagFallback();
582 uint32 ReadTagSlow();
583 bool ReadStringFallback(string* buffer, int size);
584
585 // Return the size of the buffer.
586 int BufferSize() const;
587
588 static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
589
590 static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000591
592 static int default_recursion_limit_; // 100 by default.
temporal40ee5512008-07-10 02:12:20 +0000593};
594
595// Class which encodes and writes binary data which is composed of varint-
596// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
597// Most users will not need to deal with CodedOutputStream.
598//
599// Most methods of CodedOutputStream which return a bool return false if an
600// underlying I/O error occurs. Once such a failure occurs, the
kenton@google.comd37d46d2009-04-25 02:53:47 +0000601// CodedOutputStream is broken and is no longer useful. The Write* methods do
602// not return the stream status, but will invalidate the stream if an error
603// occurs. The client can probe HadError() to determine the status.
604//
605// Note that every method of CodedOutputStream which writes some data has
606// a corresponding static "ToArray" version. These versions write directly
607// to the provided buffer, returning a pointer past the last written byte.
608// They require that the buffer has sufficient capacity for the encoded data.
609// This allows an optimization where we check if an output stream has enough
610// space for an entire message before we start writing and, if there is, we
611// call only the ToArray methods to avoid doing bound checks for each
612// individual value.
613// i.e., in the example above:
614//
615// CodedOutputStream coded_output = new CodedOutputStream(raw_output);
616// int magic_number = 1234;
617// char text[] = "Hello world!";
618//
619// int coded_size = sizeof(magic_number) +
liujisi@google.com5d8d2b02010-12-06 06:20:14 +0000620// CodedOutputStream::VarintSize32(strlen(text)) +
kenton@google.comd37d46d2009-04-25 02:53:47 +0000621// strlen(text);
622//
623// uint8* buffer =
624// coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
625// if (buffer != NULL) {
626// // The output stream has enough space in the buffer: write directly to
627// // the array.
628// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
629// buffer);
630// buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
631// buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
632// } else {
633// // Make bound-checked writes, which will ask the underlying stream for
634// // more space as needed.
635// coded_output->WriteLittleEndian32(magic_number);
636// coded_output->WriteVarint32(strlen(text));
637// coded_output->WriteRaw(text, strlen(text));
638// }
639//
640// delete coded_output;
temporal40ee5512008-07-10 02:12:20 +0000641class LIBPROTOBUF_EXPORT CodedOutputStream {
642 public:
643 // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
644 explicit CodedOutputStream(ZeroCopyOutputStream* output);
645
646 // Destroy the CodedOutputStream and position the underlying
647 // ZeroCopyOutputStream immediately after the last byte written.
648 ~CodedOutputStream();
649
Jisi Liu885b6122015-02-28 14:51:22 -0800650 // Trims any unused space in the underlying buffer so that its size matches
651 // the number of bytes written by this stream. The underlying buffer will
652 // automatically be trimmed when this stream is destroyed; this call is only
653 // necessary if the underlying buffer is accessed *before* the stream is
654 // destroyed.
655 void Trim();
656
kenton@google.com2d6daa72009-01-22 01:27:00 +0000657 // Skips a number of bytes, leaving the bytes unmodified in the underlying
658 // buffer. Returns false if an underlying write error occurs. This is
659 // mainly useful with GetDirectBufferPointer().
660 bool Skip(int count);
661
662 // Sets *data to point directly at the unwritten part of the
663 // CodedOutputStream's underlying buffer, and *size to the size of that
664 // buffer, but does not advance the stream's current position. This will
665 // always either produce a non-empty buffer or return false. If the caller
666 // writes any data to this buffer, it should then call Skip() to skip over
667 // the consumed bytes. This may be useful for implementing external fast
668 // serialization routines for types of data not covered by the
669 // CodedOutputStream interface.
670 bool GetDirectBufferPointer(void** data, int* size);
671
kenton@google.comd37d46d2009-04-25 02:53:47 +0000672 // If there are at least "size" bytes available in the current buffer,
673 // returns a pointer directly into the buffer and advances over these bytes.
674 // The caller may then write directly into this buffer (e.g. using the
675 // *ToArray static methods) rather than go through CodedOutputStream. If
676 // there are not enough bytes available, returns NULL. The return pointer is
677 // invalidated as soon as any other non-const method of CodedOutputStream
678 // is called.
679 inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
680
temporal40ee5512008-07-10 02:12:20 +0000681 // Write raw bytes, copying them from the given buffer.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000682 void WriteRaw(const void* buffer, int size);
jieluo@google.com4de8f552014-07-18 00:47:59 +0000683 // Like WriteRaw() but will try to write aliased data if aliasing is
684 // turned on.
685 void WriteRawMaybeAliased(const void* data, int size);
kenton@google.comd37d46d2009-04-25 02:53:47 +0000686 // Like WriteRaw() but writing directly to the target array.
687 // This is _not_ inlined, as the compiler often optimizes memcpy into inline
688 // copy loops. Since this gets called by every field with string or bytes
689 // type, inlining may lead to a significant amount of code bloat, with only a
690 // minor performance gain.
691 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000692
693 // Equivalent to WriteRaw(str.data(), str.size()).
kenton@google.comd37d46d2009-04-25 02:53:47 +0000694 void WriteString(const string& str);
695 // Like WriteString() but writing directly to the target array.
696 static uint8* WriteStringToArray(const string& str, uint8* target);
jieluo@google.com4de8f552014-07-18 00:47:59 +0000697 // Write the varint-encoded size of str followed by str.
698 static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000699
700
jieluo@google.com4de8f552014-07-18 00:47:59 +0000701 // Instructs the CodedOutputStream to allow the underlying
702 // ZeroCopyOutputStream to hold pointers to the original structure instead of
703 // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
704 // underlying stream does not support aliasing, then enabling it has no
705 // affect. For now, this only affects the behavior of
706 // WriteRawMaybeAliased().
707 //
708 // NOTE: It is caller's responsibility to ensure that the chunk of memory
709 // remains live until all of the data has been consumed from the stream.
710 void EnableAliasing(bool enabled);
711
temporal40ee5512008-07-10 02:12:20 +0000712 // Write a 32-bit little-endian integer.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000713 void WriteLittleEndian32(uint32 value);
714 // Like WriteLittleEndian32() but writing directly to the target array.
715 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000716 // Write a 64-bit little-endian integer.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000717 void WriteLittleEndian64(uint64 value);
718 // Like WriteLittleEndian64() but writing directly to the target array.
719 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000720
721 // Write an unsigned integer with Varint encoding. Writing a 32-bit value
722 // is equivalent to casting it to uint64 and writing it as a 64-bit value,
723 // but may be more efficient.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000724 void WriteVarint32(uint32 value);
725 // Like WriteVarint32() but writing directly to the target array.
726 static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000727 // Write an unsigned integer with Varint encoding.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000728 void WriteVarint64(uint64 value);
729 // Like WriteVarint64() but writing directly to the target array.
730 static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000731
732 // Equivalent to WriteVarint32() except when the value is negative,
733 // in which case it must be sign-extended to a full 10 bytes.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000734 void WriteVarint32SignExtended(int32 value);
735 // Like WriteVarint32SignExtended() but writing directly to the target array.
736 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
temporal40ee5512008-07-10 02:12:20 +0000737
738 // This is identical to WriteVarint32(), but optimized for writing tags.
739 // In particular, if the input is a compile-time constant, this method
740 // compiles down to a couple instructions.
741 // Always inline because otherwise the aformentioned optimization can't work,
742 // but GCC by default doesn't want to inline this.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000743 void WriteTag(uint32 value);
744 // Like WriteTag() but writing directly to the target array.
745 static uint8* WriteTagToArray(
746 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
temporal40ee5512008-07-10 02:12:20 +0000747
748 // Returns the number of bytes needed to encode the given value as a varint.
749 static int VarintSize32(uint32 value);
750 // Returns the number of bytes needed to encode the given value as a varint.
751 static int VarintSize64(uint64 value);
752
753 // If negative, 10 bytes. Otheriwse, same as VarintSize32().
754 static int VarintSize32SignExtended(int32 value);
755
pliard@google.com324779a2012-03-02 14:00:20 +0000756 // Compile-time equivalent of VarintSize32().
757 template <uint32 Value>
758 struct StaticVarintSize32 {
759 static const int value =
760 (Value < (1 << 7))
761 ? 1
762 : (Value < (1 << 14))
763 ? 2
764 : (Value < (1 << 21))
765 ? 3
766 : (Value < (1 << 28))
767 ? 4
768 : 5;
769 };
770
temporal40ee5512008-07-10 02:12:20 +0000771 // Returns the total number of bytes written since this object was created.
772 inline int ByteCount() const;
773
kenton@google.comd37d46d2009-04-25 02:53:47 +0000774 // Returns true if there was an underlying I/O error since this object was
775 // created.
776 bool HadError() const { return had_error_; }
777
temporal40ee5512008-07-10 02:12:20 +0000778 private:
779 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
780
781 ZeroCopyOutputStream* output_;
782 uint8* buffer_;
783 int buffer_size_;
784 int total_bytes_; // Sum of sizes of all buffers seen so far.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000785 bool had_error_; // Whether an error occurred during output.
jieluo@google.com4de8f552014-07-18 00:47:59 +0000786 bool aliasing_enabled_; // See EnableAliasing().
temporal40ee5512008-07-10 02:12:20 +0000787
788 // Advance the buffer by a given number of bytes.
789 void Advance(int amount);
790
791 // Called when the buffer runs out to request more data. Implies an
792 // Advance(buffer_size_).
793 bool Refresh();
794
jieluo@google.com4de8f552014-07-18 00:47:59 +0000795 // Like WriteRaw() but may avoid copying if the underlying
796 // ZeroCopyOutputStream supports it.
797 void WriteAliasedRaw(const void* buffer, int size);
798
Jisi Liu885b6122015-02-28 14:51:22 -0800799 // If this write might cross the end of the buffer, we compose the bytes first
800 // then use WriteRaw().
801 void WriteVarint32SlowPath(uint32 value);
kenton@google.comd37d46d2009-04-25 02:53:47 +0000802
803 // Always-inlined versions of WriteVarint* functions so that code can be
804 // reused, while still controlling size. For instance, WriteVarint32ToArray()
805 // should not directly call this: since it is inlined itself, doing so
806 // would greatly increase the size of generated code. Instead, it should call
807 // WriteVarint32FallbackToArray. Meanwhile, WriteVarint32() is already
808 // out-of-line, so it should just invoke this directly to avoid any extra
809 // function call overhead.
kenton@google.comd37d46d2009-04-25 02:53:47 +0000810 static uint8* WriteVarint64ToArrayInline(
811 uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
812
temporal40ee5512008-07-10 02:12:20 +0000813 static int VarintSize32Fallback(uint32 value);
814};
815
// inline methods ====================================================
// The vast majority of varints are only one byte.  These inline
// methods optimize for that case.
819
820inline bool CodedInputStream::ReadVarint32(uint32* value) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000821 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
temporal40ee5512008-07-10 02:12:20 +0000822 *value = *buffer_;
823 Advance(1);
824 return true;
825 } else {
826 return ReadVarint32Fallback(value);
827 }
828}
829
kenton@google.comfccb1462009-12-18 02:11:36 +0000830inline bool CodedInputStream::ReadVarint64(uint64* value) {
831 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
832 *value = *buffer_;
833 Advance(1);
834 return true;
835 } else {
836 return ReadVarint64Fallback(value);
837 }
838}
839
840// static
841inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
842 const uint8* buffer,
843 uint32* value) {
kenton@google.comb3f6a152010-04-05 23:19:54 +0000844#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.comfccb1462009-12-18 02:11:36 +0000845 memcpy(value, buffer, sizeof(*value));
846 return buffer + sizeof(*value);
847#else
848 *value = (static_cast<uint32>(buffer[0]) ) |
849 (static_cast<uint32>(buffer[1]) << 8) |
850 (static_cast<uint32>(buffer[2]) << 16) |
851 (static_cast<uint32>(buffer[3]) << 24);
852 return buffer + sizeof(*value);
853#endif
854}
855// static
856inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
857 const uint8* buffer,
858 uint64* value) {
kenton@google.comb3f6a152010-04-05 23:19:54 +0000859#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.comfccb1462009-12-18 02:11:36 +0000860 memcpy(value, buffer, sizeof(*value));
861 return buffer + sizeof(*value);
862#else
863 uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
864 (static_cast<uint32>(buffer[1]) << 8) |
865 (static_cast<uint32>(buffer[2]) << 16) |
866 (static_cast<uint32>(buffer[3]) << 24);
867 uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
868 (static_cast<uint32>(buffer[5]) << 8) |
869 (static_cast<uint32>(buffer[6]) << 16) |
870 (static_cast<uint32>(buffer[7]) << 24);
871 *value = static_cast<uint64>(part0) |
872 (static_cast<uint64>(part1) << 32);
873 return buffer + sizeof(*value);
874#endif
875}
876
877inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
liujisi@google.com0acafda2010-12-01 04:13:50 +0000878#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.com21138402010-01-11 18:38:22 +0000879 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000880 memcpy(value, buffer_, sizeof(*value));
881 Advance(sizeof(*value));
882 return true;
883 } else {
884 return ReadLittleEndian32Fallback(value);
885 }
886#else
887 return ReadLittleEndian32Fallback(value);
888#endif
889}
890
891inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
liujisi@google.com0acafda2010-12-01 04:13:50 +0000892#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.com21138402010-01-11 18:38:22 +0000893 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000894 memcpy(value, buffer_, sizeof(*value));
895 Advance(sizeof(*value));
896 return true;
897 } else {
898 return ReadLittleEndian64Fallback(value);
899 }
900#else
901 return ReadLittleEndian64Fallback(value);
902#endif
903}
904
temporal40ee5512008-07-10 02:12:20 +0000905inline uint32 CodedInputStream::ReadTag() {
kenton@google.comfccb1462009-12-18 02:11:36 +0000906 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
temporal40ee5512008-07-10 02:12:20 +0000907 last_tag_ = buffer_[0];
908 Advance(1);
909 return last_tag_;
temporal40ee5512008-07-10 02:12:20 +0000910 } else {
kenton@google.comfccb1462009-12-18 02:11:36 +0000911 last_tag_ = ReadTagFallback();
912 return last_tag_;
temporal40ee5512008-07-10 02:12:20 +0000913 }
914}
915
jieluo@google.com4de8f552014-07-18 00:47:59 +0000916inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
917 uint32 cutoff) {
918 // In performance-sensitive code we can expect cutoff to be a compile-time
919 // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
920 // compile time.
921 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
922 // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
923 // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
924 // is large enough then is it better to check for the two-byte case first?
925 if (static_cast<int8>(buffer_[0]) > 0) {
926 const uint32 kMax1ByteVarint = 0x7f;
927 uint32 tag = last_tag_ = buffer_[0];
928 Advance(1);
Jisi Liu885b6122015-02-28 14:51:22 -0800929 return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
jieluo@google.com4de8f552014-07-18 00:47:59 +0000930 }
931 // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
932 // and tag is two bytes. The latter is tested by bitwise-and-not of the
933 // first byte and the second byte.
934 if (cutoff >= 0x80 &&
935 GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
936 GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
937 const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
938 uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
939 Advance(2);
940 // It might make sense to test for tag == 0 now, but it is so rare that
941 // that we don't bother. A varint-encoded 0 should be one byte unless
942 // the encoder lost its mind. The second part of the return value of
943 // this function is allowed to be either true or false if the tag is 0,
944 // so we don't have to check for tag == 0. We may need to check whether
945 // it exceeds cutoff.
946 bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
Jisi Liu885b6122015-02-28 14:51:22 -0800947 return std::make_pair(tag, at_or_below_cutoff);
jieluo@google.com4de8f552014-07-18 00:47:59 +0000948 }
949 }
950 // Slow path
951 last_tag_ = ReadTagFallback();
Jisi Liu885b6122015-02-28 14:51:22 -0800952 return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
jieluo@google.com4de8f552014-07-18 00:47:59 +0000953}
954
temporal40ee5512008-07-10 02:12:20 +0000955inline bool CodedInputStream::LastTagWas(uint32 expected) {
956 return last_tag_ == expected;
957}
958
959inline bool CodedInputStream::ConsumedEntireMessage() {
960 return legitimate_message_end_;
961}
962
963inline bool CodedInputStream::ExpectTag(uint32 expected) {
964 if (expected < (1 << 7)) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000965 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
temporal40ee5512008-07-10 02:12:20 +0000966 Advance(1);
967 return true;
968 } else {
969 return false;
970 }
971 } else if (expected < (1 << 14)) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000972 if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
temporal40ee5512008-07-10 02:12:20 +0000973 buffer_[0] == static_cast<uint8>(expected | 0x80) &&
974 buffer_[1] == static_cast<uint8>(expected >> 7)) {
975 Advance(2);
976 return true;
977 } else {
978 return false;
979 }
980 } else {
981 // Don't bother optimizing for larger values.
982 return false;
983 }
984}
985
kenton@google.comfccb1462009-12-18 02:11:36 +0000986inline const uint8* CodedInputStream::ExpectTagFromArray(
987 const uint8* buffer, uint32 expected) {
988 if (expected < (1 << 7)) {
989 if (buffer[0] == expected) {
990 return buffer + 1;
991 }
992 } else if (expected < (1 << 14)) {
993 if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
994 buffer[1] == static_cast<uint8>(expected >> 7)) {
995 return buffer + 2;
996 }
997 }
998 return NULL;
999}
1000
1001inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1002 int* size) {
1003 *data = buffer_;
1004 *size = buffer_end_ - buffer_;
1005}
1006
temporal40ee5512008-07-10 02:12:20 +00001007inline bool CodedInputStream::ExpectAtEnd() {
1008 // If we are at a limit we know no more bytes can be read. Otherwise, it's
1009 // hard to say without calling Refresh(), and we'd rather not do that.
1010
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001011 if (buffer_ == buffer_end_ &&
1012 ((buffer_size_after_limit_ != 0) ||
1013 (total_bytes_read_ == current_limit_))) {
temporal40ee5512008-07-10 02:12:20 +00001014 last_tag_ = 0; // Pretend we called ReadTag()...
1015 legitimate_message_end_ = true; // ... and it hit EOF.
1016 return true;
1017 } else {
1018 return false;
1019 }
1020}
1021
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001022inline int CodedInputStream::CurrentPosition() const {
1023 return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1024}
1025
kenton@google.comd37d46d2009-04-25 02:53:47 +00001026inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1027 if (buffer_size_ < size) {
1028 return NULL;
temporal40ee5512008-07-10 02:12:20 +00001029 } else {
kenton@google.comd37d46d2009-04-25 02:53:47 +00001030 uint8* result = buffer_;
1031 Advance(size);
1032 return result;
temporal40ee5512008-07-10 02:12:20 +00001033 }
1034}
1035
kenton@google.comd37d46d2009-04-25 02:53:47 +00001036inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
Jisi Liu885b6122015-02-28 14:51:22 -08001037 uint8* target) {
1038 while (value >= 0x80) {
1039 *target = static_cast<uint8>(value | 0x80);
1040 value >>= 7;
1041 ++target;
kenton@google.comd37d46d2009-04-25 02:53:47 +00001042 }
Jisi Liu885b6122015-02-28 14:51:22 -08001043 *target = static_cast<uint8>(value);
1044 return target + 1;
kenton@google.comd37d46d2009-04-25 02:53:47 +00001045}
1046
1047inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
temporal40ee5512008-07-10 02:12:20 +00001048 if (value < 0) {
kenton@google.comd37d46d2009-04-25 02:53:47 +00001049 WriteVarint64(static_cast<uint64>(value));
temporal40ee5512008-07-10 02:12:20 +00001050 } else {
kenton@google.comd37d46d2009-04-25 02:53:47 +00001051 WriteVarint32(static_cast<uint32>(value));
temporal40ee5512008-07-10 02:12:20 +00001052 }
1053}
1054
kenton@google.comd37d46d2009-04-25 02:53:47 +00001055inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1056 int32 value, uint8* target) {
1057 if (value < 0) {
1058 return WriteVarint64ToArray(static_cast<uint64>(value), target);
1059 } else {
1060 return WriteVarint32ToArray(static_cast<uint32>(value), target);
temporal40ee5512008-07-10 02:12:20 +00001061 }
kenton@google.comd37d46d2009-04-25 02:53:47 +00001062}
1063
kenton@google.com80b1d622009-07-29 01:13:20 +00001064inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1065 uint8* target) {
kenton@google.comb3f6a152010-04-05 23:19:54 +00001066#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.com80b1d622009-07-29 01:13:20 +00001067 memcpy(target, &value, sizeof(value));
1068#else
kenton@google.comfccb1462009-12-18 02:11:36 +00001069 target[0] = static_cast<uint8>(value);
kenton@google.com80b1d622009-07-29 01:13:20 +00001070 target[1] = static_cast<uint8>(value >> 8);
1071 target[2] = static_cast<uint8>(value >> 16);
1072 target[3] = static_cast<uint8>(value >> 24);
1073#endif
1074 return target + sizeof(value);
1075}
1076
1077inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1078 uint8* target) {
kenton@google.comb3f6a152010-04-05 23:19:54 +00001079#if defined(PROTOBUF_LITTLE_ENDIAN)
kenton@google.com80b1d622009-07-29 01:13:20 +00001080 memcpy(target, &value, sizeof(value));
1081#else
1082 uint32 part0 = static_cast<uint32>(value);
1083 uint32 part1 = static_cast<uint32>(value >> 32);
1084
kenton@google.comfccb1462009-12-18 02:11:36 +00001085 target[0] = static_cast<uint8>(part0);
kenton@google.com80b1d622009-07-29 01:13:20 +00001086 target[1] = static_cast<uint8>(part0 >> 8);
1087 target[2] = static_cast<uint8>(part0 >> 16);
1088 target[3] = static_cast<uint8>(part0 >> 24);
kenton@google.comfccb1462009-12-18 02:11:36 +00001089 target[4] = static_cast<uint8>(part1);
kenton@google.com80b1d622009-07-29 01:13:20 +00001090 target[5] = static_cast<uint8>(part1 >> 8);
1091 target[6] = static_cast<uint8>(part1 >> 16);
1092 target[7] = static_cast<uint8>(part1 >> 24);
1093#endif
1094 return target + sizeof(value);
1095}
1096
Jisi Liu885b6122015-02-28 14:51:22 -08001097inline void CodedOutputStream::WriteVarint32(uint32 value) {
1098 if (buffer_size_ >= 5) {
1099 // Fast path: We have enough bytes left in the buffer to guarantee that
1100 // this write won't cross the end, so we can skip the checks.
1101 uint8* target = buffer_;
1102 uint8* end = WriteVarint32ToArray(value, target);
1103 int size = end - target;
1104 Advance(size);
1105 } else {
1106 WriteVarint32SlowPath(value);
1107 }
1108}
1109
kenton@google.comd37d46d2009-04-25 02:53:47 +00001110inline void CodedOutputStream::WriteTag(uint32 value) {
1111 WriteVarint32(value);
1112}
1113
1114inline uint8* CodedOutputStream::WriteTagToArray(
1115 uint32 value, uint8* target) {
Jisi Liu885b6122015-02-28 14:51:22 -08001116 return WriteVarint32ToArray(value, target);
temporal40ee5512008-07-10 02:12:20 +00001117}
1118
1119inline int CodedOutputStream::VarintSize32(uint32 value) {
1120 if (value < (1 << 7)) {
1121 return 1;
1122 } else {
1123 return VarintSize32Fallback(value);
1124 }
1125}
1126
1127inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
1128 if (value < 0) {
1129 return 10; // TODO(kenton): Make this a symbolic constant.
1130 } else {
1131 return VarintSize32(static_cast<uint32>(value));
1132 }
1133}
1134
kenton@google.comd37d46d2009-04-25 02:53:47 +00001135inline void CodedOutputStream::WriteString(const string& str) {
kenton@google.combaac9c32010-11-30 23:54:03 +00001136 WriteRaw(str.data(), static_cast<int>(str.size()));
kenton@google.comd37d46d2009-04-25 02:53:47 +00001137}
1138
jieluo@google.com4de8f552014-07-18 00:47:59 +00001139inline void CodedOutputStream::WriteRawMaybeAliased(
1140 const void* data, int size) {
1141 if (aliasing_enabled_) {
1142 WriteAliasedRaw(data, size);
1143 } else {
1144 WriteRaw(data, size);
1145 }
1146}
1147
kenton@google.comd37d46d2009-04-25 02:53:47 +00001148inline uint8* CodedOutputStream::WriteStringToArray(
1149 const string& str, uint8* target) {
kenton@google.combaac9c32010-11-30 23:54:03 +00001150 return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
temporal40ee5512008-07-10 02:12:20 +00001151}
1152
1153inline int CodedOutputStream::ByteCount() const {
1154 return total_bytes_ - buffer_size_;
1155}
1156
1157inline void CodedInputStream::Advance(int amount) {
1158 buffer_ += amount;
temporal40ee5512008-07-10 02:12:20 +00001159}
1160
1161inline void CodedOutputStream::Advance(int amount) {
1162 buffer_ += amount;
1163 buffer_size_ -= amount;
1164}
1165
1166inline void CodedInputStream::SetRecursionLimit(int limit) {
Feng Xiao6ef984a2014-11-10 17:34:54 -08001167 recursion_budget_ += limit - recursion_limit_;
temporal40ee5512008-07-10 02:12:20 +00001168 recursion_limit_ = limit;
1169}
1170
1171inline bool CodedInputStream::IncrementRecursionDepth() {
Feng Xiao6ef984a2014-11-10 17:34:54 -08001172 --recursion_budget_;
1173 return recursion_budget_ >= 0;
temporal40ee5512008-07-10 02:12:20 +00001174}
1175
1176inline void CodedInputStream::DecrementRecursionDepth() {
Feng Xiao6ef984a2014-11-10 17:34:54 -08001177 if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
temporal40ee5512008-07-10 02:12:20 +00001178}
1179
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001180inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
kenton@google.comfccb1462009-12-18 02:11:36 +00001181 MessageFactory* factory) {
1182 extension_pool_ = pool;
1183 extension_factory_ = factory;
1184}
1185
1186inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1187 return extension_pool_;
1188}
1189
1190inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1191 return extension_factory_;
1192}
1193
1194inline int CodedInputStream::BufferSize() const {
1195 return buffer_end_ - buffer_;
1196}
1197
1198inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
Feng Xiao6ef984a2014-11-10 17:34:54 -08001199 : buffer_(NULL),
kenton@google.comfccb1462009-12-18 02:11:36 +00001200 buffer_end_(NULL),
Feng Xiao6ef984a2014-11-10 17:34:54 -08001201 input_(input),
kenton@google.comfccb1462009-12-18 02:11:36 +00001202 total_bytes_read_(0),
1203 overflow_bytes_(0),
1204 last_tag_(0),
1205 legitimate_message_end_(false),
1206 aliasing_enabled_(false),
kenton@google.com769ab6f2010-02-09 02:09:08 +00001207 current_limit_(kint32max),
kenton@google.comfccb1462009-12-18 02:11:36 +00001208 buffer_size_after_limit_(0),
1209 total_bytes_limit_(kDefaultTotalBytesLimit),
1210 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
Feng Xiao6ef984a2014-11-10 17:34:54 -08001211 recursion_budget_(default_recursion_limit_),
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001212 recursion_limit_(default_recursion_limit_),
kenton@google.comfccb1462009-12-18 02:11:36 +00001213 extension_pool_(NULL),
1214 extension_factory_(NULL) {
1215 // Eagerly Refresh() so buffer space is immediately available.
1216 Refresh();
1217}
1218
1219inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
Feng Xiao6ef984a2014-11-10 17:34:54 -08001220 : buffer_(buffer),
kenton@google.comfccb1462009-12-18 02:11:36 +00001221 buffer_end_(buffer + size),
Feng Xiao6ef984a2014-11-10 17:34:54 -08001222 input_(NULL),
kenton@google.comfccb1462009-12-18 02:11:36 +00001223 total_bytes_read_(size),
1224 overflow_bytes_(0),
1225 last_tag_(0),
1226 legitimate_message_end_(false),
1227 aliasing_enabled_(false),
1228 current_limit_(size),
1229 buffer_size_after_limit_(0),
1230 total_bytes_limit_(kDefaultTotalBytesLimit),
1231 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
Feng Xiao6ef984a2014-11-10 17:34:54 -08001232 recursion_budget_(default_recursion_limit_),
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001233 recursion_limit_(default_recursion_limit_),
kenton@google.comfccb1462009-12-18 02:11:36 +00001234 extension_pool_(NULL),
1235 extension_factory_(NULL) {
1236 // Note that setting current_limit_ == size is important to prevent some
1237 // code paths from trying to access input_ and segfaulting.
1238}
1239
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +00001240inline bool CodedInputStream::IsFlat() const {
1241 return input_ == NULL;
kenton@google.comfccb1462009-12-18 02:11:36 +00001242}
1243
temporal40ee5512008-07-10 02:12:20 +00001244} // namespace io
1245} // namespace protobuf
1246
kenton@google.comb3f6a152010-04-05 23:19:54 +00001247
1248#if defined(_MSC_VER) && _MSC_VER >= 1300
1249 #pragma runtime_checks("c", restore)
1250#endif // _MSC_VER
1251
liujisi@google.com33165fe2010-11-02 13:14:58 +00001252} // namespace google
temporal40ee5512008-07-10 02:12:20 +00001253#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__