parse_context.h source code [include/google/protobuf/parse_context.h]

1	// Protocol Buffers - Google's data interchange format
2	// Copyright 2008 Google Inc. All rights reserved.
3	// https://developers.google.com/protocol-buffers/
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are
7	// met:
8	//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
18	//
19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31	#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32	#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33
34	#include <cstdint>
35	#include <cstring>
36	#include <string>
37
38	#include <google/protobuf/io/coded_stream.h>
39	#include <google/protobuf/io/zero_copy_stream.h>
40	#include <google/protobuf/arena.h>
41	#include <google/protobuf/arenastring.h>
42	#include <google/protobuf/implicit_weak_message.h>
43	#include <google/protobuf/metadata_lite.h>
44	#include <google/protobuf/port.h>
45	#include <google/protobuf/repeated_field.h>
46	#include <google/protobuf/wire_format_lite.h>
47	#include <google/protobuf/stubs/strutil.h>
48
49	#include <google/protobuf/port_def.inc>
50
51
52	namespace google {
53	namespace protobuf {
54
55	class UnknownFieldSet;
56	class DescriptorPool;
57	class MessageFactory;
58
59	namespace internal {
60
61	// Template code below needs to know about the existence of these functions.
62	PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
63	PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
64	std::string* s);
65	// Inline because it is just forwarding to s->WriteVarint
66	inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
67	inline void WriteLengthDelimited(uint32 num, StringPiece val,
68	UnknownFieldSet* s);
69
70
71	// The basic abstraction the parser is designed for is a slight modification
72	// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
73	// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
79	//
// Where the '-' represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, it is at least still valid to read those bytes. Again,
// pictorially, we now have
86	//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (i.e. it should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
98	//
99	// The reason for this, admittedly, unconventional invariant is to ruthlessly
100	// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly it alleviates having to perform bounds checks if a piece of code
102	// is guaranteed to not read more than kSlopBytes. Secondly, and more
103	// importantly, the protobuf wireformat is such that reading a key/value pair is
104	// always less than 16 bytes. This removes the need to change to next buffer in
105	// the middle of reading primitive values. Hence there is no need to store and
106	// load the current position.
107
108	class PROTOBUF_EXPORT EpsCopyInputStream {
109	public:
110	enum { kSlopBytes = `16`, kMaxCordBytesToCopy = `512` };
111
112	explicit EpsCopyInputStream(bool enable_aliasing)
113	: aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
114
115	void BackUp(const char* ptr) {
116	GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
117	int count;
118	if (next_chunk_ == buffer_) {
119	count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
120	} else {
121	count = size_ + static_cast<int>(buffer_end_ - ptr);
122	}
123	if (count > `0`) StreamBackUp(count);
124	}
125
126	// If return value is negative it's an error
127	PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
128	GOOGLE_DCHECK(limit >= `0` && limit <= INT_MAX - kSlopBytes);
129	// This add is safe due to the invariant above, because
130	// ptr - buffer_end_ <= kSlopBytes.
131	limit += static_cast<int>(ptr - buffer_end_);
132	limit_end_ = buffer_end_ + (std::min)(a: `0`, b: limit);
133	auto old_limit = limit_;
134	limit_ = limit;
135	return old_limit - limit;
136	}
137
138	PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
139	if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
140	limit_ = limit_ + delta;
141	// TODO(gerbens) We could remove this line and hoist the code to
142	// DoneFallback. Study the perf/bin-size effects.
143	limit_end_ = buffer_end_ + (std::min)(a: `0`, b: limit_);
144	return true;
145	}
146
147	PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
148	if (size <= buffer_end_ + kSlopBytes - ptr) {
149	return ptr + size;
150	}
151	return SkipFallback(ptr, size);
152	}
153	PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
154	std::string* s) {
155	if (size <= buffer_end_ + kSlopBytes - ptr) {
156	s->assign(s: ptr, n: size);
157	return ptr + size;
158	}
159	return ReadStringFallback(ptr, size, str: s);
160	}
161	PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
162	std::string* s) {
163	if (size <= buffer_end_ + kSlopBytes - ptr) {
164	s->append(s: ptr, n: size);
165	return ptr + size;
166	}
167	return AppendStringFallback(ptr, size, str: s);
168	}
169
170	template <typename Tag, typename T>
171	PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr,
172	Tag expected_tag,
173	RepeatedField<T>* out);
174
175	template <typename T>
176	PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
177	int size,
178	RepeatedField<T>* out);
179	template <typename Add>
180	PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
181	Add add);
182
183	uint32 LastTag() const { return last_tag_minus_1_ + `1`; }
184	bool ConsumeEndGroup(uint32 start_tag) {
185	bool res = last_tag_minus_1_ == start_tag;
186	last_tag_minus_1_ = `0`;
187	return res;
188	}
189	bool EndedAtLimit() const { return last_tag_minus_1_ == `0`; }
190	bool EndedAtEndOfStream() const { return last_tag_minus_1_ == `1`; }
191	void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - `1`; }
192	void SetEndOfStream() { last_tag_minus_1_ = `1`; }
193	bool IsExceedingLimit(const char* ptr) {
194	return ptr > limit_end_ &&
195	(next_chunk_ == nullptr \|\| ptr - buffer_end_ > limit_);
196	}
197	int BytesUntilLimit(const char* ptr) const {
198	return limit_ + static_cast<int>(buffer_end_ - ptr);
199	}
200	// Returns true if more data is available, if false is returned one has to
201	// call Done for further checks.
202	bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
203
204	protected:
205	// Returns true is limit (either an explicit limit or end of stream) is
206	// reached. It aligns ptr across buffer seams.*
207	// If limit is exceeded it returns true and ptr is set to null.
208	bool DoneWithCheck(const char** ptr, int d) {
209	GOOGLE_DCHECK(*ptr);
210	if (PROTOBUF_PREDICT_TRUE(ptr < limit_end_)) return* false;
211	// No need to fetch buffer if we ended on a limit in the slop region
212	if ((ptr - buffer_end_) == limit_) return* true;
213	auto res = DoneFallback(ptr: *ptr, d);
214	*ptr = res.first;
215	return res.second;
216	}
217
218	const char* InitFrom(StringPiece flat) {
219	overall_limit_ = `0`;
220	if (flat.size() > kSlopBytes) {
221	limit_ = kSlopBytes;
222	limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
223	next_chunk_ = buffer_;
224	if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
225	return flat.data();
226	} else {
227	std::memcpy(dest: buffer_, src: flat.data(), n: flat.size());
228	limit_ = `0`;
229	limit_end_ = buffer_end_ = buffer_ + flat.size();
230	next_chunk_ = nullptr;
231	if (aliasing_ == kOnPatch) {
232	aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
233	reinterpret_cast<std::uintptr_t>(buffer_);
234	}
235	return buffer_;
236	}
237	}
238
239	const char* InitFrom(io::ZeroCopyInputStream* zcis);
240
241	const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
242	if (limit == -`1`) return InitFrom(zcis);
243	overall_limit_ = limit;
244	auto res = InitFrom(zcis);
245	limit_ = limit - static_cast<int>(buffer_end_ - res);
246	limit_end_ = buffer_end_ + (std::min)(a: `0`, b: limit_);
247	return res;
248	}
249
250	private:
251	const char* limit_end_; // buffer_end_ + min(limit_, 0)
252	const char* buffer_end_;
253	const char* next_chunk_;
254	int size_;
255	int limit_; // relative to buffer_end_;
256	io::ZeroCopyInputStream* zcis_ = nullptr;
257	char buffer_[`2` * kSlopBytes] = {};
258	enum { kNoAliasing = `0`, kOnPatch = `1`, kNoDelta = `2` };
259	std::uintptr_t aliasing_ = kNoAliasing;
260	// This variable is used to communicate how the parse ended, in order to
261	// completely verify the parsed data. A wire-format parse can end because of
262	// one of the following conditions:
263	// 1) A parse can end on a pushed limit.
264	// 2) A parse can end on End Of Stream (EOS).
265	// 3) A parse can end on 0 tag (only valid for toplevel message).
266	// 4) A parse can end on an end-group tag.
267	// This variable should always be set to 0, which indicates case 1. If the
268	// parse terminated due to EOS (case 2), it's set to 1. In case the parse
269	// ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
270	// This var doesn't really belong in EpsCopyInputStream and should be part of
271	// the ParseContext, but case 2 is most easily and optimally implemented in
272	// DoneFallback.
273	uint32 last_tag_minus_1_ = `0`;
274	int overall_limit_ = INT_MAX; // Overall limit independent of pushed limits.
275	// Pretty random large number that seems like a safe allocation on most
276	// systems. TODO(gerbens) do we need to set this as build flag?
277	enum { kSafeStringSize = `50000000` };
278
279	std::pair<const char, bool> DoneFallback(const* char* ptr, int d);
280	const char* Next(int overrun, int d);
281	const char* SkipFallback(const char* ptr, int size);
282	const char* AppendStringFallback(const char* ptr, int size, std::string* str);
283	const char* ReadStringFallback(const char* ptr, int size, std::string* str);
284	bool StreamNext(const void** data) {
285	bool res = zcis_->Next(data, size: &size_);
286	if (res) overall_limit_ -= size_;
287	return res;
288	}
289	void StreamBackUp(int count) {
290	zcis_->BackUp(count);
291	overall_limit_ += count;
292	}
293
294	template <typename A>
295	const char* AppendSize(const char* ptr, int size, const A& append) {
296	int chunk_size = buffer_end_ + kSlopBytes - ptr;
297	do {
298	GOOGLE_DCHECK(size > chunk_size);
299	append(ptr, chunk_size);
300	ptr += chunk_size;
301	size -= chunk_size;
302	// DoneFallBack asserts it isn't called when exactly on the limit. If this
303	// happens we fail the parse, as we are at the limit and still more bytes
304	// to read.
305	if (limit_ == kSlopBytes) return nullptr;
306	auto res = DoneFallback(ptr, d: -`1`);
307	if (res.second) return nullptr; // If done we passed the limit
308	ptr = res.first;
309	chunk_size = buffer_end_ + kSlopBytes - ptr;
310	} while (size > chunk_size);
311	append(ptr, size);
312	return ptr + size;
313	}
314
315	// AppendUntilEnd appends data until a limit (either a PushLimit or end of
316	// stream. Normal payloads are from length delimited fields which have an
317	// explicit size. Reading until limit only comes when the string takes
318	// the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
319	// implicit weak messages. We keep these methods private and friend them.
320	template <typename A>
321	const char* AppendUntilEnd(const char* ptr, const A& append) {
322	while (!DoneWithCheck(ptr: &ptr, d: -`1`)) {
323	append(ptr, limit_end_ - ptr);
324	ptr = limit_end_;
325	}
326	return ptr;
327	}
328
329	PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
330	std::string* str) {
331	return AppendUntilEnd(
332	ptr, append: [str](const char* p, ptrdiff_t s) { str->append(s: p, n: s); });
333	}
334	friend class ImplicitWeakMessage;
335	};
336
337	// ParseContext holds all data that is global to the entire parse. Most
338	// importantly it contains the input stream, but also recursion depth and also
339	// stores the end group tag, in case a parser ended on a endgroup, to verify
340	// matching start/end group tags.
341	class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
342	public:
343	struct Data {
344	const DescriptorPool* pool = nullptr;
345	MessageFactory* factory = nullptr;
346	};
347
348	template <typename... T>
349	ParseContext(int depth, bool aliasing, const char** start, T&&... args)
350	: EpsCopyInputStream(aliasing), depth_(depth) {
351	*start = InitFrom(std::forward<T>(args)...);
352	}
353
354	void TrackCorrectEnding() { group_depth_ = `0`; }
355
356	bool Done(const char ptr) { return** DoneWithCheck(ptr, d: group_depth_); }
357	bool DoneNoSlopCheck(const char ptr) { return** DoneWithCheck(ptr, d: -`1`); }
358
359	int depth() const { return depth_; }
360
361	Data& data() { return data_; }
362	const Data& data() const { return data_; }
363
364	template <typename T>
365	PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);
366	// We outline when the type is generic and we go through a virtual
367	const char* ParseMessage(MessageLite* msg, const char* ptr);
368	const char* ParseMessage(Message* msg, const char* ptr);
369
370	template <typename T>
371	PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
372	T* msg, const char* ptr, uint32 tag) {
373	if (--depth_ < `0`) return nullptr;
374	group_depth_++;
375	ptr = msg->_InternalParse(ptr, this);
376	group_depth_--;
377	depth_++;
378	if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
379	return ptr;
380	}
381
382	private:
383	// The context keeps an internal stack to keep track of the recursive
384	// part of the parse state.
385	// Current depth of the active parser, depth counts down.
386	// This is used to limit recursion depth (to prevent overflow on malicious
387	// data), but is also used to index in stack_ to store the current state.
388	int depth_;
389	// Unfortunately necessary for the fringe case of ending on 0 or end-group tag
390	// in the last kSlopBytes of a ZeroCopyInputStream chunk.
391	int group_depth_ = INT_MIN;
392	Data data_;
393	};
394
395	template <uint32 tag>
396	bool ExpectTag(const char* ptr) {
397	if (tag < `128`) {
398	return *ptr == tag;
399	} else {
400	static_assert(tag < `128` * `128`, "We only expect tags for 1 or 2 bytes");
401	char buf[`2`] = {static_cast<char>(tag \| `0x80`), static_cast<char>(tag >> `7`)};
402	return std::memcmp(s1: ptr, s2: buf, n: `2`) == `0`;
403	}
404	}
405
406	template <int>
407	struct EndianHelper;
408
409	template <>
410	struct EndianHelper<`1`> {
411	static uint8 Load(const void* p) { return *static_cast<const uint8*>(p); }
412	};
413
414	template <>
415	struct EndianHelper<`2`> {
416	static uint16 Load(const void* p) {
417	uint16 tmp;
418	std::memcpy(dest: &tmp, src: p, n: `2`);
419	#ifndef PROTOBUF_LITTLE_ENDIAN
420	tmp = bswap_16(tmp);
421	#endif
422	return tmp;
423	}
424	};
425
426	template <>
427	struct EndianHelper<`4`> {
428	static uint32 Load(const void* p) {
429	uint32 tmp;
430	std::memcpy(dest: &tmp, src: p, n: `4`);
431	#ifndef PROTOBUF_LITTLE_ENDIAN
432	tmp = bswap_32(tmp);
433	#endif
434	return tmp;
435	}
436	};
437
438	template <>
439	struct EndianHelper<`8`> {
440	static uint64 Load(const void* p) {
441	uint64 tmp;
442	std::memcpy(dest: &tmp, src: p, n: `8`);
443	#ifndef PROTOBUF_LITTLE_ENDIAN
444	tmp = bswap_64(tmp);
445	#endif
446	return tmp;
447	}
448	};
449
450	template <typename T>
451	T UnalignedLoad(const char* p) {
452	auto tmp = EndianHelper<sizeof(T)>::Load(p);
453	T res;
454	memcpy(&res, &tmp, sizeof(T));
455	return res;
456	}
457
458	PROTOBUF_EXPORT
459	std::pair<const char, uint32> VarintParseSlow32(const* char* p, uint32 res);
460	PROTOBUF_EXPORT
461	std::pair<const char, uint64> VarintParseSlow64(const* char* p, uint32 res);
462
463	inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) {
464	auto tmp = VarintParseSlow32(p, res);
465	*out = tmp.second;
466	return tmp.first;
467	}
468
469	inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) {
470	auto tmp = VarintParseSlow64(p, res);
471	*out = tmp.second;
472	return tmp.first;
473	}
474
475	template <typename T>
476	PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
477	auto ptr = reinterpret_cast<const uint8*>(p);
478	uint32 res = ptr[`0`];
479	if (!(res & `0x80`)) {
480	*out = res;
481	return p + `1`;
482	}
483	uint32 byte = ptr[`1`];
484	res += (byte - `1`) << `7`;
485	if (!(byte & `0x80`)) {
486	*out = res;
487	return p + `2`;
488	}
489	return VarintParseSlow(p, res, out);
490	}
491
492	// Used for tags, could read up to 5 bytes which must be available.
493	// Caller must ensure its safe to call.
494
495	PROTOBUF_EXPORT
496	std::pair<const char, uint32> ReadTagFallback(const* char* p, uint32 res);
497
498	// Same as ParseVarint but only accept 5 bytes at most.
499	inline const char* ReadTag(const char* p, uint32* out, uint32 /max_tag/ = `0`) {
500	uint32 res = static_cast<uint8>(p[`0`]);
501	if (res < `128`) {
502	*out = res;
503	return p + `1`;
504	}
505	uint32 second = static_cast<uint8>(p[`1`]);
506	res += (second - `1`) << `7`;
507	if (second < `128`) {
508	*out = res;
509	return p + `2`;
510	}
511	auto tmp = ReadTagFallback(p, res);
512	*out = tmp.second;
513	return tmp.first;
514	}
515
516	// Decode 2 consecutive bytes of a varint and returns the value, shifted left
517	// by 1. It simultaneous updates ptr to ptr + 1 or ptr + 2 depending if the*
518	// first byte's continuation bit is set.
519	// If bit 15 of return value is set (equivalent to the continuation bits of both
520	// bytes being set) the varint continues, otherwise the parse is done. On x86
521	// movsx eax, dil
522	// add edi, eax
523	// adc [rsi], 1
524	// add eax, eax
525	// and eax, edi
526	inline uint32 DecodeTwoBytes(const char** ptr) {
527	uint32 value = UnalignedLoad<uint16>(p: *ptr);
528	// Sign extend the low byte continuation bit
529	uint32_t x = static_cast<int8_t>(value);
530	// This add is an amazing operation, it cancels the low byte continuation bit
531	// from y transferring it to the carry. Simultaneously it also shifts the 7
532	// LSB left by one tightly against high byte varint bits. Hence value now
533	// contains the unpacked value shifted left by 1.
534	value += x;
535	// Use the carry to update the ptr appropriately.
536	*ptr += value < x ? `2` : `1`;
537	return value & (x + x); // Mask out the high byte iff no continuation
538	}
539
540	// More efficient varint parsing for big varints
541	inline const char* ParseBigVarint(const char* p, uint64* out) {
542	auto pnew = p;
543	auto tmp = DecodeTwoBytes(ptr: &pnew);
544	uint64 res = tmp >> `1`;
545	if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= `0`)) {
546	*out = res;
547	return pnew;
548	}
549	for (std::uint32_t i = `1`; i < `5`; i++) {
550	pnew = p + `2` * i;
551	tmp = DecodeTwoBytes(ptr: &pnew);
552	res += (static_cast<std::uint64_t>(tmp) - `2`) << (`14` * i - `1`);
553	if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= `0`)) {
554	*out = res;
555	return pnew;
556	}
557	}
558	return nullptr;
559	}
560
561	PROTOBUF_EXPORT
562	std::pair<const char, int32> ReadSizeFallback(const* char* p, uint32 first);
563	// Used for tags, could read up to 5 bytes which must be available. Additionally
564	// it makes sure the unsigned value fits a int32, otherwise returns nullptr.
565	// Caller must ensure its safe to call.
566	inline uint32 ReadSize(const char** pp) {
567	auto p = *pp;
568	uint32 res = static_cast<uint8>(p[`0`]);
569	if (res < `128`) {
570	*pp = p + `1`;
571	return res;
572	}
573	auto x = ReadSizeFallback(p, first: res);
574	*pp = x.first;
575	return x.second;
576	}
577
578	// Some convenience functions to simplify the generated parse loop code.
579	// Returning the value and updating the buffer pointer allows for nicer
580	// function composition. We rely on the compiler to inline this.
581	// Also in debug compiles having local scoped variables tend to generated
582	// stack frames that scale as O(num fields).
583	inline uint64 ReadVarint64(const char** p) {
584	uint64 tmp;
585	p = VarintParse(p: p, out: &tmp);
586	return tmp;
587	}
588
589	inline uint32 ReadVarint32(const char** p) {
590	uint32 tmp;
591	p = VarintParse(p: p, out: &tmp);
592	return tmp;
593	}
594
595	inline int64 ReadVarintZigZag64(const char** p) {
596	uint64 tmp;
597	p = VarintParse(p: p, out: &tmp);
598	return WireFormatLite::ZigZagDecode64(n: tmp);
599	}
600
601	inline int32 ReadVarintZigZag32(const char** p) {
602	uint64 tmp;
603	p = VarintParse(p: p, out: &tmp);
604	return WireFormatLite::ZigZagDecode32(n: static_cast<uint32>(tmp));
605	}
606
607	template <typename T>
608	PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
609	T* msg, const char* ptr) {
610	int size = ReadSize(pp: &ptr);
611	if (!ptr) return nullptr;
612	auto old = PushLimit(ptr, limit: size);
613	if (--depth_ < `0`) return nullptr;
614	ptr = msg->_InternalParse(ptr, this);
615	if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
616	depth_++;
617	if (!PopLimit(delta: old)) return nullptr;
618	return ptr;
619	}
620
621	template <typename Add>
622	const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
623	int size = ReadSize(pp: &ptr);
624	if (ptr == nullptr) return nullptr;
625	auto old = PushLimit(ptr, limit: size);
626	if (old < `0`) return nullptr;
627	while (!DoneWithCheck(ptr: &ptr, d: -`1`)) {
628	uint64 varint;
629	ptr = VarintParse(p: ptr, out: &varint);
630	if (!ptr) return nullptr;
631	add(varint);
632	}
633	if (!PopLimit(delta: old)) return nullptr;
634	return ptr;
635	}
636
637	// Helper for verification of utf8
638	PROTOBUF_EXPORT
639	bool VerifyUTF8(StringPiece s, const char* field_name);
640
641	inline bool VerifyUTF8(const std::string* s, const char* field_name) {
642	return VerifyUTF8(s: *s, field_name);
643	}
644
645	// All the string parsers with or without UTF checking and for all CTypes.
646	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
647	std::string* s, const char* ptr, ParseContext* ctx);
648
649
// Uncomment either of the commented-out lines inside the macro below to debug
// which parse function is failing.

// Returns `ret` from the enclosing function when `predicate` is false.
// The expansion carries its own empty `else` branch so that an `else` written
// after the macro in caller code cannot silently bind to the macro's `if`
// (the dangling-else hazard of a bare `if` inside a macro).
#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                 \
    /*  ::raise(SIGINT);  */                          \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */     \
    return ret;                                       \
  } else {                                            \
  }

// Parser variant: returns nullptr when `predicate` is false.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
661
662	template <typename T>
663	PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
664	const char* ptr,
665	ParseContext* ctx) {
666	uint32 number = tag >> `3`;
667	GOOGLE_PROTOBUF_PARSER_ASSERT(number != `0`);
668	using WireType = internal::WireFormatLite::WireType;
669	switch (tag & `7`) {
670	case WireType::WIRETYPE_VARINT: {
671	uint64 value;
672	ptr = VarintParse(p: ptr, out: &value);
673	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
674	field_parser.AddVarint(number, value);
675	break;
676	}
677	case WireType::WIRETYPE_FIXED64: {
678	uint64 value = UnalignedLoad<uint64>(p: ptr);
679	ptr += `8`;
680	field_parser.AddFixed64(number, value);
681	break;
682	}
683	case WireType::WIRETYPE_LENGTH_DELIMITED: {
684	ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
685	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
686	break;
687	}
688	case WireType::WIRETYPE_START_GROUP: {
689	ptr = field_parser.ParseGroup(number, ptr, ctx);
690	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
691	break;
692	}
693	case WireType::WIRETYPE_END_GROUP: {
694	GOOGLE_LOG(FATAL) << "Can't happen";
695	break;
696	}
697	case WireType::WIRETYPE_FIXED32: {
698	uint32 value = UnalignedLoad<uint32>(p: ptr);
699	ptr += `4`;
700	field_parser.AddFixed32(number, value);
701	break;
702	}
703	default:
704	return nullptr;
705	}
706	return ptr;
707	}
708
709	template <typename T>
710	PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
711	const char* ptr,
712	ParseContext* ctx) {
713	while (!ctx->Done(ptr: &ptr)) {
714	uint32 tag;
715	ptr = ReadTag(p: ptr, out: &tag);
716	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
717	if (tag == `0` \|\| (tag & `7`) == `4`) {
718	ctx->SetLastTag(tag);
719	return ptr;
720	}
721	ptr = FieldParser(tag, field_parser, ptr, ctx);
722	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
723	}
724	return ptr;
725	}
726
727	// The packed parsers parse repeated numeric primitives directly into the
728	// corresponding field
729
730	// These are packed varints
731	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
732	void* object, const char* ptr, ParseContext* ctx);
733	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
734	void* object, const char* ptr, ParseContext* ctx);
735	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
736	void* object, const char* ptr, ParseContext* ctx);
737	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
738	void* object, const char* ptr, ParseContext* ctx);
739	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
740	void* object, const char* ptr, ParseContext* ctx);
741	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
742	void* object, const char* ptr, ParseContext* ctx);
743	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
744	void* object, const char* ptr, ParseContext* ctx);
745
746	template <typename T>
747	PROTOBUF_EXPORT_TEMPLATE_DEFINE
748	PROTOBUF_MUST_USE_RESULT const
749	char* PackedEnumParser(void* object, const char* ptr, ParseContext* ctx,
750	bool (is_valid)(int), InternalMetadata metadata,
751	int field_num) {
752	return ctx->ReadPackedVarint(
753	ptr, [object, is_valid, metadata, field_num](uint64 val) {
754	if (is_valid(val)) {
755	static_cast<RepeatedField<int>*>(object)->Add(value: val);
756	} else {
757	WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
758	}
759	});
760	}
761
762	template <typename T>
763	PROTOBUF_EXPORT_TEMPLATE_DEFINE
764	PROTOBUF_MUST_USE_RESULT const
765	char* PackedEnumParserArg(void* object, const char* ptr, ParseContext* ctx,
766	bool (is_valid)(const* void, int*),
767	const void* data, InternalMetadata* metadata,
768	int field_num) {
769	return ctx->ReadPackedVarint(
770	ptr, [object, is_valid, data, metadata, field_num](uint64 val) {
771	if (is_valid(data, val)) {
772	static_cast<RepeatedField<int>*>(object)->Add(value: val);
773	} else {
774	WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
775	}
776	});
777	}
778
779	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
780	void* object, const char* ptr, ParseContext* ctx);
781	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
782	void* object, const char* ptr, ParseContext* ctx);
783	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
784	void* object, const char* ptr, ParseContext* ctx);
785	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
786	void* object, const char* ptr, ParseContext* ctx);
787	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
788	void* object, const char* ptr, ParseContext* ctx);
789	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
790	void* object, const char* ptr, ParseContext* ctx);
791	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
792	void* object, const char* ptr, ParseContext* ctx);
793
794	// This is the only recursive parser.
795	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
796	std::string* unknown, const char* ptr, ParseContext* ctx);
797	// This is a helper to for the UnknownGroupLiteParse but is actually also
798	// useful in the generated code. It uses overload on std::string vs*
799	// UnknownFieldSet to make the generated code isomorphic between full and lite.*
800	PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
801	uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);
802
803	} // namespace internal
804	} // namespace protobuf
805	} // namespace google
806
807	#include <google/protobuf/port_undef.inc>
808
809	#endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
810

source code of include/google/protobuf/parse_context.h