1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2014 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // Fast memory copying and comparison routines. |
32 | // strings::fastmemcmp_inlined() replaces memcmp() |
33 | // strings::memcpy_inlined() replaces memcpy() |
34 | // strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 |
35 | // |
36 | // strings::*_inlined() routines are inline versions of the |
37 | // routines exported by this module. Sometimes using the inlined |
38 | // versions is faster. Measure before using the inlined versions. |
39 | // |
// Performance measurement:
//   strings::fastmemcmp_inlined
//     Analysis: memcmp, fastmemcmp_inlined, fastmemcmp
//     2012-01-30
44 | |
45 | #ifndef GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
46 | #define GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
47 | |
48 | #include <stddef.h> |
49 | #include <stdio.h> |
50 | #include <string.h> |
51 | |
52 | #include <google/protobuf/stubs/common.h> |
53 | |
54 | #include <google/protobuf/port_def.inc> |
55 | |
56 | namespace google { |
57 | namespace protobuf { |
58 | namespace internal { |
59 | |
60 | // Return true if the n bytes at a equal the n bytes at b. |
61 | // The regions are allowed to overlap. |
62 | // |
// The performance is similar to that of memcmp(), but this routine is faster
// for moderately-sized inputs, or inputs that share a common prefix and differ
// somewhere in their last 8 bytes. Further optimizations can be added later
// if it makes sense to do so.
67 | inline bool memeq(const char* a, const char* b, size_t n) { |
68 | size_t n_rounded_down = n & ~static_cast<size_t>(7); |
69 | if (PROTOBUF_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 |
    return memcmp(a, b, n) == 0;
71 | } |
72 | // n >= 8 |
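  // Compare the first and last 8 bytes with two unaligned loads. When
  // 8 <= n < 16 the two windows overlap, so together they still cover all
  // n bytes; e.g. for n == 13 the loads cover bytes [0, 8) and [5, 13).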
  uint64 u = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b);
  uint64 v =
      GOOGLE_UNALIGNED_LOAD64(a + n - 8) ^ GOOGLE_UNALIGNED_LOAD64(b + n - 8);
75 | if ((u | v) != 0) { // The first or last 8 bytes differ. |
76 | return false; |
77 | } |
78 | a += 8; |
79 | b += 8; |
80 | n = n_rounded_down - 8; |
81 | if (n > 128) { |
82 | // As of 2012, memcmp on x86-64 uses a big unrolled loop with SSE2 |
83 | // instructions, and while we could try to do something faster, it |
84 | // doesn't seem worth pursuing. |
    return memcmp(a, b, n) == 0;
86 | } |
87 | for (; n >= 16; n -= 16) { |
    uint64 x = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b);
    uint64 y = GOOGLE_UNALIGNED_LOAD64(a + 8) ^ GOOGLE_UNALIGNED_LOAD64(b + 8);
90 | if ((x | y) != 0) { |
91 | return false; |
92 | } |
93 | a += 16; |
94 | b += 16; |
95 | } |
96 | // n must be 0 or 8 now because it was a multiple of 8 at the top of the loop. |
  return n == 0 || GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b);
98 | } |
99 | |
inline int fastmemcmp_inlined(const char *a, const char *b, size_t n) {
  // For large inputs, the library memcmp() is hard to beat.
  if (n >= 64) {
    return memcmp(a, b, n);
  }
  const char* a_limit = a + n;
  // Skip over equal 8-byte chunks.
  while (a + sizeof(uint64) <= a_limit &&
         GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b)) {
    a += sizeof(uint64);
    b += sizeof(uint64);
  }
  // Skip over an equal 4-byte chunk, if any.
  if (a + sizeof(uint32) <= a_limit &&
      GOOGLE_UNALIGNED_LOAD32(a) == GOOGLE_UNALIGNED_LOAD32(b)) {
    a += sizeof(uint32);
    b += sizeof(uint32);
  }
  // Compare the remaining bytes one at a time.
  while (a < a_limit) {
    int d =
        static_cast<int>(static_cast<uint32>(*a++) - static_cast<uint32>(*b++));
    if (d) return d;
  }
  return 0;
}
122 | |
// The standard memcpy operation is slow for small sizes that vary at runtime.
// This implementation inlines the optimal realization for sizes 1 to 16.
// To avoid code bloat, don't use it in spots that are not
// performance-critical, or where sizes <= 16 are not expected to be frequent.
127 | inline void memcpy_inlined(char *dst, const char *src, size_t size) { |
  // The compiler emits minimal data-movement code when the third parameter
  // of memcpy is a compile-time constant.
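  // For example, memcpy(dst, src, 8) with its constant size typically
  // compiles to a single 8-byte load/store pair with no call overhead
  // (illustrative; exact codegen depends on compiler and target).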
130 | switch (size) { |
    case 1: memcpy(dst, src, 1); break;
    case 2: memcpy(dst, src, 2); break;
    case 3: memcpy(dst, src, 3); break;
    case 4: memcpy(dst, src, 4); break;
    case 5: memcpy(dst, src, 5); break;
    case 6: memcpy(dst, src, 6); break;
    case 7: memcpy(dst, src, 7); break;
    case 8: memcpy(dst, src, 8); break;
    case 9: memcpy(dst, src, 9); break;
    case 10: memcpy(dst, src, 10); break;
    case 11: memcpy(dst, src, 11); break;
    case 12: memcpy(dst, src, 12); break;
    case 13: memcpy(dst, src, 13); break;
    case 14: memcpy(dst, src, 14); break;
    case 15: memcpy(dst, src, 15); break;
    case 16: memcpy(dst, src, 16); break;
    default: memcpy(dst, src, size); break;
148 | } |
149 | } |
150 | |
151 | } // namespace internal |
152 | } // namespace protobuf |
153 | } // namespace google |
154 | |
155 | #include <google/protobuf/port_undef.inc> |
156 | |
157 | #endif // GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
158 | |