1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2014 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // Fast memory copying and comparison routines. |
32 | // strings::fastmemcmp_inlined() replaces memcmp() |
33 | // strings::memcpy_inlined() replaces memcpy() |
34 | // strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 |
35 | // |
36 | // strings::*_inlined() routines are inline versions of the |
37 | // routines exported by this module. Sometimes using the inlined |
38 | // versions is faster. Measure before using the inlined versions. |
39 | // |
// Performance measurement:
//   strings::fastmemcmp_inlined
//     Analysis: memcmp, fastmemcmp_inlined, fastmemcmp
//     2012-01-30
44 | |
45 | #ifndef GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
46 | #define GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
47 | |
48 | #include <stddef.h> |
49 | #include <stdio.h> |
50 | #include <string.h> |
51 | |
52 | #include <google/protobuf/stubs/common.h> |
53 | |
54 | #include <google/protobuf/port_def.inc> |
55 | |
56 | namespace google { |
57 | namespace protobuf { |
58 | namespace internal { |
59 | |
60 | // Return true if the n bytes at a equal the n bytes at b. |
61 | // The regions are allowed to overlap. |
62 | // |
// The performance is similar to that of memcmp(), but this routine is faster
// for moderately-sized inputs, or inputs that share a common prefix and differ
// somewhere in their last 8 bytes. Further optimizations can be added later
// if it makes sense to do so.
67 | inline bool memeq(const char* a, const char* b, size_t n) { |
68 | size_t n_rounded_down = n & ~static_cast<size_t>(7); |
69 | if (PROTOBUF_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 |
    return memcmp(a, b, n) == 0;
71 | } |
72 | // n >= 8 |
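  // Compare the first and last 8 bytes with two unaligned loads. When
  // 8 <= n < 16 the two windows overlap, so together they still cover all
  // n bytes; e.g. for n == 13 the loads cover bytes [0, 8) and [5, 13).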
  uint64 u = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b);
  uint64 v =
      GOOGLE_UNALIGNED_LOAD64(a + n - 8) ^ GOOGLE_UNALIGNED_LOAD64(b + n - 8);
75 | if ((u | v) != 0) { // The first or last 8 bytes differ. |
76 | return false; |
77 | } |
78 | a += 8; |
79 | b += 8; |
80 | n = n_rounded_down - 8; |
81 | if (n > 128) { |
82 | // As of 2012, memcmp on x86-64 uses a big unrolled loop with SSE2 |
83 | // instructions, and while we could try to do something faster, it |
84 | // doesn't seem worth pursuing. |
    return memcmp(a, b, n) == 0;
86 | } |
87 | for (; n >= 16; n -= 16) { |
    uint64 x = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b);
    uint64 y = GOOGLE_UNALIGNED_LOAD64(a + 8) ^ GOOGLE_UNALIGNED_LOAD64(b + 8);
90 | if ((x | y) != 0) { |
91 | return false; |
92 | } |
93 | a += 16; |
94 | b += 16; |
95 | } |
96 | // n must be 0 or 8 now because it was a multiple of 8 at the top of the loop. |
  return n == 0 || GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b);
98 | } |
99 | |
inline int fastmemcmp_inlined(const char *a, const char *b, size_t n) {
  // For large inputs, the library memcmp() is hard to beat.
  if (n >= 64) {
    return memcmp(a, b, n);
  }
  const char* a_limit = a + n;
  // Skip over equal 8-byte chunks.
  while (a + sizeof(uint64) <= a_limit &&
         GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b)) {
    a += sizeof(uint64);
    b += sizeof(uint64);
  }
  // Skip over an equal 4-byte chunk, if any.
  if (a + sizeof(uint32) <= a_limit &&
      GOOGLE_UNALIGNED_LOAD32(a) == GOOGLE_UNALIGNED_LOAD32(b)) {
    a += sizeof(uint32);
    b += sizeof(uint32);
  }
  // Compare the remaining bytes one at a time.
  while (a < a_limit) {
    int d =
        static_cast<int>(static_cast<uint32>(*a++) - static_cast<uint32>(*b++));
    if (d) return d;
  }
  return 0;
}
122 | |
// The standard memcpy operation is slow for small sizes that vary at runtime.
// This implementation inlines the optimal realization for sizes 1 to 16.
// To avoid code bloat, don't use it in spots that are not
// performance-critical, or where sizes <= 16 are not expected to be frequent.
127 | inline void memcpy_inlined(char *dst, const char *src, size_t size) { |
  // The compiler emits minimal data-movement code when the third parameter
  // of memcpy is a compile-time constant.
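  // For example, memcpy(dst, src, 8) with its constant size typically
  // compiles to a single 8-byte load/store pair with no call overhead
  // (illustrative; exact codegen depends on compiler and target).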
130 | switch (size) { |
    case 1: memcpy(dst, src, 1); break;
    case 2: memcpy(dst, src, 2); break;
    case 3: memcpy(dst, src, 3); break;
    case 4: memcpy(dst, src, 4); break;
    case 5: memcpy(dst, src, 5); break;
    case 6: memcpy(dst, src, 6); break;
    case 7: memcpy(dst, src, 7); break;
    case 8: memcpy(dst, src, 8); break;
    case 9: memcpy(dst, src, 9); break;
    case 10: memcpy(dst, src, 10); break;
    case 11: memcpy(dst, src, 11); break;
    case 12: memcpy(dst, src, 12); break;
    case 13: memcpy(dst, src, 13); break;
    case 14: memcpy(dst, src, 14); break;
    case 15: memcpy(dst, src, 15); break;
    case 16: memcpy(dst, src, 16); break;
    default: memcpy(dst, src, size); break;
148 | } |
149 | } |
150 | |
151 | } // namespace internal |
152 | } // namespace protobuf |
153 | } // namespace google |
154 | |
155 | #include <google/protobuf/port_undef.inc> |
156 | |
157 | #endif // GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ |
158 | |