1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // from google3/strings/strutil.h |
32 | |
33 | #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
34 | #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
35 | |
36 | #include <google/protobuf/stubs/common.h> |
37 | #include <google/protobuf/stubs/stringpiece.h> |
38 | #include <stdlib.h> |
39 | |
40 | #include <cstring> |
41 | #include <google/protobuf/port_def.inc> |
42 | #include <vector> |
43 | |
44 | namespace google { |
45 | namespace protobuf { |
46 | |
// Provide strtoll/strtoull on toolchains that lack them by mapping the names
// onto the platform's 64-bit equivalents. These are preprocessor macros, so
// they are not confined to the enclosing namespace.
#if defined(_MSC_VER) && _MSC_VER < 1800
// Older MSVC (_MSC_VER < 1800): use the _strtoi64/_strtoui64 spellings.
#define strtoll  _strtoi64
#define strtoull _strtoui64
#elif defined(__DECCXX) && defined(__osf__)
// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit.
#define strtoll strtol
#define strtoull strtoul
#endif
55 | |
56 | // ---------------------------------------------------------------------- |
57 | // ascii_isalnum() |
58 | // Check if an ASCII character is alphanumeric. We can't use ctype's |
59 | // isalnum() because it is affected by locale. This function is applied |
60 | // to identifiers in the protocol buffer language, not to natural-language |
61 | // strings, so locale should not be taken into account. |
62 | // ascii_isdigit() |
63 | // Like above, but only accepts digits. |
64 | // ascii_isspace() |
65 | // Check if the character is a space character. |
66 | // ---------------------------------------------------------------------- |
67 | |
// Returns true iff `c` is an ASCII letter ('a'-'z', 'A'-'Z') or an ASCII
// decimal digit ('0'-'9'). Only plain range comparisons are used, so the
// result never depends on the current locale.
inline bool ascii_isalnum(char c) {
  if ('0' <= c && c <= '9') return true;
  if ('A' <= c && c <= 'Z') return true;
  return 'a' <= c && c <= 'z';
}
73 | |
// Returns true iff `c` is one of the ASCII decimal digits '0' through '9'.
inline bool ascii_isdigit(char c) {
  return !(c < '0' || c > '9');
}
77 | |
// Returns true iff `c` is one of the six ASCII whitespace characters:
// space, horizontal tab, newline, vertical tab, form feed, carriage return.
inline bool ascii_isspace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
82 | |
// Returns true iff `c` is an ASCII uppercase letter ('A'-'Z').
inline bool ascii_isupper(char c) {
  return !(c < 'A' || 'Z' < c);
}
86 | |
// Returns true iff `c` is an ASCII lowercase letter ('a'-'z').
inline bool ascii_islower(char c) {
  return !(c < 'a' || 'z' < c);
}
90 | |
91 | inline char ascii_toupper(char c) { |
92 | return ascii_islower(c) ? c - ('a' - 'A') : c; |
93 | } |
94 | |
95 | inline char ascii_tolower(char c) { |
96 | return ascii_isupper(c) ? c + ('a' - 'A') : c; |
97 | } |
98 | |
// Converts a hexadecimal digit character to its numeric value (0-15),
// assuming ASCII. The input is not validated: a character that is not a hex
// digit still yields some value in [0, 15].
inline int hex_digit_to_int(char c) {
  const int code = static_cast<unsigned char>(c);
  // For letters, the low nibble of 'a'/'A' is 1, so adding 9 makes the low
  // nibble equal to 10 for 'a'/'A', 11 for 'b'/'B', and so on.
  const int shifted = (code > '9') ? code + 9 : code;
  return shifted & 0xf;
}
107 | |
108 | // ---------------------------------------------------------------------- |
109 | // HasPrefixString() |
110 | // Check if a string begins with a given prefix. |
111 | // StripPrefixString() |
112 | // Given a string and a putative prefix, returns the string minus the |
113 | // prefix string if the prefix matches, otherwise the original |
114 | // string. |
115 | // ---------------------------------------------------------------------- |
// Returns true iff `str` begins with `prefix`. An empty prefix matches every
// string, including the empty string.
inline bool HasPrefixString(const string& str,
                            const string& prefix) {
  // The size check comes first so the comparison below never looks past the
  // end of `str`.
  return str.size() >= prefix.size() &&
         str.compare(0, prefix.size(), prefix) == 0;
}
121 | |
122 | inline bool HasPrefixString(StringPiece str, StringPiece prefix) { |
123 | return str.size() >= prefix.size() && |
124 | memcmp(s1: str.data(), s2: prefix.data(), n: prefix.size()) == 0; |
125 | } |
126 | |
127 | inline string StripPrefixString(const string& str, const string& prefix) { |
128 | if (HasPrefixString(str, prefix)) { |
129 | return str.substr(pos: prefix.size()); |
130 | } else { |
131 | return str; |
132 | } |
133 | } |
134 | |
135 | // ---------------------------------------------------------------------- |
136 | // HasSuffixString() |
137 | // Return true if str ends in suffix. |
138 | // StripSuffixString() |
139 | // Given a string and a putative suffix, returns the string minus the |
140 | // suffix string if the suffix matches, otherwise the original |
141 | // string. |
142 | // ---------------------------------------------------------------------- |
// Returns true iff `str` ends with `suffix`. An empty suffix matches every
// string, including the empty string.
inline bool HasSuffixString(const string& str,
                            const string& suffix) {
  // Reject early so the start offset computed below cannot underflow.
  if (str.size() < suffix.size()) {
    return false;
  }
  return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}
148 | |
149 | inline string StripSuffixString(const string& str, const string& suffix) { |
150 | if (HasSuffixString(str, suffix)) { |
151 | return str.substr(pos: 0, n: str.size() - suffix.size()); |
152 | } else { |
153 | return str; |
154 | } |
155 | } |
156 | |
157 | // ---------------------------------------------------------------------- |
158 | // ReplaceCharacters |
159 | // Replaces any occurrence of the character 'remove' (or the characters |
160 | // in 'remove') with the character 'replacewith'. |
161 | // Good for keeping html characters or protocol characters (\t) out |
162 | // of places where they might cause a problem. |
163 | // StripWhitespace |
164 | // Removes whitespaces from both ends of the given string. |
165 | // ---------------------------------------------------------------------- |
166 | PROTOBUF_EXPORT void ReplaceCharacters(string* s, const char* remove, |
167 | char replacewith); |
168 | |
169 | PROTOBUF_EXPORT void StripWhitespace(string* s); |
170 | |
171 | // ---------------------------------------------------------------------- |
172 | // LowerString() |
173 | // UpperString() |
174 | // ToUpper() |
175 | // Convert the characters in "s" to lowercase or uppercase. ASCII-only: |
176 | // these functions intentionally ignore locale because they are applied to |
177 | // identifiers used in the Protocol Buffer language, not to natural-language |
178 | // strings. |
179 | // ---------------------------------------------------------------------- |
180 | |
// Converts every ASCII uppercase letter in `*s` to lowercase, in place.
// All other bytes are left untouched. `s` must not be null.
inline void LowerString(string * s) {
  for (string::size_type pos = 0; pos < s->size(); ++pos) {
    char& ch = (*s)[pos];
    // tolower() changes based on locale.  We don't want this!
    if (ch >= 'A' && ch <= 'Z') {
      ch += 'a' - 'A';
    }
  }
}
188 | |
// Converts every ASCII lowercase letter in `*s` to uppercase, in place.
// All other bytes are left untouched. `s` must not be null.
inline void UpperString(string * s) {
  for (string::size_type pos = 0; pos < s->size(); ++pos) {
    char& ch = (*s)[pos];
    // toupper() changes based on locale.  We don't want this!
    if (ch >= 'a' && ch <= 'z') {
      ch += 'A' - 'a';
    }
  }
}
196 | |
197 | inline string ToUpper(const string& s) { |
198 | string out = s; |
199 | UpperString(s: &out); |
200 | return out; |
201 | } |
202 | |
203 | // ---------------------------------------------------------------------- |
204 | // StringReplace() |
//    Give me a string and two patterns "old" and "new", and I replace
//    the first instance of "old" in the string with "new", if it
//    exists (or every instance of "old", if replace_all is true).
//    RETURN a new string, regardless of whether any replacement
//    happened or not.
209 | // ---------------------------------------------------------------------- |
210 | |
211 | PROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, |
212 | const string& newsub, bool replace_all); |
213 | |
214 | // ---------------------------------------------------------------------- |
215 | // SplitStringUsing() |
216 | // Split a string using a character delimiter. Append the components |
217 | // to 'result'. If there are consecutive delimiters, this function skips |
218 | // over all of them. |
219 | // ---------------------------------------------------------------------- |
220 | PROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, |
221 | std::vector<string>* res); |
222 | |
223 | // Split a string using one or more byte delimiters, presented |
224 | // as a nul-terminated c string. Append the components to 'result'. |
225 | // If there are consecutive delimiters, this function will return |
226 | // corresponding empty strings. If you want to drop the empty |
227 | // strings, try SplitStringUsing(). |
228 | // |
229 | // If "full" is the empty string, yields an empty string as the only value. |
230 | // ---------------------------------------------------------------------- |
231 | PROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full, |
232 | const char* delim, |
233 | std::vector<string>* result); |
234 | |
235 | // ---------------------------------------------------------------------- |
236 | // Split() |
237 | // Split a string using a character delimiter. |
238 | // ---------------------------------------------------------------------- |
239 | inline std::vector<string> Split( |
240 | const string& full, const char* delim, bool skip_empty = true) { |
241 | std::vector<string> result; |
242 | if (skip_empty) { |
243 | SplitStringUsing(full, delim, res: &result); |
244 | } else { |
245 | SplitStringAllowEmpty(full, delim, result: &result); |
246 | } |
247 | return result; |
248 | } |
249 | |
250 | // ---------------------------------------------------------------------- |
251 | // JoinStrings() |
252 | // These methods concatenate a vector of strings into a C++ string, using |
253 | // the C-string "delim" as a separator between components. There are two |
254 | // flavors of the function, one flavor returns the concatenated string, |
255 | // another takes a pointer to the target string. In the latter case the |
256 | // target string is cleared and overwritten. |
257 | // ---------------------------------------------------------------------- |
258 | PROTOBUF_EXPORT void JoinStrings(const std::vector<string>& components, |
259 | const char* delim, string* result); |
260 | |
261 | inline string JoinStrings(const std::vector<string>& components, |
262 | const char* delim) { |
263 | string result; |
264 | JoinStrings(components, delim, result: &result); |
265 | return result; |
266 | } |
267 | |
268 | // ---------------------------------------------------------------------- |
269 | // UnescapeCEscapeSequences() |
270 | // Copies "source" to "dest", rewriting C-style escape sequences |
271 | // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII |
272 | // equivalents. "dest" must be sufficiently large to hold all |
273 | // the characters in the rewritten string (i.e. at least as large |
274 | // as strlen(source) + 1 should be safe, since the replacements |
275 | // are always shorter than the original escaped sequences). It's |
276 | // safe for source and dest to be the same. RETURNS the length |
277 | // of dest. |
278 | // |
279 | // It allows hex sequences \xhh, or generally \xhhhhh with an |
280 | // arbitrary number of hex digits, but all of them together must |
281 | // specify a value of a single byte (e.g. \x0045 is equivalent |
282 | // to \x45, and \x1234 is erroneous). |
283 | // |
284 | // It also allows escape sequences of the form \uhhhh (exactly four |
285 | // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight |
286 | // hex digits, upper or lower case) to specify a Unicode code |
287 | // point. The dest array will contain the UTF8-encoded version of |
288 | // that code-point (e.g., if source contains \u2019, then dest will |
289 | // contain the three bytes 0xE2, 0x80, and 0x99). |
290 | // |
291 | // Errors: In the first form of the call, errors are reported with |
292 | // LOG(ERROR). The same is true for the second form of the call if |
293 | // the pointer to the string std::vector is nullptr; otherwise, error |
294 | // messages are stored in the std::vector. In either case, the effect on |
295 | // the dest array is not defined, but rest of the source will be |
296 | // processed. |
297 | // ---------------------------------------------------------------------- |
298 | |
299 | PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); |
300 | PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, |
301 | std::vector<string>* errors); |
302 | |
303 | // ---------------------------------------------------------------------- |
304 | // UnescapeCEscapeString() |
305 | // This does the same thing as UnescapeCEscapeSequences, but creates |
306 | // a new string. The caller does not need to worry about allocating |
307 | // a dest buffer. This should be used for non performance critical |
308 | // tasks such as printing debug messages. It is safe for src and dest |
309 | // to be the same. |
310 | // |
311 | // The second call stores its errors in a supplied string vector. |
312 | // If the string vector pointer is nullptr, it reports the errors with LOG(). |
313 | // |
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
316 | // ---------------------------------------------------------------------- |
317 | |
318 | PROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest); |
319 | PROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest, |
320 | std::vector<string>* errors); |
321 | PROTOBUF_EXPORT string UnescapeCEscapeString(const string& src); |
322 | |
323 | // ---------------------------------------------------------------------- |
324 | // CEscape() |
325 | // Escapes 'src' using C-style escape sequences and returns the resulting |
326 | // string. |
327 | // |
328 | // Escaped chars: \n, \r, \t, ", ', \, and !isprint(). |
329 | // ---------------------------------------------------------------------- |
330 | PROTOBUF_EXPORT string CEscape(const string& src); |
331 | |
332 | // ---------------------------------------------------------------------- |
333 | // CEscapeAndAppend() |
334 | // Escapes 'src' using C-style escape sequences, and appends the escaped |
335 | // string to 'dest'. |
336 | // ---------------------------------------------------------------------- |
337 | PROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, string* dest); |
338 | |
339 | namespace strings { |
340 | // Like CEscape() but does not escape bytes with the upper bit set. |
341 | PROTOBUF_EXPORT string Utf8SafeCEscape(const string& src); |
342 | |
343 | // Like CEscape() but uses hex (\x) escapes instead of octals. |
344 | PROTOBUF_EXPORT string CHexEscape(const string& src); |
345 | } // namespace strings |
346 | |
347 | // ---------------------------------------------------------------------- |
348 | // strto32() |
349 | // strtou32() |
350 | // strto64() |
351 | // strtou64() |
352 | // Architecture-neutral plug compatible replacements for strtol() and |
353 | // strtoul(). Long's have different lengths on ILP-32 and LP-64 |
354 | // platforms, so using these is safer, from the point of view of |
355 | // overflow behavior, than using the standard libc functions. |
356 | // ---------------------------------------------------------------------- |
357 | PROTOBUF_EXPORT int32 strto32_adaptor(const char* nptr, char** endptr, |
358 | int base); |
359 | PROTOBUF_EXPORT uint32 strtou32_adaptor(const char* nptr, char** endptr, |
360 | int base); |
361 | |
362 | inline int32 strto32(const char *nptr, char **endptr, int base) { |
363 | if (sizeof(int32) == sizeof(long)) |
364 | return strtol(nptr: nptr, endptr: endptr, base: base); |
365 | else |
366 | return strto32_adaptor(nptr, endptr, base); |
367 | } |
368 | |
369 | inline uint32 strtou32(const char *nptr, char **endptr, int base) { |
370 | if (sizeof(uint32) == sizeof(unsigned long)) |
371 | return strtoul(nptr: nptr, endptr: endptr, base: base); |
372 | else |
373 | return strtou32_adaptor(nptr, endptr, base); |
374 | } |
375 | |
376 | // For now, long long is 64-bit on all the platforms we care about, so these |
377 | // functions can simply pass the call to strto[u]ll. |
378 | inline int64 strto64(const char *nptr, char **endptr, int base) { |
379 | GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), |
380 | sizeof_int64_is_not_sizeof_long_long); |
381 | return strtoll(nptr: nptr, endptr: endptr, base: base); |
382 | } |
383 | |
384 | inline uint64 strtou64(const char *nptr, char **endptr, int base) { |
385 | GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), |
386 | sizeof_uint64_is_not_sizeof_long_long); |
387 | return strtoull(nptr: nptr, endptr: endptr, base: base); |
388 | } |
389 | |
390 | // ---------------------------------------------------------------------- |
391 | // safe_strtob() |
392 | // safe_strto32() |
393 | // safe_strtou32() |
394 | // safe_strto64() |
395 | // safe_strtou64() |
396 | // safe_strtof() |
397 | // safe_strtod() |
398 | // ---------------------------------------------------------------------- |
399 | PROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value); |
400 | |
401 | PROTOBUF_EXPORT bool safe_strto32(const string& str, int32* value); |
402 | PROTOBUF_EXPORT bool safe_strtou32(const string& str, uint32* value); |
403 | inline bool safe_strto32(const char* str, int32* value) { |
404 | return safe_strto32(str: string(str), value); |
405 | } |
406 | inline bool safe_strto32(StringPiece str, int32* value) { |
407 | return safe_strto32(str: str.ToString(), value); |
408 | } |
409 | inline bool safe_strtou32(const char* str, uint32* value) { |
410 | return safe_strtou32(str: string(str), value); |
411 | } |
412 | inline bool safe_strtou32(StringPiece str, uint32* value) { |
413 | return safe_strtou32(str: str.ToString(), value); |
414 | } |
415 | |
416 | PROTOBUF_EXPORT bool safe_strto64(const string& str, int64* value); |
417 | PROTOBUF_EXPORT bool safe_strtou64(const string& str, uint64* value); |
418 | inline bool safe_strto64(const char* str, int64* value) { |
419 | return safe_strto64(str: string(str), value); |
420 | } |
421 | inline bool safe_strto64(StringPiece str, int64* value) { |
422 | return safe_strto64(str: str.ToString(), value); |
423 | } |
424 | inline bool safe_strtou64(const char* str, uint64* value) { |
425 | return safe_strtou64(str: string(str), value); |
426 | } |
427 | inline bool safe_strtou64(StringPiece str, uint64* value) { |
428 | return safe_strtou64(str: str.ToString(), value); |
429 | } |
430 | |
431 | PROTOBUF_EXPORT bool safe_strtof(const char* str, float* value); |
432 | PROTOBUF_EXPORT bool safe_strtod(const char* str, double* value); |
// string convenience overload: forwards the NUL-terminated contents of `str`
// to safe_strtof(const char*, float*). Only the pointer is passed on, so the
// callee cannot see anything after an embedded NUL byte.
inline bool safe_strtof(const string& str, float* value) {
  return safe_strtof(str.c_str(), value);
}
// string convenience overload: forwards the NUL-terminated contents of `str`
// to safe_strtod(const char*, double*). Only the pointer is passed on, so the
// callee cannot see anything after an embedded NUL byte.
inline bool safe_strtod(const string& str, double* value) {
  return safe_strtod(str.c_str(), value);
}
439 | inline bool safe_strtof(StringPiece str, float* value) { |
440 | return safe_strtof(str: str.ToString(), value); |
441 | } |
442 | inline bool safe_strtod(StringPiece str, double* value) { |
443 | return safe_strtod(str: str.ToString(), value); |
444 | } |
445 | |
446 | // ---------------------------------------------------------------------- |
447 | // FastIntToBuffer() |
448 | // FastHexToBuffer() |
449 | // FastHex64ToBuffer() |
450 | // FastHex32ToBuffer() |
451 | // FastTimeToBuffer() |
452 | // These are intended for speed. FastIntToBuffer() assumes the |
453 | // integer is non-negative. FastHexToBuffer() puts output in |
454 | // hex rather than decimal. FastTimeToBuffer() puts the output |
455 | // into RFC822 format. |
456 | // |
457 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
458 | // padded to exactly 16 bytes (plus one byte for '\0') |
459 | // |
460 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
461 | // padded to exactly 8 bytes (plus one byte for '\0') |
462 | // |
463 | // All functions take the output buffer as an arg. |
464 | // They all return a pointer to the beginning of the output, |
465 | // which may not be the beginning of the input buffer. |
466 | // ---------------------------------------------------------------------- |
467 | |
468 | // Suggested buffer size for FastToBuffer functions. Also works with |
469 | // DoubleToBuffer() and FloatToBuffer(). |
470 | static const int kFastToBufferSize = 32; |
471 | |
472 | PROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); |
473 | PROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); |
474 | char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below |
475 | char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below |
476 | PROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); |
477 | PROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); |
478 | PROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); |
479 | |
480 | // at least 22 bytes long |
481 | inline char* FastIntToBuffer(int i, char* buffer) { |
482 | return (sizeof(i) == 4 ? |
483 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
484 | } |
485 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { |
486 | return (sizeof(i) == 4 ? |
487 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
488 | } |
489 | inline char* FastLongToBuffer(long i, char* buffer) { |
490 | return (sizeof(i) == 4 ? |
491 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
492 | } |
493 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { |
494 | return (sizeof(i) == 4 ? |
495 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
496 | } |
497 | |
498 | // ---------------------------------------------------------------------- |
499 | // FastInt32ToBufferLeft() |
500 | // FastUInt32ToBufferLeft() |
501 | // FastInt64ToBufferLeft() |
502 | // FastUInt64ToBufferLeft() |
503 | // |
504 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
505 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
506 | // their output to the beginning of the buffer (hence the name, as the |
507 | // output is left-aligned). The caller is responsible for ensuring that |
508 | // the buffer has enough space to hold the output. |
509 | // |
510 | // Returns a pointer to the end of the string (i.e. the null character |
511 | // terminating the string). |
512 | // ---------------------------------------------------------------------- |
513 | |
514 | PROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); |
515 | PROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); |
516 | PROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); |
517 | PROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); |
518 | |
519 | // Just define these in terms of the above. |
520 | inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { |
521 | FastUInt32ToBufferLeft(i, buffer); |
522 | return buffer; |
523 | } |
524 | inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { |
525 | FastUInt64ToBufferLeft(i, buffer); |
526 | return buffer; |
527 | } |
528 | |
// Converts a bool to its textual form: "true" or "false".
inline string SimpleBtoa(bool value) {
  if (value) {
    return "true";
  }
  return "false";
}
532 | |
533 | // ---------------------------------------------------------------------- |
534 | // SimpleItoa() |
535 | // Description: converts an integer to a string. |
536 | // |
537 | // Return value: string |
538 | // ---------------------------------------------------------------------- |
539 | PROTOBUF_EXPORT string SimpleItoa(int i); |
540 | PROTOBUF_EXPORT string SimpleItoa(unsigned int i); |
541 | PROTOBUF_EXPORT string SimpleItoa(long i); |
542 | PROTOBUF_EXPORT string SimpleItoa(unsigned long i); |
543 | PROTOBUF_EXPORT string SimpleItoa(long long i); |
544 | PROTOBUF_EXPORT string SimpleItoa(unsigned long long i); |
545 | |
546 | // ---------------------------------------------------------------------- |
547 | // SimpleDtoa() |
548 | // SimpleFtoa() |
549 | // DoubleToBuffer() |
550 | // FloatToBuffer() |
551 | // Description: converts a double or float to a string which, if |
552 | // passed to NoLocaleStrtod(), will produce the exact same original double |
553 | // (except in case of NaN; all NaNs are considered the same value). |
554 | // We try to keep the string short but it's not guaranteed to be as |
555 | // short as possible. |
556 | // |
557 | // DoubleToBuffer() and FloatToBuffer() write the text to the given |
558 | // buffer and return it. The buffer must be at least |
559 | // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize |
560 | // bytes for floats. kFastToBufferSize is also guaranteed to be large |
561 | // enough to hold either. |
562 | // |
563 | // Return value: string |
564 | // ---------------------------------------------------------------------- |
565 | PROTOBUF_EXPORT string SimpleDtoa(double value); |
566 | PROTOBUF_EXPORT string SimpleFtoa(float value); |
567 | |
568 | PROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); |
569 | PROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); |
570 | |
571 | // In practice, doubles should never need more than 24 bytes and floats |
572 | // should never need more than 14 (including null terminators), but we |
573 | // overestimate to be safe. |
574 | static const int kDoubleToBufferSize = 32; |
575 | static const int kFloatToBufferSize = 24; |
576 | |
577 | namespace strings { |
578 | |
// Padding selector stored alongside a value in Hex (see `Hex::spec`).
// Because NO_PAD is 1 and the remaining enumerators increment from it, each
// ZERO_PAD_n enumerator has the numeric value n.
// NOTE(review): presumably n is the minimum number of hex digits to emit,
// zero-filled on the left -- confirm against the AlphaNum(Hex)
// implementation, which is not visible in this header.
enum PadSpec {
  NO_PAD = 1,
  ZERO_PAD_2,
  ZERO_PAD_3,
  ZERO_PAD_4,
  ZERO_PAD_5,
  ZERO_PAD_6,
  ZERO_PAD_7,
  ZERO_PAD_8,
  ZERO_PAD_9,
  ZERO_PAD_10,
  ZERO_PAD_11,
  ZERO_PAD_12,
  ZERO_PAD_13,
  ZERO_PAD_14,
  ZERO_PAD_15,
  ZERO_PAD_16,
};
597 | |
598 | struct Hex { |
599 | uint64 value; |
600 | enum PadSpec spec; |
601 | template <class Int> |
602 | explicit Hex(Int v, PadSpec s = NO_PAD) |
603 | : spec(s) { |
604 | // Prevent sign-extension by casting integers to |
605 | // their unsigned counterparts. |
606 | #ifdef LANG_CXX11 |
607 | static_assert( |
608 | sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8, |
609 | "Unknown integer type" ); |
610 | #endif |
611 | value = sizeof(v) == 1 ? static_cast<uint8>(v) |
612 | : sizeof(v) == 2 ? static_cast<uint16>(v) |
613 | : sizeof(v) == 4 ? static_cast<uint32>(v) |
614 | : static_cast<uint64>(v); |
615 | } |
616 | }; |
617 | |
618 | struct PROTOBUF_EXPORT AlphaNum { |
619 | const char *piece_data_; // move these to string_ref eventually |
620 | size_t piece_size_; // move these to string_ref eventually |
621 | |
622 | char digits[kFastToBufferSize]; |
623 | |
624 | // No bool ctor -- bools convert to an integral type. |
625 | // A bool ctor would also convert incoming pointers (bletch). |
626 | |
627 | AlphaNum(int i32) |
628 | : piece_data_(digits), |
629 | piece_size_(FastInt32ToBufferLeft(i: i32, buffer: digits) - &digits[0]) {} |
630 | AlphaNum(unsigned int u32) |
631 | : piece_data_(digits), |
632 | piece_size_(FastUInt32ToBufferLeft(i: u32, buffer: digits) - &digits[0]) {} |
633 | AlphaNum(long long i64) |
634 | : piece_data_(digits), |
635 | piece_size_(FastInt64ToBufferLeft(i: i64, buffer: digits) - &digits[0]) {} |
636 | AlphaNum(unsigned long long u64) |
637 | : piece_data_(digits), |
638 | piece_size_(FastUInt64ToBufferLeft(i: u64, buffer: digits) - &digits[0]) {} |
639 | |
640 | // Note: on some architectures, "long" is only 32 bits, not 64, but the |
641 | // performance hit of using FastInt64ToBufferLeft to handle 32-bit values |
642 | // is quite minor. |
643 | AlphaNum(long i64) |
644 | : piece_data_(digits), |
645 | piece_size_(FastInt64ToBufferLeft(i: i64, buffer: digits) - &digits[0]) {} |
646 | AlphaNum(unsigned long u64) |
647 | : piece_data_(digits), |
648 | piece_size_(FastUInt64ToBufferLeft(i: u64, buffer: digits) - &digits[0]) {} |
649 | |
650 | AlphaNum(float f) |
651 | : piece_data_(digits), piece_size_(strlen(s: FloatToBuffer(i: f, buffer: digits))) {} |
652 | AlphaNum(double f) |
653 | : piece_data_(digits), piece_size_(strlen(s: DoubleToBuffer(i: f, buffer: digits))) {} |
654 | |
655 | AlphaNum(Hex hex); |
656 | |
657 | AlphaNum(const char* c_str) |
658 | : piece_data_(c_str), piece_size_(strlen(s: c_str)) {} |
659 | // TODO: Add a string_ref constructor, eventually |
660 | // AlphaNum(const StringPiece &pc) : piece(pc) {} |
661 | |
662 | AlphaNum(const string& str) |
663 | : piece_data_(str.data()), piece_size_(str.size()) {} |
664 | |
665 | AlphaNum(StringPiece str) |
666 | : piece_data_(str.data()), piece_size_(str.size()) {} |
667 | |
668 | AlphaNum(internal::StringPiecePod str) |
669 | : piece_data_(str.data()), piece_size_(str.size()) {} |
670 | |
671 | size_t size() const { return piece_size_; } |
672 | const char *data() const { return piece_data_; } |
673 | |
674 | private: |
675 | // Use ":" not ':' |
676 | AlphaNum(char c); // NOLINT(runtime/explicit) |
677 | |
678 | // Disallow copy and assign. |
679 | AlphaNum(const AlphaNum&); |
680 | void operator=(const AlphaNum&); |
681 | }; |
682 | |
683 | } // namespace strings |
684 | |
685 | using strings::AlphaNum; |
686 | |
687 | // ---------------------------------------------------------------------- |
688 | // StrCat() |
689 | // This merges the given strings or numbers, with no delimiter. This |
690 | // is designed to be the fastest possible way to construct a string out |
691 | // of a mix of raw C strings, strings, bool values, |
692 | // and numeric values. |
693 | // |
694 | // Don't use this for user-visible strings. The localization process |
695 | // works poorly on strings built up out of fragments. |
696 | // |
697 | // For clarity and performance, don't use StrCat when appending to a |
698 | // string. In particular, avoid using any of these (anti-)patterns: |
699 | // str.append(StrCat(...) |
700 | // str += StrCat(...) |
701 | // str = StrCat(str, ...) |
//    where the last is the worst, with the potential to change a loop
703 | // from a linear time operation with O(1) dynamic allocations into a |
704 | // quadratic time operation with O(n) dynamic allocations. StrAppend |
705 | // is a better choice than any of the above, subject to the restriction |
706 | // of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may |
707 | // be a reference into str. |
708 | // ---------------------------------------------------------------------- |
709 | |
710 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b); |
711 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
712 | const AlphaNum& c); |
713 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
714 | const AlphaNum& c, const AlphaNum& d); |
715 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
716 | const AlphaNum& c, const AlphaNum& d, |
717 | const AlphaNum& e); |
718 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
719 | const AlphaNum& c, const AlphaNum& d, |
720 | const AlphaNum& e, const AlphaNum& f); |
721 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
722 | const AlphaNum& c, const AlphaNum& d, |
723 | const AlphaNum& e, const AlphaNum& f, |
724 | const AlphaNum& g); |
725 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
726 | const AlphaNum& c, const AlphaNum& d, |
727 | const AlphaNum& e, const AlphaNum& f, |
728 | const AlphaNum& g, const AlphaNum& h); |
729 | PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, |
730 | const AlphaNum& c, const AlphaNum& d, |
731 | const AlphaNum& e, const AlphaNum& f, |
732 | const AlphaNum& g, const AlphaNum& h, |
733 | const AlphaNum& i); |
734 | |
735 | inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); } |
736 | |
737 | // ---------------------------------------------------------------------- |
738 | // StrAppend() |
739 | // Same as above, but adds the output to the given string. |
740 | // WARNING: For speed, StrAppend does not try to check each of its input |
741 | // arguments to be sure that they are not a subset of the string being |
742 | // appended to. That is, while this will work: |
743 | // |
744 | // string s = "foo"; |
745 | // s += s; |
746 | // |
747 | // This will not (necessarily) work: |
748 | // |
749 | // string s = "foo"; |
750 | // StrAppend(&s, s); |
751 | // |
752 | // Note: while StrCat supports appending up to 9 arguments, StrAppend |
753 | // is currently limited to 4. That's rarely an issue except when |
754 | // automatically transforming StrCat to StrAppend, and can easily be |
755 | // worked around as consecutive calls to StrAppend are quite efficient. |
756 | // ---------------------------------------------------------------------- |
757 | |
758 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a); |
759 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
760 | const AlphaNum& b); |
761 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
762 | const AlphaNum& b, const AlphaNum& c); |
763 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
764 | const AlphaNum& b, const AlphaNum& c, |
765 | const AlphaNum& d); |
766 | |
767 | // ---------------------------------------------------------------------- |
768 | // Join() |
769 | // These methods concatenate a range of components into a C++ string, using |
770 | // the C-string "delim" as a separator between components. |
771 | // ---------------------------------------------------------------------- |
772 | template <typename Iterator> |
773 | void Join(Iterator start, Iterator end, |
774 | const char* delim, string* result) { |
775 | for (Iterator it = start; it != end; ++it) { |
776 | if (it != start) { |
777 | result->append(s: delim); |
778 | } |
779 | StrAppend(result, *it); |
780 | } |
781 | } |
782 | |
783 | template <typename Range> |
784 | string Join(const Range& components, |
785 | const char* delim) { |
786 | string result; |
787 | Join(components.begin(), components.end(), delim, &result); |
788 | return result; |
789 | } |
790 | |
791 | // ---------------------------------------------------------------------- |
792 | // ToHex() |
793 | // Return a lower-case hex string representation of the given integer. |
794 | // ---------------------------------------------------------------------- |
795 | PROTOBUF_EXPORT string ToHex(uint64 num); |
796 | |
797 | // ---------------------------------------------------------------------- |
798 | // GlobalReplaceSubstring() |
799 | // Replaces all instances of a substring in a string. Does nothing |
800 | // if 'substring' is empty. Returns the number of replacements. |
801 | // |
802 | // NOTE: The string pieces must not overlap s. |
803 | // ---------------------------------------------------------------------- |
804 | PROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring, |
805 | const string& replacement, |
806 | string* s); |
807 | |
808 | // ---------------------------------------------------------------------- |
809 | // Base64Unescape() |
810 | // Converts "src" which is encoded in Base64 to its binary equivalent and |
811 | // writes it to "dest". If src contains invalid characters, dest is cleared |
812 | // and the function returns false. Returns true on success. |
813 | // ---------------------------------------------------------------------- |
814 | PROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest); |
815 | |
816 | // ---------------------------------------------------------------------- |
817 | // WebSafeBase64Unescape() |
818 | // This is a variation of Base64Unescape which uses '-' instead of '+', and |
819 | // '_' instead of '/'. src is not null terminated, instead specify len. I |
820 | // recommend that slen<szdest, but we honor szdest anyway. |
821 | // RETURNS the length of dest, or -1 if src contains invalid chars. |
822 | |
823 | // The variation that stores into a string clears the string first, and |
824 | // returns false (with dest empty) if src contains invalid chars; for |
825 | // this version src and dest must be different strings. |
826 | // ---------------------------------------------------------------------- |
827 | PROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, char* dest, |
828 | int szdest); |
829 | PROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, string* dest); |
830 | |
831 | // Return the length to use for the output buffer given to the base64 escape |
832 | // routines. Make sure to use the same value for do_padding in both. |
833 | // This function may return incorrect results if given input_len values that |
834 | // are extremely high, which should happen rarely. |
835 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, bool do_padding); |
836 | // Use this version when calling Base64Escape without a do_padding arg. |
837 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); |
838 | |
839 | // ---------------------------------------------------------------------- |
840 | // Base64Escape() |
841 | // WebSafeBase64Escape() |
842 | // Encode "src" to "dest" using base64 encoding. |
843 | // src is not null terminated, instead specify len. |
844 | // 'dest' should have at least CalculateBase64EscapedLen() length. |
845 | // RETURNS the length of dest. |
846 | // The WebSafe variation use '-' instead of '+' and '_' instead of '/' |
847 | // so that we can place the out in the URL or cookies without having |
848 | // to escape them. It also has an extra parameter "do_padding", |
849 | // which when set to false will prevent padding with "=". |
850 | // ---------------------------------------------------------------------- |
851 | PROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, char* dest, |
852 | int szdest); |
853 | PROTOBUF_EXPORT int WebSafeBase64Escape(const unsigned char* src, int slen, |
854 | char* dest, int szdest, |
855 | bool do_padding); |
856 | // Encode src into dest with padding. |
857 | PROTOBUF_EXPORT void Base64Escape(StringPiece src, string* dest); |
858 | // Encode src into dest web-safely without padding. |
859 | PROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, string* dest); |
860 | // Encode src into dest web-safely with padding. |
861 | PROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, |
862 | string* dest); |
863 | |
864 | PROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, |
865 | string* dest, bool do_padding); |
866 | PROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, |
867 | string* dest, bool do_padding); |
868 | |
869 | inline bool IsValidCodePoint(uint32 code_point) { |
870 | return code_point < 0xD800 || |
871 | (code_point >= 0xE000 && code_point <= 0x10FFFF); |
872 | } |
873 | |
874 | static const int UTFmax = 4; |
875 | // ---------------------------------------------------------------------- |
876 | // EncodeAsUTF8Char() |
877 | // Helper to append a Unicode code point to a string as UTF8, without bringing |
878 | // in any external dependencies. The output buffer must be as least 4 bytes |
879 | // large. |
880 | // ---------------------------------------------------------------------- |
881 | PROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output); |
882 | |
883 | // ---------------------------------------------------------------------- |
884 | // UTF8FirstLetterNumBytes() |
885 | // Length of the first UTF-8 character. |
886 | // ---------------------------------------------------------------------- |
887 | PROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); |
888 | |
889 | // From google3/third_party/absl/strings/escaping.h |
890 | |
891 | // ---------------------------------------------------------------------- |
892 | // CleanStringLineEndings() |
893 | // Clean up a multi-line string to conform to Unix line endings. |
894 | // Reads from src and appends to dst, so usually dst should be empty. |
895 | // |
896 | // If there is no line ending at the end of a non-empty string, it can |
897 | // be added automatically. |
898 | // |
899 | // Four different types of input are correctly handled: |
900 | // |
901 | // - Unix/Linux files: line ending is LF: pass through unchanged |
902 | // |
903 | // - DOS/Windows files: line ending is CRLF: convert to LF |
904 | // |
905 | // - Legacy Mac files: line ending is CR: convert to LF |
906 | // |
907 | // - Garbled files: random line endings: convert gracefully |
908 | // lonely CR, lonely LF, CRLF: convert to LF |
909 | // |
910 | // @param src The multi-line string to convert |
911 | // @param dst The converted string is appended to this string |
912 | // @param auto_end_last_line Automatically terminate the last line |
913 | // |
914 | // Limitations: |
915 | // |
916 | // This does not do the right thing for CRCRLF files created by |
917 | // broken programs that do another Unix->DOS conversion on files |
918 | // that are already in CRLF format. For this, a two-pass approach |
919 | // brute-force would be needed that |
920 | // |
921 | // (1) determines the presence of LF (first one is ok) |
922 | // (2) if yes, removes any CR, else convert every CR to LF |
923 | PROTOBUF_EXPORT void CleanStringLineEndings(const string& src, string* dst, |
924 | bool auto_end_last_line); |
925 | |
926 | // Same as above, but transforms the argument in place. |
927 | PROTOBUF_EXPORT void CleanStringLineEndings(string* str, |
928 | bool auto_end_last_line); |
929 | |
930 | namespace strings { |
931 | inline bool EndsWith(StringPiece text, StringPiece suffix) { |
932 | return suffix.empty() || |
933 | (text.size() >= suffix.size() && |
934 | memcmp(s1: text.data() + (text.size() - suffix.size()), s2: suffix.data(), |
935 | n: suffix.size()) == 0); |
936 | } |
937 | } // namespace strings |
938 | |
namespace internal {

// Locale-independent counterpart of the standard strtod(): the decimal
// separator is always a dot.
double NoLocaleStrtod(const char* str, char** endptr);

}  // namespace internal
946 | |
947 | } // namespace protobuf |
948 | } // namespace google |
949 | |
950 | #include <google/protobuf/port_undef.inc> |
951 | |
952 | #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
953 | |