1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | // Copyright (C) 2009-2013, International Business Machines |
4 | // Corporation and others. All Rights Reserved. |
5 | // |
6 | // Copyright 2001 and onwards Google Inc. |
7 | // Author: Sanjay Ghemawat |
8 | |
9 | // This code is a contribution of Google code, and the style used here is |
10 | // a compromise between the original Google code and the ICU coding guidelines. |
11 | // For example, data types are ICU-ified (size_t,int->int32_t), |
12 | // and API comments doxygen-ified, but function names and behavior are |
13 | // as in the original, if possible. |
14 | // Assertion-style error handling, not available in ICU, was changed to |
15 | // parameter "pinning" similar to UnicodeString. |
16 | // |
17 | // In addition, this is only a partial port of the original Google code, |
18 | // limited to what was needed so far. The (nearly) complete original code |
19 | // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib |
20 | // (see ICU ticket 6765, r25517). |
21 | |
22 | #ifndef __STRINGPIECE_H__ |
23 | #define __STRINGPIECE_H__ |
24 | |
25 | /** |
26 | * \file |
27 | * \brief C++ API: StringPiece: Read-only byte string wrapper class. |
28 | */ |
29 | |
30 | #include "unicode/utypes.h" |
31 | |
32 | #if U_SHOW_CPLUSPLUS_API |
33 | |
#include <cstddef>
#include <type_traits>
#include <utility>
36 | |
37 | #include "unicode/uobject.h" |
38 | #include "unicode/std_string.h" |
39 | |
40 | // Arghh! I wish C++ literals were "string". |
41 | |
42 | U_NAMESPACE_BEGIN |
43 | |
44 | /** |
45 | * A string-like object that points to a sized piece of memory. |
46 | * |
47 | * We provide non-explicit singleton constructors so users can pass |
48 | * in a "const char*" or a "string" wherever a "StringPiece" is |
49 | * expected. |
50 | * |
51 | * Functions or methods may use StringPiece parameters to accept either a |
52 | * "const char*" or a "string" value that will be implicitly converted to a |
53 | * StringPiece. |
54 | * |
55 | * Systematic usage of StringPiece is encouraged as it will reduce unnecessary |
56 | * conversions from "const char*" to "string" and back again. |
57 | * |
58 | * @stable ICU 4.2 |
59 | */ |
60 | class U_COMMON_API StringPiece : public UMemory { |
61 | private: |
62 | const char* ptr_; |
63 | int32_t length_; |
64 | |
65 | public: |
66 | /** |
67 | * Default constructor, creates an empty StringPiece. |
68 | * @stable ICU 4.2 |
69 | */ |
70 | StringPiece() : ptr_(nullptr), length_(0) { } |
71 | |
72 | /** |
73 | * Constructs from a NUL-terminated const char * pointer. |
74 | * @param str a NUL-terminated const char * pointer |
75 | * @stable ICU 4.2 |
76 | */ |
77 | StringPiece(const char* str); |
78 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
79 | /** |
80 | * Constructs from a NUL-terminated const char8_t * pointer. |
81 | * @param str a NUL-terminated const char8_t * pointer |
82 | * @stable ICU 67 |
83 | */ |
84 | StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} |
85 | #endif |
86 | /** |
87 | * Constructs an empty StringPiece. |
88 | * Needed for type disambiguation from multiple other overloads. |
89 | * @param p nullptr |
90 | * @stable ICU 67 |
91 | */ |
92 | StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} |
93 | |
94 | /** |
95 | * Constructs from a std::string. |
96 | * @stable ICU 4.2 |
97 | */ |
98 | StringPiece(const std::string& str) |
99 | : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } |
100 | #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) |
101 | /** |
102 | * Constructs from a std::u8string. |
103 | * @stable ICU 67 |
104 | */ |
105 | StringPiece(const std::u8string& str) |
106 | : ptr_(reinterpret_cast<const char*>(str.data())), |
107 | length_(static_cast<int32_t>(str.size())) { } |
108 | #endif |
109 | |
110 | /** |
111 | * Constructs from some other implementation of a string piece class, from any |
112 | * C++ record type that has these two methods: |
113 | * |
114 | * \code{.cpp} |
115 | * |
116 | * struct OtherStringPieceClass { |
117 | * const char* data(); // or const char8_t* |
118 | * size_t size(); |
119 | * }; |
120 | * |
121 | * \endcode |
122 | * |
123 | * The other string piece class will typically be std::string_view from C++17 |
124 | * or absl::string_view from Abseil. |
125 | * |
126 | * Starting with C++20, data() may also return a const char8_t* pointer, |
127 | * as from std::u8string_view. |
128 | * |
129 | * @param str the other string piece |
130 | * @stable ICU 65 |
131 | */ |
132 | template <typename T, |
133 | typename = typename std::enable_if< |
134 | (std::is_same<decltype(T().data()), const char*>::value |
135 | #if defined(__cpp_char8_t) |
136 | || std::is_same<decltype(T().data()), const char8_t*>::value |
137 | #endif |
138 | ) && |
139 | std::is_same<decltype(T().size()), size_t>::value>::type> |
140 | StringPiece(T str) |
141 | : ptr_(reinterpret_cast<const char*>(str.data())), |
142 | length_(static_cast<int32_t>(str.size())) {} |
143 | |
144 | /** |
145 | * Constructs from a const char * pointer and a specified length. |
146 | * @param offset a const char * pointer (need not be terminated) |
147 | * @param len the length of the string; must be non-negative |
148 | * @stable ICU 4.2 |
149 | */ |
150 | StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } |
151 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
152 | /** |
153 | * Constructs from a const char8_t * pointer and a specified length. |
154 | * @param str a const char8_t * pointer (need not be terminated) |
155 | * @param len the length of the string; must be non-negative |
156 | * @stable ICU 67 |
157 | */ |
158 | StringPiece(const char8_t* str, int32_t len) : |
159 | StringPiece(reinterpret_cast<const char*>(str), len) {} |
160 | #endif |
161 | |
162 | /** |
163 | * Substring of another StringPiece. |
164 | * @param x the other StringPiece |
165 | * @param pos start position in x; must be non-negative and <= x.length(). |
166 | * @stable ICU 4.2 |
167 | */ |
168 | StringPiece(const StringPiece& x, int32_t pos); |
169 | /** |
170 | * Substring of another StringPiece. |
171 | * @param x the other StringPiece |
172 | * @param pos start position in x; must be non-negative and <= x.length(). |
173 | * @param len length of the substring; |
174 | * must be non-negative and will be pinned to at most x.length() - pos. |
175 | * @stable ICU 4.2 |
176 | */ |
177 | StringPiece(const StringPiece& x, int32_t pos, int32_t len); |
178 | |
179 | /** |
180 | * Returns the string pointer. May be nullptr if it is empty. |
181 | * |
182 | * data() may return a pointer to a buffer with embedded NULs, and the |
183 | * returned buffer may or may not be null terminated. Therefore it is |
184 | * typically a mistake to pass data() to a routine that expects a NUL |
185 | * terminated string. |
186 | * @return the string pointer |
187 | * @stable ICU 4.2 |
188 | */ |
189 | const char* data() const { return ptr_; } |
190 | /** |
191 | * Returns the string length. Same as length(). |
192 | * @return the string length |
193 | * @stable ICU 4.2 |
194 | */ |
195 | int32_t size() const { return length_; } |
196 | /** |
197 | * Returns the string length. Same as size(). |
198 | * @return the string length |
199 | * @stable ICU 4.2 |
200 | */ |
201 | int32_t length() const { return length_; } |
202 | /** |
203 | * Returns whether the string is empty. |
204 | * @return true if the string is empty |
205 | * @stable ICU 4.2 |
206 | */ |
207 | UBool empty() const { return length_ == 0; } |
208 | |
209 | /** |
210 | * Sets to an empty string. |
211 | * @stable ICU 4.2 |
212 | */ |
213 | void clear() { ptr_ = nullptr; length_ = 0; } |
214 | |
215 | /** |
216 | * Reset the stringpiece to refer to new data. |
217 | * @param xdata pointer the new string data. Need not be nul terminated. |
218 | * @param len the length of the new data |
219 | * @stable ICU 4.8 |
220 | */ |
221 | void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } |
222 | |
223 | /** |
224 | * Reset the stringpiece to refer to new data. |
225 | * @param str a pointer to a NUL-terminated string. |
226 | * @stable ICU 4.8 |
227 | */ |
228 | void set(const char* str); |
229 | |
230 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
231 | /** |
232 | * Resets the stringpiece to refer to new data. |
233 | * @param xdata pointer the new string data. Need not be NUL-terminated. |
234 | * @param len the length of the new data |
235 | * @stable ICU 67 |
236 | */ |
237 | inline void set(const char8_t* xdata, int32_t len) { |
238 | set(reinterpret_cast<const char*>(xdata), len); |
239 | } |
240 | |
241 | /** |
242 | * Resets the stringpiece to refer to new data. |
243 | * @param str a pointer to a NUL-terminated string. |
244 | * @stable ICU 67 |
245 | */ |
246 | inline void set(const char8_t* str) { |
247 | set(reinterpret_cast<const char*>(str)); |
248 | } |
249 | #endif |
250 | |
251 | /** |
252 | * Removes the first n string units. |
253 | * @param n prefix length, must be non-negative and <=length() |
254 | * @stable ICU 4.2 |
255 | */ |
256 | void remove_prefix(int32_t n) { |
257 | if (n >= 0) { |
258 | if (n > length_) { |
259 | n = length_; |
260 | } |
261 | ptr_ += n; |
262 | length_ -= n; |
263 | } |
264 | } |
265 | |
266 | /** |
267 | * Removes the last n string units. |
268 | * @param n suffix length, must be non-negative and <=length() |
269 | * @stable ICU 4.2 |
270 | */ |
271 | void remove_suffix(int32_t n) { |
272 | if (n >= 0) { |
273 | if (n <= length_) { |
274 | length_ -= n; |
275 | } else { |
276 | length_ = 0; |
277 | } |
278 | } |
279 | } |
280 | |
281 | /** |
282 | * Searches the StringPiece for the given search string (needle); |
283 | * @param needle The string for which to search. |
284 | * @param offset Where to start searching within this string (haystack). |
285 | * @return The offset of needle in haystack, or -1 if not found. |
286 | * @stable ICU 67 |
287 | */ |
288 | int32_t find(StringPiece needle, int32_t offset); |
289 | |
290 | /** |
291 | * Compares this StringPiece with the other StringPiece, with semantics |
292 | * similar to std::string::compare(). |
293 | * @param other The string to compare to. |
294 | * @return below zero if this < other; above zero if this > other; 0 if this == other. |
295 | * @stable ICU 67 |
296 | */ |
297 | int32_t compare(StringPiece other); |
298 | |
299 | /** |
300 | * Maximum integer, used as a default value for substring methods. |
301 | * @stable ICU 4.2 |
302 | */ |
303 | static const int32_t npos; // = 0x7fffffff; |
304 | |
305 | /** |
306 | * Returns a substring of this StringPiece. |
307 | * @param pos start position; must be non-negative and <= length(). |
308 | * @param len length of the substring; |
309 | * must be non-negative and will be pinned to at most length() - pos. |
310 | * @return the substring StringPiece |
311 | * @stable ICU 4.2 |
312 | */ |
313 | StringPiece substr(int32_t pos, int32_t len = npos) const { |
314 | return StringPiece(*this, pos, len); |
315 | } |
316 | }; |
317 | |
/**
 * Global operator == for StringPiece.
 *
 * This header only declares the operator; its definition is not part of
 * this file.
 *
 * @param x The first StringPiece to compare.
 * @param y The second StringPiece to compare.
 * @return true if the string data is equal
 * @stable ICU 4.8
 */
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y);
327 | |
328 | /** |
329 | * Global operator != for StringPiece |
330 | * @param x The first StringPiece to compare. |
331 | * @param y The second StringPiece to compare. |
332 | * @return true if the string data is not equal |
333 | * @stable ICU 4.8 |
334 | */ |
335 | inline bool operator!=(const StringPiece& x, const StringPiece& y) { |
336 | return !(x == y); |
337 | } |
338 | |
339 | U_NAMESPACE_END |
340 | |
341 | #endif /* U_SHOW_CPLUSPLUS_API */ |
342 | |
343 | #endif // __STRINGPIECE_H__ |
344 | |