| 1 | // © 2016 and later: Unicode, Inc. and others. | 
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 | // Copyright (C) 2009-2013, International Business Machines | 
| 4 | // Corporation and others. All Rights Reserved. | 
| 5 | // | 
| 6 | // Copyright 2001 and onwards Google Inc. | 
| 7 | // Author: Sanjay Ghemawat | 
| 8 |  | 
| 9 | // This code is a contribution of Google code, and the style used here is | 
| 10 | // a compromise between the original Google code and the ICU coding guidelines. | 
| 11 | // For example, data types are ICU-ified (size_t,int->int32_t), | 
| 12 | // and API comments doxygen-ified, but function names and behavior are | 
| 13 | // as in the original, if possible. | 
| 14 | // Assertion-style error handling, not available in ICU, was changed to | 
| 15 | // parameter "pinning" similar to UnicodeString. | 
| 16 | // | 
| 17 | // In addition, this is only a partial port of the original Google code, | 
| 18 | // limited to what was needed so far. The (nearly) complete original code | 
| 19 | // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib | 
| 20 | // (see ICU ticket 6765, r25517). | 
| 21 |  | 
| 22 | #ifndef __STRINGPIECE_H__ | 
| 23 | #define __STRINGPIECE_H__ | 
| 24 |  | 
| 25 | /** | 
| 26 |  * \file  | 
| 27 |  * \brief C++ API: StringPiece: Read-only byte string wrapper class. | 
| 28 |  */ | 
| 29 |  | 
| 30 | #include "unicode/utypes.h" | 
| 31 |  | 
| 32 | #if U_SHOW_CPLUSPLUS_API | 
| 33 |  | 
| 34 | #include <cstddef> | 
| 35 | #include <type_traits> | 
| 36 |  | 
| 37 | #include "unicode/uobject.h" | 
| 38 | #include "unicode/std_string.h" | 
| 39 |  | 
| 40 | // Arghh!  I wish C++ literals were "string". | 
| 41 |  | 
| 42 | U_NAMESPACE_BEGIN | 
| 43 |  | 
| 44 | /** | 
| 45 |  * A non-owning, string-like object that points to a sized piece of memory. | 
| 46 |  * | 
| 47 |  * We provide non-explicit singleton constructors so users can pass | 
| 48 |  * in a "const char*" or a "string" wherever a "StringPiece" is | 
| 49 |  * expected. | 
| 50 |  * | 
| 51 |  * Functions or methods may use StringPiece parameters to accept either a | 
| 52 |  * "const char*" or a "string" value that will be implicitly converted to a | 
| 53 |  * StringPiece. | 
| 54 |  * | 
| 55 |  * Systematic usage of StringPiece is encouraged as it will reduce unnecessary | 
| 56 |  * conversions from "const char*" to "string" and back again. | 
| 57 |  * | 
| 58 |  * @stable ICU 4.2 | 
| 59 |  */ | 
| 60 | class U_COMMON_API StringPiece : public UMemory { | 
| 61 |  private: | 
| 62 |   const char*   ptr_; | 
| 63 |   int32_t       length_; | 
| 64 |  | 
| 65 |  public: | 
| 66 |   /** | 
| 67 |    * Default constructor, creates an empty StringPiece (null pointer, zero length). | 
| 68 |    * @stable ICU 4.2 | 
| 69 |    */ | 
| 70 |   StringPiece() : ptr_(nullptr), length_(0) { } | 
| 71 |  | 
| 72 |   /** | 
| 73 |    * Constructs from a NUL-terminated const char * pointer. | 
| 74 |    * @param str a NUL-terminated const char * pointer | 
| 75 |    * @stable ICU 4.2 | 
| 76 |    */ | 
| 77 |   StringPiece(const char* str); | 
| 78 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) | 
| 79 |   /** | 
| 80 |    * Constructs from a NUL-terminated const char8_t * pointer. | 
| 81 |    * @param str a NUL-terminated const char8_t * pointer | 
| 82 |    * @stable ICU 67 | 
| 83 |    */ | 
| 84 |   StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} | 
| 85 | #endif | 
| 86 |   /** | 
| 87 |    * Constructs an empty StringPiece. | 
| 88 |    * Needed for type disambiguation from multiple other overloads. | 
| 89 |    * @param p nullptr | 
| 90 |    * @stable ICU 67 | 
| 91 |    */ | 
| 92 |   StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} | 
| 93 |  | 
| 94 |   /** | 
| 95 |    * Constructs from a std::string; refers to the string's buffer without copying it. | 
| 96 |    * @stable ICU 4.2 | 
| 97 |    */ | 
| 98 |   StringPiece(const std::string& str) | 
| 99 |     : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } | 
| 100 | #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) | 
| 101 |   /** | 
| 102 |    * Constructs from a std::u8string; refers to the string's buffer without copying it. | 
| 103 |    * @stable ICU 67 | 
| 104 |    */ | 
| 105 |   StringPiece(const std::u8string& str) | 
| 106 |     : ptr_(reinterpret_cast<const char*>(str.data())), | 
| 107 |       length_(static_cast<int32_t>(str.size())) { } | 
| 108 | #endif | 
| 109 |  | 
| 110 |   /** | 
| 111 |    * Constructs from some other implementation of a string piece class, from any | 
| 112 |    * C++ record type that has these two methods: | 
| 113 |    * | 
| 114 |    * \code{.cpp} | 
| 115 |    * | 
| 116 |    *   struct OtherStringPieceClass { | 
| 117 |    *     const char* data();  // or const char8_t* | 
| 118 |    *     size_t size(); | 
| 119 |    *   }; | 
| 120 |    * | 
| 121 |    * \endcode | 
| 122 |    * | 
| 123 |    * The other string piece class will typically be std::string_view from C++17 | 
| 124 |    * or absl::string_view from Abseil. | 
| 125 |    * | 
| 126 |    * Starting with C++20, data() may also return a const char8_t* pointer, | 
| 127 |    * as from std::u8string_view. | 
| 128 |    * | 
| 129 |    * @param str the other string piece; its type must be default-constructible (the constraint evaluates T()) | 
| 130 |    * @stable ICU 65 | 
| 131 |    */ | 
| 132 |   template <typename T, | 
| 133 |             typename = typename std::enable_if< | 
| 134 |                 (std::is_same<decltype(T().data()), const char*>::value | 
| 135 | #if defined(__cpp_char8_t) | 
| 136 |                     || std::is_same<decltype(T().data()), const char8_t*>::value | 
| 137 | #endif | 
| 138 |                 ) && | 
| 139 |                 std::is_same<decltype(T().size()), size_t>::value>::type> | 
| 140 |   StringPiece(T str) | 
| 141 |       : ptr_(reinterpret_cast<const char*>(str.data())), | 
| 142 |         length_(static_cast<int32_t>(str.size())) {} | 
| 143 |  | 
| 144 |   /** | 
| 145 |    * Constructs from a const char * pointer and a specified length. | 
| 146 |    * @param offset a const char * pointer (need not be terminated) | 
| 147 |    * @param len the length of the string; must be non-negative (not checked here) | 
| 148 |    * @stable ICU 4.2 | 
| 149 |    */ | 
| 150 |   StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } | 
| 151 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) | 
| 152 |   /** | 
| 153 |    * Constructs from a const char8_t * pointer and a specified length. | 
| 154 |    * @param str a const char8_t * pointer (need not be terminated) | 
| 155 |    * @param len the length of the string; must be non-negative (not checked here) | 
| 156 |    * @stable ICU 67 | 
| 157 |    */ | 
| 158 |   StringPiece(const char8_t* str, int32_t len) : | 
| 159 |       StringPiece(reinterpret_cast<const char*>(str), len) {} | 
| 160 | #endif | 
| 161 |  | 
| 162 |   /** | 
| 163 |    * Substring of another StringPiece. | 
| 164 |    * @param x the other StringPiece | 
| 165 |    * @param pos start position in x; must be non-negative and <= x.length(). | 
| 166 |    * @stable ICU 4.2 | 
| 167 |    */ | 
| 168 |   StringPiece(const StringPiece& x, int32_t pos); | 
| 169 |   /** | 
| 170 |    * Substring of another StringPiece. | 
| 171 |    * @param x the other StringPiece | 
| 172 |    * @param pos start position in x; must be non-negative and <= x.length(). | 
| 173 |    * @param len length of the substring; | 
| 174 |    *            must be non-negative and will be pinned to at most x.length() - pos. | 
| 175 |    * @stable ICU 4.2 | 
| 176 |    */ | 
| 177 |   StringPiece(const StringPiece& x, int32_t pos, int32_t len); | 
| 178 |  | 
| 179 |   /** | 
| 180 |    * Returns the string pointer. May be nullptr if it is empty. | 
| 181 |    * | 
| 182 |    * data() may return a pointer to a buffer with embedded NULs, and the | 
| 183 |    * returned buffer may or may not be NUL-terminated.  Therefore it is | 
| 184 |    * typically a mistake to pass data() to a routine that expects a | 
| 185 |    * NUL-terminated string. | 
| 186 |    * @return the string pointer | 
| 187 |    * @stable ICU 4.2 | 
| 188 |    */ | 
| 189 |   const char* data() const { return ptr_; } | 
| 190 |   /** | 
| 191 |    * Returns the string length. Same as length(). | 
| 192 |    * @return the string length | 
| 193 |    * @stable ICU 4.2 | 
| 194 |    */ | 
| 195 |   int32_t size() const { return length_; } | 
| 196 |   /** | 
| 197 |    * Returns the string length. Same as size(). | 
| 198 |    * @return the string length | 
| 199 |    * @stable ICU 4.2 | 
| 200 |    */ | 
| 201 |   int32_t length() const { return length_; } | 
| 202 |   /** | 
| 203 |    * Returns whether the string is empty, that is, whether its length is zero. | 
| 204 |    * @return true if the string is empty | 
| 205 |    * @stable ICU 4.2 | 
| 206 |    */ | 
| 207 |   UBool empty() const { return length_ == 0; } | 
| 208 |  | 
| 209 |   /** | 
| 210 |    * Sets to an empty string (null pointer, zero length). | 
| 211 |    * @stable ICU 4.2 | 
| 212 |    */ | 
| 213 |   void clear() { ptr_ = nullptr; length_ = 0; } | 
| 214 |  | 
| 215 |   /** | 
| 216 |    * Resets the stringpiece to refer to new data. | 
| 217 |    * @param xdata pointer to the new string data. Need not be NUL-terminated. | 
| 218 |    * @param len the length of the new data (stored as given, not checked here) | 
| 219 |    * @stable ICU 4.8 | 
| 220 |    */ | 
| 221 |   void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } | 
| 222 |  | 
| 223 |   /** | 
| 224 |    * Resets the stringpiece to refer to new data. | 
| 225 |    * @param str a pointer to a NUL-terminated string.  | 
| 226 |    * @stable ICU 4.8 | 
| 227 |    */ | 
| 228 |   void set(const char* str); | 
| 229 |  | 
| 230 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) | 
| 231 |   /** | 
| 232 |    * Resets the stringpiece to refer to new data. | 
| 233 |    * @param xdata pointer to the new string data. Need not be NUL-terminated. | 
| 234 |    * @param len the length of the new data | 
| 235 |    * @stable ICU 67 | 
| 236 |    */ | 
| 237 |   inline void set(const char8_t* xdata, int32_t len) { | 
| 238 |       set(reinterpret_cast<const char*>(xdata), len); | 
| 239 |   } | 
| 240 |  | 
| 241 |   /** | 
| 242 |    * Resets the stringpiece to refer to new data. | 
| 243 |    * @param str a pointer to a NUL-terminated string. | 
| 244 |    * @stable ICU 67 | 
| 245 |    */ | 
| 246 |   inline void set(const char8_t* str) { | 
| 247 |       set(reinterpret_cast<const char*>(str)); | 
| 248 |   } | 
| 249 | #endif | 
| 250 |  | 
| 251 |   /** | 
| 252 |    * Removes the first n string units. | 
| 253 |    * @param n prefix length; a negative n is ignored, and n > length() is pinned to length() | 
| 254 |    * @stable ICU 4.2 | 
| 255 |    */ | 
| 256 |   void remove_prefix(int32_t n) { | 
| 257 |     if (n >= 0) { | 
| 258 |       if (n > length_) { | 
| 259 |         n = length_; | 
| 260 |       } | 
| 261 |       ptr_ += n; | 
| 262 |       length_ -= n; | 
| 263 |     } | 
| 264 |   } | 
| 265 |  | 
| 266 |   /** | 
| 267 |    * Removes the last n string units. | 
| 268 |    * @param n suffix length; a negative n is ignored, and n > length() sets the length to 0 | 
| 269 |    * @stable ICU 4.2 | 
| 270 |    */ | 
| 271 |   void remove_suffix(int32_t n) { | 
| 272 |     if (n >= 0) { | 
| 273 |       if (n <= length_) { | 
| 274 |         length_ -= n; | 
| 275 |       } else { | 
| 276 |         length_ = 0; | 
| 277 |       } | 
| 278 |     } | 
| 279 |   } | 
| 280 |  | 
| 281 |   /** | 
| 282 |    * Searches the StringPiece for the given search string (needle). | 
| 283 |    * @param needle The string for which to search. | 
| 284 |    * @param offset Where to start searching within this string (haystack). | 
| 285 |    * @return The offset of needle in haystack, or -1 if not found. | 
| 286 |    * @stable ICU 67 | 
| 287 |    */ | 
| 288 |   int32_t find(StringPiece needle, int32_t offset); | 
| 289 |  | 
| 290 |   /** | 
| 291 |    * Compares this StringPiece with the other StringPiece, with semantics | 
| 292 |    * similar to std::string::compare(). | 
| 293 |    * @param other The string to compare to. | 
| 294 |    * @return below zero if this < other; above zero if this > other; 0 if this == other. | 
| 295 |    * @stable ICU 67 | 
| 296 |    */ | 
| 297 |   int32_t compare(StringPiece other); | 
| 298 |  | 
| 299 |   /** | 
| 300 |    * Maximum integer, used as a default value for substring methods. | 
| 301 |    * @stable ICU 4.2 | 
| 302 |    */ | 
| 303 |   static const int32_t npos; // = 0x7fffffff; | 
| 304 |  | 
| 305 |   /** | 
| 306 |    * Returns a substring of this StringPiece. | 
| 307 |    * @param pos start position; must be non-negative and <= length(). | 
| 308 |    * @param len length of the substring; | 
| 309 |    *            must be non-negative and will be pinned to at most length() - pos. | 
| 310 |    * @return the substring StringPiece | 
| 311 |    * @stable ICU 4.2 | 
| 312 |    */ | 
| 313 |   StringPiece substr(int32_t pos, int32_t len = npos) const { | 
| 314 |     return StringPiece(*this, pos, len); | 
| 315 |   } | 
| 316 | }; | 
| 317 |  | 
| 318 | /** | 
| 319 |  * Global operator == for StringPiece (declared here, defined out of line). | 
| 320 |  * @param x The first StringPiece to compare. | 
| 321 |  * @param y The second StringPiece to compare. | 
| 322 |  * @return true if the string data is equal | 
| 323 |  * @stable ICU 4.8 | 
| 324 |  */ | 
| 325 | U_EXPORT UBool U_EXPORT2  | 
| 326 | operator==(const StringPiece& x, const StringPiece& y); | 
| 327 |  | 
| 328 | /** | 
| 329 |  * Global operator != for StringPiece; the negation of operator ==. | 
| 330 |  * @param x The first StringPiece to compare. | 
| 331 |  * @param y The second StringPiece to compare. | 
| 332 |  * @return true if the string data is not equal | 
| 333 |  * @stable ICU 4.8 | 
| 334 |  */ | 
| 335 | inline bool operator!=(const StringPiece& x, const StringPiece& y) { | 
| 336 |   return !(x == y); | 
| 337 | } | 
| 338 |  | 
| 339 | U_NAMESPACE_END | 
| 340 |  | 
| 341 | #endif /* U_SHOW_CPLUSPLUS_API */ | 
| 342 |  | 
| 343 | #endif  // __STRINGPIECE_H__ | 
| 344 |  |