1 | //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | |

9 | #ifndef LLVM_ADT_STRINGREF_H |

10 | #define LLVM_ADT_STRINGREF_H |

11 | |

12 | #include "llvm/ADT/DenseMapInfo.h" |

13 | #include "llvm/ADT/STLFunctionalExtras.h" |

14 | #include "llvm/ADT/iterator_range.h" |

15 | #include "llvm/Support/Compiler.h" |

16 | #include <algorithm> |

17 | #include <cassert> |

18 | #include <cstddef> |

19 | #include <cstring> |

20 | #include <limits> |

21 | #include <string> |

22 | #include <string_view> |

23 | #include <type_traits> |

24 | #include <utility> |

25 | |

26 | namespace llvm { |

27 | |

// Forward declarations; full definitions are not required by the
// declarations in this header.
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
class StringRef;

32 | |

33 | /// Helper functions for StringRef::getAsInteger. |

34 | bool getAsUnsignedInteger(StringRef Str, unsigned Radix, |

35 | unsigned long long &Result); |

36 | |

37 | bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); |

38 | |

39 | bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, |

40 | unsigned long long &Result); |

41 | bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); |

42 | |

43 | /// StringRef - Represent a constant reference to a string, i.e. a character |

44 | /// array and a length, which need not be null terminated. |

45 | /// |

46 | /// This class does not own the string data, it is expected to be used in |

47 | /// situations where the character data resides in some other buffer, whose |

48 | /// lifetime extends past that of the StringRef. For this reason, it is not in |

49 | /// general safe to store a StringRef. |

50 | class LLVM_GSL_POINTER StringRef { |

51 | public: |

/// Sentinel index with every bit set; the search functions below document
/// it as their "not found" result.
static constexpr size_t npos = ~size_t(0);

/// Iteration is done with plain pointers into the referenced buffer.
using iterator = const char *;
using const_iterator = const char *;
using size_type = size_t;

57 | |

58 | private: |

/// Pointer to the first character, which lives in an external buffer.
/// Null for a default-constructed StringRef.
const char *Data = nullptr;

/// Number of characters referenced, starting at Data.
size_t Length = 0;

64 | |

/// memcmp wrapper that is safe for empty ranges.
///
/// Calling memcmp with a null pointer is undefined behavior even when the
/// length is zero, so the zero-length case is answered without calling it.
///
/// \param Lhs First buffer (may be null when \p Length is 0).
/// \param Rhs Second buffer (may be null when \p Length is 0).
/// \param Length Number of bytes to compare.
/// \returns 0 when \p Length is 0, otherwise the result of ::memcmp.
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  if (Length == 0)
    return 0;
  return ::memcmp(Lhs, Rhs, Length);
}

71 | |

72 | public: |

73 | /// @name Constructors |

74 | /// @{ |

75 | |

76 | /// Construct an empty string ref. |

77 | /*implicit*/ StringRef() = default; |

78 | |

79 | /// Disable conversion from nullptr. This prevents things like |

80 | /// if (S == nullptr) |

81 | StringRef(std::nullptr_t) = delete; |

82 | |

83 | /// Construct a string ref from a cstring. |

84 | /*implicit*/ constexpr StringRef(const char *Str) |

85 | : Data(Str), Length(Str ? |

86 | // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. |

87 | #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 |

88 | __builtin_strlen(Str) |

89 | #else |

90 | std::char_traits<char>::length(s: Str) |

91 | #endif |

92 | : 0) { |

93 | } |

94 | |

95 | /// Construct a string ref from a pointer and length. |

96 | /*implicit*/ constexpr StringRef(const char *data, size_t length) |

97 | : Data(data), Length(length) {} |

98 | |

99 | /// Construct a string ref from an std::string. |

100 | /*implicit*/ StringRef(const std::string &Str) |

101 | : Data(Str.data()), Length(Str.length()) {} |

102 | |

103 | /// Construct a string ref from an std::string_view. |

104 | /*implicit*/ constexpr StringRef(std::string_view Str) |

105 | : Data(Str.data()), Length(Str.size()) {} |

106 | |

107 | /// @} |

108 | /// @name Iterators |

109 | /// @{ |

110 | |

111 | iterator begin() const { return Data; } |

112 | |

113 | iterator end() const { return Data + Length; } |

114 | |

115 | const unsigned char *bytes_begin() const { |

116 | return reinterpret_cast<const unsigned char *>(begin()); |

117 | } |

118 | const unsigned char *bytes_end() const { |

119 | return reinterpret_cast<const unsigned char *>(end()); |

120 | } |

121 | iterator_range<const unsigned char *> bytes() const { |

122 | return make_range(x: bytes_begin(), y: bytes_end()); |

123 | } |

124 | |

125 | /// @} |

126 | /// @name String Operations |

127 | /// @{ |

128 | |

129 | /// data - Get a pointer to the start of the string (which may not be null |

130 | /// terminated). |

131 | [[nodiscard]] constexpr const char *data() const { return Data; } |

132 | |

133 | /// empty - Check if the string is empty. |

134 | [[nodiscard]] constexpr bool empty() const { return Length == 0; } |

135 | |

136 | /// size - Get the string size. |

137 | [[nodiscard]] constexpr size_t size() const { return Length; } |

138 | |

139 | /// front - Get the first character in the string. |

140 | [[nodiscard]] char front() const { |

141 | assert(!empty()); |

142 | return Data[0]; |

143 | } |

144 | |

145 | /// back - Get the last character in the string. |

146 | [[nodiscard]] char back() const { |

147 | assert(!empty()); |

148 | return Data[Length-1]; |

149 | } |

150 | |

151 | // copy - Allocate copy in Allocator and return StringRef to it. |

152 | template <typename Allocator> |

153 | [[nodiscard]] StringRef copy(Allocator &A) const { |

154 | // Don't request a length 0 copy from the allocator. |

155 | if (empty()) |

156 | return StringRef(); |

157 | char *S = A.template Allocate<char>(Length); |

158 | std::copy(begin(), end(), S); |

159 | return StringRef(S, Length); |

160 | } |

161 | |

162 | /// equals - Check for string equality, this is more efficient than |

163 | /// compare() when the relative ordering of inequal strings isn't needed. |

164 | [[nodiscard]] bool equals(StringRef RHS) const { |

165 | return (Length == RHS.Length && |

166 | compareMemory(Lhs: Data, Rhs: RHS.Data, Length: RHS.Length) == 0); |

167 | } |

168 | |

169 | /// Check for string equality, ignoring case. |

170 | [[nodiscard]] bool equals_insensitive(StringRef RHS) const { |

171 | return Length == RHS.Length && compare_insensitive(RHS) == 0; |

172 | } |

173 | |

174 | /// compare - Compare two strings; the result is negative, zero, or positive |

175 | /// if this string is lexicographically less than, equal to, or greater than |

176 | /// the \p RHS. |

177 | [[nodiscard]] int compare(StringRef RHS) const { |

178 | // Check the prefix for a mismatch. |

179 | if (int Res = compareMemory(Lhs: Data, Rhs: RHS.Data, Length: std::min(a: Length, b: RHS.Length))) |

180 | return Res < 0 ? -1 : 1; |

181 | |

182 | // Otherwise the prefixes match, so we only need to check the lengths. |

183 | if (Length == RHS.Length) |

184 | return 0; |

185 | return Length < RHS.Length ? -1 : 1; |

186 | } |

187 | |

188 | /// Compare two strings, ignoring case. |

189 | [[nodiscard]] int compare_insensitive(StringRef RHS) const; |

190 | |

191 | /// compare_numeric - Compare two strings, treating sequences of digits as |

192 | /// numbers. |

193 | [[nodiscard]] int compare_numeric(StringRef RHS) const; |

194 | |

195 | /// Determine the edit distance between this string and another |

196 | /// string. |

197 | /// |

198 | /// \param Other the string to compare this string against. |

199 | /// |

200 | /// \param AllowReplacements whether to allow character |

201 | /// replacements (change one character into another) as a single |

202 | /// operation, rather than as two operations (an insertion and a |

203 | /// removal). |

204 | /// |

205 | /// \param MaxEditDistance If non-zero, the maximum edit distance that |

206 | /// this routine is allowed to compute. If the edit distance will exceed |

207 | /// that maximum, returns \c MaxEditDistance+1. |

208 | /// |

209 | /// \returns the minimum number of character insertions, removals, |

210 | /// or (if \p AllowReplacements is \c true) replacements needed to |

211 | /// transform one of the given strings into the other. If zero, |

212 | /// the strings are identical. |

213 | [[nodiscard]] unsigned edit_distance(StringRef Other, |

214 | bool AllowReplacements = true, |

215 | unsigned MaxEditDistance = 0) const; |

216 | |

217 | [[nodiscard]] unsigned |

218 | edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, |

219 | unsigned MaxEditDistance = 0) const; |

220 | |

221 | /// str - Get the contents as an std::string. |

222 | [[nodiscard]] std::string str() const { |

223 | if (!Data) return std::string(); |

224 | return std::string(Data, Length); |

225 | } |

226 | |

227 | /// @} |

228 | /// @name Operator Overloads |

229 | /// @{ |

230 | |

231 | [[nodiscard]] char operator[](size_t Index) const { |

232 | assert(Index < Length && "Invalid index!"); |

233 | return Data[Index]; |

234 | } |

235 | |

236 | /// Disallow accidental assignment from a temporary std::string. |

237 | /// |

238 | /// The declaration here is extra complicated so that `stringRef = {}` |

239 | /// and `stringRef = "abc"` continue to select the move assignment operator. |

240 | template <typename T> |

241 | std::enable_if_t<std::is_same<T, std::string>::value, StringRef> & |

242 | operator=(T &&Str) = delete; |

243 | |

244 | /// @} |

245 | /// @name Type Conversions |

246 | /// @{ |

247 | |

248 | constexpr operator std::string_view() const { |

249 | return std::string_view(data(), size()); |

250 | } |

251 | |

252 | /// @} |

253 | /// @name String Predicates |

254 | /// @{ |

255 | |

256 | /// Check if this string starts with the given \p Prefix. |

257 | [[nodiscard]] bool starts_with(StringRef Prefix) const { |

258 | return Length >= Prefix.Length && |

259 | compareMemory(Lhs: Data, Rhs: Prefix.Data, Length: Prefix.Length) == 0; |

260 | } |

261 | |

262 | /// Check if this string starts with the given \p Prefix, ignoring case. |

263 | [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const; |

264 | |

265 | /// Check if this string ends with the given \p Suffix. |

266 | [[nodiscard]] bool ends_with(StringRef Suffix) const { |

267 | return Length >= Suffix.Length && |

268 | compareMemory(Lhs: end() - Suffix.Length, Rhs: Suffix.Data, Length: Suffix.Length) == |

269 | 0; |

270 | } |

271 | |

272 | /// Check if this string ends with the given \p Suffix, ignoring case. |

273 | [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const; |

274 | |

275 | /// @} |

276 | /// @name String Searching |

277 | /// @{ |

278 | |

279 | /// Search for the first character \p C in the string. |

280 | /// |

281 | /// \returns The index of the first occurrence of \p C, or npos if not |

282 | /// found. |

283 | [[nodiscard]] size_t find(char C, size_t From = 0) const { |

284 | return std::string_view(*this).find(c: C, pos: From); |

285 | } |

286 | |

287 | /// Search for the first character \p C in the string, ignoring case. |

288 | /// |

289 | /// \returns The index of the first occurrence of \p C, or npos if not |

290 | /// found. |

291 | [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const; |

292 | |

293 | /// Search for the first character satisfying the predicate \p F |

294 | /// |

295 | /// \returns The index of the first character satisfying \p F starting from |

296 | /// \p From, or npos if not found. |

297 | [[nodiscard]] size_t find_if(function_ref<bool(char)> F, |

298 | size_t From = 0) const { |

299 | StringRef S = drop_front(N: From); |

300 | while (!S.empty()) { |

301 | if (F(S.front())) |

302 | return size() - S.size(); |

303 | S = S.drop_front(); |

304 | } |

305 | return npos; |

306 | } |

307 | |

308 | /// Search for the first character not satisfying the predicate \p F |

309 | /// |

310 | /// \returns The index of the first character not satisfying \p F starting |

311 | /// from \p From, or npos if not found. |

312 | [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F, |

313 | size_t From = 0) const { |

314 | return find_if(F: [F](char c) { return !F(c); }, From); |

315 | } |

316 | |

317 | /// Search for the first string \p Str in the string. |

318 | /// |

319 | /// \returns The index of the first occurrence of \p Str, or npos if not |

320 | /// found. |

321 | [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const; |

322 | |

323 | /// Search for the first string \p Str in the string, ignoring case. |

324 | /// |

325 | /// \returns The index of the first occurrence of \p Str, or npos if not |

326 | /// found. |

327 | [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const; |

328 | |

329 | /// Search for the last character \p C in the string. |

330 | /// |

331 | /// \returns The index of the last occurrence of \p C, or npos if not |

332 | /// found. |

333 | [[nodiscard]] size_t rfind(char C, size_t From = npos) const { |

334 | size_t I = std::min(a: From, b: Length); |

335 | while (I) { |

336 | --I; |

337 | if (Data[I] == C) |

338 | return I; |

339 | } |

340 | return npos; |

341 | } |

342 | |

343 | /// Search for the last character \p C in the string, ignoring case. |

344 | /// |

345 | /// \returns The index of the last occurrence of \p C, or npos if not |

346 | /// found. |

347 | [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const; |

348 | |

349 | /// Search for the last string \p Str in the string. |

350 | /// |

351 | /// \returns The index of the last occurrence of \p Str, or npos if not |

352 | /// found. |

353 | [[nodiscard]] size_t rfind(StringRef Str) const; |

354 | |

355 | /// Search for the last string \p Str in the string, ignoring case. |

356 | /// |

357 | /// \returns The index of the last occurrence of \p Str, or npos if not |

358 | /// found. |

359 | [[nodiscard]] size_t rfind_insensitive(StringRef Str) const; |

360 | |

361 | /// Find the first character in the string that is \p C, or npos if not |

362 | /// found. Same as find. |

363 | [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const { |

364 | return find(C, From); |

365 | } |

366 | |

367 | /// Find the first character in the string that is in \p Chars, or npos if |

368 | /// not found. |

369 | /// |

370 | /// Complexity: O(size() + Chars.size()) |

371 | [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const; |

372 | |

373 | /// Find the first character in the string that is not \p C or npos if not |

374 | /// found. |

375 | [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const; |

376 | |

377 | /// Find the first character in the string that is not in the string |

378 | /// \p Chars, or npos if not found. |

379 | /// |

380 | /// Complexity: O(size() + Chars.size()) |

381 | [[nodiscard]] size_t find_first_not_of(StringRef Chars, |

382 | size_t From = 0) const; |

383 | |

384 | /// Find the last character in the string that is \p C, or npos if not |

385 | /// found. |

386 | [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const { |

387 | return rfind(C, From); |

388 | } |

389 | |

390 | /// Find the last character in the string that is in \p C, or npos if not |

391 | /// found. |

392 | /// |

393 | /// Complexity: O(size() + Chars.size()) |

394 | [[nodiscard]] size_t find_last_of(StringRef Chars, |

395 | size_t From = npos) const; |

396 | |

397 | /// Find the last character in the string that is not \p C, or npos if not |

398 | /// found. |

399 | [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const; |

400 | |

401 | /// Find the last character in the string that is not in \p Chars, or |

402 | /// npos if not found. |

403 | /// |

404 | /// Complexity: O(size() + Chars.size()) |

405 | [[nodiscard]] size_t find_last_not_of(StringRef Chars, |

406 | size_t From = npos) const; |

407 | |

408 | /// Return true if the given string is a substring of *this, and false |

409 | /// otherwise. |

410 | [[nodiscard]] bool contains(StringRef Other) const { |

411 | return find(Str: Other) != npos; |

412 | } |

413 | |

414 | /// Return true if the given character is contained in *this, and false |

415 | /// otherwise. |

416 | [[nodiscard]] bool contains(char C) const { |

417 | return find_first_of(C) != npos; |

418 | } |

419 | |

420 | /// Return true if the given string is a substring of *this, and false |

421 | /// otherwise. |

422 | [[nodiscard]] bool contains_insensitive(StringRef Other) const { |

423 | return find_insensitive(Str: Other) != npos; |

424 | } |

425 | |

426 | /// Return true if the given character is contained in *this, and false |

427 | /// otherwise. |

428 | [[nodiscard]] bool contains_insensitive(char C) const { |

429 | return find_insensitive(C) != npos; |

430 | } |

431 | |

432 | /// @} |

433 | /// @name Helpful Algorithms |

434 | /// @{ |

435 | |

436 | /// Return the number of occurrences of \p C in the string. |

437 | [[nodiscard]] size_t count(char C) const { |

438 | size_t Count = 0; |

439 | for (size_t I = 0; I != Length; ++I) |

440 | if (Data[I] == C) |

441 | ++Count; |

442 | return Count; |

443 | } |

444 | |

445 | /// Return the number of non-overlapped occurrences of \p Str in |

446 | /// the string. |

447 | size_t count(StringRef Str) const; |

448 | |

449 | /// Parse the current string as an integer of the specified radix. If |

450 | /// \p Radix is specified as zero, this does radix autosensing using |

451 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

452 | /// |

453 | /// If the string is invalid or if only a subset of the string is valid, |

454 | /// this returns true to signify the error. The string is considered |

455 | /// erroneous if empty or if it overflows T. |

456 | template <typename T> bool getAsInteger(unsigned Radix, T &Result) const { |

457 | if constexpr (std::numeric_limits<T>::is_signed) { |

458 | long long LLVal; |

459 | if (getAsSignedInteger(Str: *this, Radix, Result&: LLVal) || |

460 | static_cast<T>(LLVal) != LLVal) |

461 | return true; |

462 | Result = LLVal; |

463 | } else { |

464 | unsigned long long ULLVal; |

465 | // The additional cast to unsigned long long is required to avoid the |

466 | // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type |

467 | // 'unsigned __int64' when instantiating getAsInteger with T = bool. |

468 | if (getAsUnsignedInteger(Str: *this, Radix, Result&: ULLVal) || |

469 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

470 | return true; |

471 | Result = ULLVal; |

472 | } |

473 | return false; |

474 | } |

475 | |

476 | /// Parse the current string as an integer of the specified radix. If |

477 | /// \p Radix is specified as zero, this does radix autosensing using |

478 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

479 | /// |

480 | /// If the string does not begin with a number of the specified radix, |

481 | /// this returns true to signify the error. The string is considered |

482 | /// erroneous if empty or if it overflows T. |

483 | /// The portion of the string representing the discovered numeric value |

484 | /// is removed from the beginning of the string. |

485 | template <typename T> bool consumeInteger(unsigned Radix, T &Result) { |

486 | if constexpr (std::numeric_limits<T>::is_signed) { |

487 | long long LLVal; |

488 | if (consumeSignedInteger(Str&: *this, Radix, Result&: LLVal) || |

489 | static_cast<long long>(static_cast<T>(LLVal)) != LLVal) |

490 | return true; |

491 | Result = LLVal; |

492 | } else { |

493 | unsigned long long ULLVal; |

494 | if (consumeUnsignedInteger(Str&: *this, Radix, Result&: ULLVal) || |

495 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

496 | return true; |

497 | Result = ULLVal; |

498 | } |

499 | return false; |

500 | } |

501 | |

502 | /// Parse the current string as an integer of the specified \p Radix, or of |

503 | /// an autosensed radix if the \p Radix given is 0. The current value in |

504 | /// \p Result is discarded, and the storage is changed to be wide enough to |

505 | /// store the parsed integer. |

506 | /// |

507 | /// \returns true if the string does not solely consist of a valid |

508 | /// non-empty number in the appropriate base. |

509 | /// |

510 | /// APInt::fromString is superficially similar but assumes the |

511 | /// string is well-formed in the given radix. |

512 | bool getAsInteger(unsigned Radix, APInt &Result) const; |

513 | |

514 | /// Parse the current string as an integer of the specified \p Radix. If |

515 | /// \p Radix is specified as zero, this does radix autosensing using |

516 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

517 | /// |

518 | /// If the string does not begin with a number of the specified radix, |

519 | /// this returns true to signify the error. The string is considered |

520 | /// erroneous if empty. |

521 | /// The portion of the string representing the discovered numeric value |

522 | /// is removed from the beginning of the string. |

523 | bool consumeInteger(unsigned Radix, APInt &Result); |

524 | |

525 | /// Parse the current string as an IEEE double-precision floating |

526 | /// point value. The string must be a well-formed double. |

527 | /// |

528 | /// If \p AllowInexact is false, the function will fail if the string |

529 | /// cannot be represented exactly. Otherwise, the function only fails |

530 | /// in case of an overflow or underflow, or an invalid floating point |

531 | /// representation. |

532 | bool getAsDouble(double &Result, bool AllowInexact = true) const; |

533 | |

534 | /// @} |

535 | /// @name String Operations |

536 | /// @{ |

537 | |

538 | // Convert the given ASCII string to lowercase. |

539 | [[nodiscard]] std::string lower() const; |

540 | |

541 | /// Convert the given ASCII string to uppercase. |

542 | [[nodiscard]] std::string upper() const; |

543 | |

544 | /// @} |

545 | /// @name Substring Operations |

546 | /// @{ |

547 | |

548 | /// Return a reference to the substring from [Start, Start + N). |

549 | /// |

550 | /// \param Start The index of the starting character in the substring; if |

551 | /// the index is npos or greater than the length of the string then the |

552 | /// empty substring will be returned. |

553 | /// |

554 | /// \param N The number of characters to included in the substring. If N |

555 | /// exceeds the number of characters remaining in the string, the string |

556 | /// suffix (starting with \p Start) will be returned. |

557 | [[nodiscard]] constexpr StringRef substr(size_t Start, |

558 | size_t N = npos) const { |

559 | Start = std::min(a: Start, b: Length); |

560 | return StringRef(Data + Start, std::min(a: N, b: Length - Start)); |

561 | } |

562 | |

563 | /// Return a StringRef equal to 'this' but with only the first \p N |

564 | /// elements remaining. If \p N is greater than the length of the |

565 | /// string, the entire string is returned. |

566 | [[nodiscard]] StringRef take_front(size_t N = 1) const { |

567 | if (N >= size()) |

568 | return *this; |

569 | return drop_back(N: size() - N); |

570 | } |

571 | |

572 | /// Return a StringRef equal to 'this' but with only the last \p N |

573 | /// elements remaining. If \p N is greater than the length of the |

574 | /// string, the entire string is returned. |

575 | [[nodiscard]] StringRef take_back(size_t N = 1) const { |

576 | if (N >= size()) |

577 | return *this; |

578 | return drop_front(N: size() - N); |

579 | } |

580 | |

581 | /// Return the longest prefix of 'this' such that every character |

582 | /// in the prefix satisfies the given predicate. |

583 | [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const { |

584 | return substr(Start: 0, N: find_if_not(F)); |

585 | } |

586 | |

587 | /// Return the longest prefix of 'this' such that no character in |

588 | /// the prefix satisfies the given predicate. |

589 | [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const { |

590 | return substr(Start: 0, N: find_if(F)); |

591 | } |

592 | |

593 | /// Return a StringRef equal to 'this' but with the first \p N elements |

594 | /// dropped. |

595 | [[nodiscard]] StringRef drop_front(size_t N = 1) const { |

596 | assert(size() >= N && "Dropping more elements than exist"); |

597 | return substr(Start: N); |

598 | } |

599 | |

600 | /// Return a StringRef equal to 'this' but with the last \p N elements |

601 | /// dropped. |

602 | [[nodiscard]] StringRef drop_back(size_t N = 1) const { |

603 | assert(size() >= N && "Dropping more elements than exist"); |

604 | return substr(Start: 0, N: size()-N); |

605 | } |

606 | |

607 | /// Return a StringRef equal to 'this', but with all characters satisfying |

608 | /// the given predicate dropped from the beginning of the string. |

609 | [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const { |

610 | return substr(Start: find_if_not(F)); |

611 | } |

612 | |

613 | /// Return a StringRef equal to 'this', but with all characters not |

614 | /// satisfying the given predicate dropped from the beginning of the string. |

615 | [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const { |

616 | return substr(Start: find_if(F)); |

617 | } |

618 | |

619 | /// Returns true if this StringRef has the given prefix and removes that |

620 | /// prefix. |

621 | bool consume_front(StringRef Prefix) { |

622 | if (!starts_with(Prefix)) |

623 | return false; |

624 | |

625 | *this = substr(Start: Prefix.size()); |

626 | return true; |

627 | } |

628 | |

629 | /// Returns true if this StringRef has the given prefix, ignoring case, |

630 | /// and removes that prefix. |

631 | bool consume_front_insensitive(StringRef Prefix) { |

632 | if (!starts_with_insensitive(Prefix)) |

633 | return false; |

634 | |

635 | *this = substr(Start: Prefix.size()); |

636 | return true; |

637 | } |

638 | |

639 | /// Returns true if this StringRef has the given suffix and removes that |

640 | /// suffix. |

641 | bool consume_back(StringRef Suffix) { |

642 | if (!ends_with(Suffix)) |

643 | return false; |

644 | |

645 | *this = substr(Start: 0, N: size() - Suffix.size()); |

646 | return true; |

647 | } |

648 | |

649 | /// Returns true if this StringRef has the given suffix, ignoring case, |

650 | /// and removes that suffix. |

651 | bool consume_back_insensitive(StringRef Suffix) { |

652 | if (!ends_with_insensitive(Suffix)) |

653 | return false; |

654 | |

655 | *this = substr(Start: 0, N: size() - Suffix.size()); |

656 | return true; |

657 | } |

658 | |

659 | /// Return a reference to the substring from [Start, End). |

660 | /// |

661 | /// \param Start The index of the starting character in the substring; if |

662 | /// the index is npos or greater than the length of the string then the |

663 | /// empty substring will be returned. |

664 | /// |

665 | /// \param End The index following the last character to include in the |

666 | /// substring. If this is npos or exceeds the number of characters |

667 | /// remaining in the string, the string suffix (starting with \p Start) |

668 | /// will be returned. If this is less than \p Start, an empty string will |

669 | /// be returned. |

670 | [[nodiscard]] StringRef slice(size_t Start, size_t End) const { |

671 | Start = std::min(a: Start, b: Length); |

672 | End = std::clamp(val: End, lo: Start, hi: Length); |

673 | return StringRef(Data + Start, End - Start); |

674 | } |

675 | |

676 | /// Split into two substrings around the first occurrence of a separator |

677 | /// character. |

678 | /// |

679 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

680 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

681 | /// maximal. If \p Separator is not in the string, then the result is a |

682 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

683 | /// |

684 | /// \param Separator The character to split on. |

685 | /// \returns The split substrings. |

686 | [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const { |

687 | return split(Separator: StringRef(&Separator, 1)); |

688 | } |

689 | |

690 | /// Split into two substrings around the first occurrence of a separator |

691 | /// string. |

692 | /// |

693 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

694 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

695 | /// maximal. If \p Separator is not in the string, then the result is a |

696 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

697 | /// |

698 | /// \param Separator - The string to split on. |

699 | /// \return - The split substrings. |

700 | [[nodiscard]] std::pair<StringRef, StringRef> |

701 | split(StringRef Separator) const { |

702 | size_t Idx = find(Str: Separator); |

703 | if (Idx == npos) |

704 | return std::make_pair(x: *this, y: StringRef()); |

705 | return std::make_pair(x: slice(Start: 0, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos)); |

706 | } |

707 | |

708 | /// Split into two substrings around the last occurrence of a separator |

709 | /// string. |

710 | /// |

711 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

712 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

713 | /// minimal. If \p Separator is not in the string, then the result is a |

714 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

715 | /// |

716 | /// \param Separator - The string to split on. |

717 | /// \return - The split substrings. |

718 | [[nodiscard]] std::pair<StringRef, StringRef> |

719 | rsplit(StringRef Separator) const { |

720 | size_t Idx = rfind(Str: Separator); |

721 | if (Idx == npos) |

722 | return std::make_pair(x: *this, y: StringRef()); |

723 | return std::make_pair(x: slice(Start: 0, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos)); |

724 | } |

725 | |

726 | /// Split into substrings around the occurrences of a separator string. |

727 | /// |

728 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

729 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

730 | /// elements are added to A. |

731 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

732 | /// still count when considering \p MaxSplit |

733 | /// An useful invariant is that |

734 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

735 | /// |

736 | /// \param A - Where to put the substrings. |

737 | /// \param Separator - The string to split on. |

738 | /// \param MaxSplit - The maximum number of times the string is split. |

739 | /// \param KeepEmpty - True if empty substring should be added. |

740 | void split(SmallVectorImpl<StringRef> &A, |

741 | StringRef Separator, int MaxSplit = -1, |

742 | bool KeepEmpty = true) const; |

743 | |

744 | /// Split into substrings around the occurrences of a separator character. |

745 | /// |

746 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

747 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

748 | /// elements are added to A. |

749 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

750 | /// still count when considering \p MaxSplit |

751 | /// An useful invariant is that |

752 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

753 | /// |

754 | /// \param A - Where to put the substrings. |

755 | /// \param Separator - The string to split on. |

756 | /// \param MaxSplit - The maximum number of times the string is split. |

757 | /// \param KeepEmpty - True if empty substring should be added. |

758 | void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, |

759 | bool KeepEmpty = true) const; |

760 | |

761 | /// Split into two substrings around the last occurrence of a separator |

762 | /// character. |

763 | /// |

764 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

765 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

766 | /// minimal. If \p Separator is not in the string, then the result is a |

767 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

768 | /// |

769 | /// \param Separator - The character to split on. |

770 | /// \return - The split substrings. |

771 | [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const { |

772 | return rsplit(Separator: StringRef(&Separator, 1)); |

773 | } |

774 | |

775 | /// Return string with consecutive \p Char characters starting from the |

776 | /// the left removed. |

777 | [[nodiscard]] StringRef ltrim(char Char) const { |

778 | return drop_front(N: std::min(a: Length, b: find_first_not_of(C: Char))); |

779 | } |

780 | |

781 | /// Return string with consecutive characters in \p Chars starting from |

782 | /// the left removed. |

783 | [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { |

784 | return drop_front(N: std::min(a: Length, b: find_first_not_of(Chars))); |

785 | } |

786 | |

787 | /// Return string with consecutive \p Char characters starting from the |

788 | /// right removed. |

789 | [[nodiscard]] StringRef rtrim(char Char) const { |

790 | return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(C: Char) + 1)); |

791 | } |

792 | |

793 | /// Return string with consecutive characters in \p Chars starting from |

794 | /// the right removed. |

795 | [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { |

796 | return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(Chars) + 1)); |

797 | } |

798 | |

799 | /// Return string with consecutive \p Char characters starting from the |

800 | /// left and right removed. |

801 | [[nodiscard]] StringRef trim(char Char) const { |

802 | return ltrim(Char).rtrim(Char); |

803 | } |

804 | |

805 | /// Return string with consecutive characters in \p Chars starting from |

806 | /// the left and right removed. |

807 | [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const { |

808 | return ltrim(Chars).rtrim(Chars); |

809 | } |

810 | |

811 | /// Detect the line ending style of the string. |

812 | /// |

813 | /// If the string contains a line ending, return the line ending character |

814 | /// sequence that is detected. Otherwise return '\n' for unix line endings. |

815 | /// |

816 | /// \return - The line ending character sequence. |

817 | [[nodiscard]] StringRef detectEOL() const { |

818 | size_t Pos = find(C: '\r'); |

819 | if (Pos == npos) { |

820 | // If there is no carriage return, assume unix |

821 | return "\n"; |

822 | } |

823 | if (Pos + 1 < Length && Data[Pos + 1] == '\n') |

824 | return "\r\n"; // Windows |

825 | if (Pos > 0 && Data[Pos - 1] == '\n') |

826 | return "\n\r"; // You monster! |

827 | return "\r"; // Classic Mac |

828 | } |

829 | /// @} |

830 | }; |

831 | |

832 | /// A wrapper around a string literal that serves as a proxy for constructing |

833 | /// global tables of StringRefs with the length computed at compile time. |

834 | /// In order to avoid the invocation of a global constructor, StringLiteral |

835 | /// should *only* be used in a constexpr context, as such: |

836 | /// |

837 | /// constexpr StringLiteral S("test"); |

838 | /// |

839 | class StringLiteral : public StringRef { |

840 | private: |

841 | constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) { |

842 | } |

843 | |

844 | public: |

845 | template <size_t N> |

846 | constexpr StringLiteral(const char (&Str)[N]) |

847 | #if defined(__clang__) && __has_attribute(enable_if) |

848 | #pragma clang diagnostic push |

849 | #pragma clang diagnostic ignored "-Wgcc-compat" |

850 | __attribute((enable_if(__builtin_strlen(Str) == N - 1, |

851 | "invalid string literal"))) |

852 | #pragma clang diagnostic pop |

853 | #endif |

854 | : StringRef(Str, N - 1) { |

855 | } |

856 | |

857 | // Explicit construction for strings like "foo\0bar". |

858 | template <size_t N> |

859 | static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) { |

860 | return StringLiteral(Str, N - 1); |

861 | } |

862 | }; |

863 | |

864 | /// @name StringRef Comparison Operators |

865 | /// @{ |

866 | |

867 | inline bool operator==(StringRef LHS, StringRef RHS) { |

868 | return LHS.equals(RHS); |

869 | } |

870 | |

871 | inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } |

872 | |

873 | inline bool operator<(StringRef LHS, StringRef RHS) { |

874 | return LHS.compare(RHS) < 0; |

875 | } |

876 | |

877 | inline bool operator<=(StringRef LHS, StringRef RHS) { |

878 | return LHS.compare(RHS) <= 0; |

879 | } |

880 | |

881 | inline bool operator>(StringRef LHS, StringRef RHS) { |

882 | return LHS.compare(RHS) > 0; |

883 | } |

884 | |

885 | inline bool operator>=(StringRef LHS, StringRef RHS) { |

886 | return LHS.compare(RHS) >= 0; |

887 | } |

888 | |

889 | inline std::string &operator+=(std::string &buffer, StringRef string) { |

890 | return buffer.append(s: string.data(), n: string.size()); |

891 | } |

892 | |

893 | /// @} |

894 | |

895 | /// Compute a hash_code for a StringRef. |

896 | [[nodiscard]] hash_code hash_value(StringRef S); |

897 | |

898 | // Provide DenseMapInfo for StringRefs. |

899 | template <> struct DenseMapInfo<StringRef, void> { |

900 | static inline StringRef getEmptyKey() { |

901 | return StringRef( |

902 | reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0); |

903 | } |

904 | |

905 | static inline StringRef getTombstoneKey() { |

906 | return StringRef( |

907 | reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0); |

908 | } |

909 | |

910 | static unsigned getHashValue(StringRef Val); |

911 | |

912 | static bool isEqual(StringRef LHS, StringRef RHS) { |

913 | if (RHS.data() == getEmptyKey().data()) |

914 | return LHS.data() == getEmptyKey().data(); |

915 | if (RHS.data() == getTombstoneKey().data()) |

916 | return LHS.data() == getTombstoneKey().data(); |

917 | return LHS == RHS; |

918 | } |

919 | }; |

920 | |

921 | } // end namespace llvm |

922 | |

923 | #endif // LLVM_ADT_STRINGREF_H |

924 |