1 | //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | |

9 | #ifndef LLVM_ADT_STRINGREF_H |

10 | #define LLVM_ADT_STRINGREF_H |

11 | |

12 | #include "llvm/ADT/DenseMapInfo.h" |

13 | #include "llvm/ADT/STLFunctionalExtras.h" |

14 | #include "llvm/ADT/iterator_range.h" |

15 | #include "llvm/Support/Compiler.h" |

16 | #include <algorithm> |

17 | #include <cassert> |

18 | #include <cstddef> |

19 | #include <cstring> |

20 | #include <limits> |

21 | #include <string> |

22 | #include <string_view> |

23 | #include <type_traits> |

24 | #include <utility> |

25 | |

26 | namespace llvm { |

27 | |

28 | class APInt; |

29 | class hash_code; |

30 | template <typename T> class SmallVectorImpl; |

31 | class StringRef; |

32 | |

33 | /// Helper functions for StringRef::getAsInteger. |

34 | bool getAsUnsignedInteger(StringRef Str, unsigned Radix, |

35 | unsigned long long &Result); |

36 | |

37 | bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); |

38 | |

39 | bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, |

40 | unsigned long long &Result); |

41 | bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); |

42 | |

43 | /// StringRef - Represent a constant reference to a string, i.e. a character |

44 | /// array and a length, which need not be null terminated. |

45 | /// |

46 | /// This class does not own the string data, it is expected to be used in |

47 | /// situations where the character data resides in some other buffer, whose |

48 | /// lifetime extends past that of the StringRef. For this reason, it is not in |

49 | /// general safe to store a StringRef. |

50 | class LLVM_GSL_POINTER StringRef { |

51 | public: |

52 | static constexpr size_t npos = ~size_t(0); |

53 | |

54 | using iterator = const char *; |

55 | using const_iterator = const char *; |

56 | using size_type = size_t; |

57 | |

58 | private: |

59 | /// The start of the string, in an external buffer. |

60 | const char *Data = nullptr; |

61 | |

62 | /// The length of the string. |

63 | size_t Length = 0; |

64 | |

/// Compare the first \p Length bytes of \p Lhs and \p Rhs.
///
/// Calling memcmp with a null pointer is undefined behavior, so a
/// zero-length comparison is answered here without calling it.
///
/// \returns 0 when \p Length is zero; otherwise the result of ::memcmp.
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  return Length == 0 ? 0 : ::memcmp(Lhs, Rhs, Length);
}

71 | |

72 | public: |

73 | /// @name Constructors |

74 | /// @{ |

75 | |

76 | /// Construct an empty string ref. |

77 | /*implicit*/ StringRef() = default; |

78 | |

79 | /// Disable conversion from nullptr. This prevents things like |

80 | /// if (S == nullptr) |

81 | StringRef(std::nullptr_t) = delete; |

82 | |

83 | /// Construct a string ref from a cstring. |

84 | /*implicit*/ constexpr StringRef(const char *Str) |

85 | : Data(Str), Length(Str ? |

86 | // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. |

87 | #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 |

88 | __builtin_strlen(Str) |

89 | #else |

90 | std::char_traits<char>::length(s: Str) |

91 | #endif |

92 | : 0) { |

93 | } |

94 | |

95 | /// Construct a string ref from a pointer and length. |

96 | /*implicit*/ constexpr StringRef(const char *data, size_t length) |

97 | : Data(data), Length(length) {} |

98 | |

99 | /// Construct a string ref from an std::string. |

100 | /*implicit*/ StringRef(const std::string &Str) |

101 | : Data(Str.data()), Length(Str.length()) {} |

102 | |

103 | /// Construct a string ref from an std::string_view. |

104 | /*implicit*/ constexpr StringRef(std::string_view Str) |

105 | : Data(Str.data()), Length(Str.size()) {} |

106 | |

107 | /// @} |

108 | /// @name Iterators |

109 | /// @{ |

110 | |

111 | iterator begin() const { return Data; } |

112 | |

113 | iterator end() const { return Data + Length; } |

114 | |

115 | const unsigned char *bytes_begin() const { |

116 | return reinterpret_cast<const unsigned char *>(begin()); |

117 | } |

118 | const unsigned char *bytes_end() const { |

119 | return reinterpret_cast<const unsigned char *>(end()); |

120 | } |

121 | iterator_range<const unsigned char *> bytes() const { |

122 | return make_range(x: bytes_begin(), y: bytes_end()); |

123 | } |

124 | |

125 | /// @} |

126 | /// @name String Operations |

127 | /// @{ |

128 | |

129 | /// data - Get a pointer to the start of the string (which may not be null |

130 | /// terminated). |

131 | [[nodiscard]] const char *data() const { return Data; } |

132 | |

133 | /// empty - Check if the string is empty. |

134 | [[nodiscard]] constexpr bool empty() const { return Length == 0; } |

135 | |

136 | /// size - Get the string size. |

137 | [[nodiscard]] constexpr size_t size() const { return Length; } |

138 | |

139 | /// front - Get the first character in the string. |

140 | [[nodiscard]] char front() const { |

141 | assert(!empty()); |

142 | return Data[0]; |

143 | } |

144 | |

145 | /// back - Get the last character in the string. |

146 | [[nodiscard]] char back() const { |

147 | assert(!empty()); |

148 | return Data[Length-1]; |

149 | } |

150 | |

151 | // copy - Allocate copy in Allocator and return StringRef to it. |

152 | template <typename Allocator> |

153 | [[nodiscard]] StringRef copy(Allocator &A) const { |

154 | // Don't request a length 0 copy from the allocator. |

155 | if (empty()) |

156 | return StringRef(); |

157 | char *S = A.template Allocate<char>(Length); |

158 | std::copy(begin(), end(), S); |

159 | return StringRef(S, Length); |

160 | } |

161 | |

162 | /// equals - Check for string equality, this is more efficient than |

163 | /// compare() when the relative ordering of inequal strings isn't needed. |

164 | [[nodiscard]] bool equals(StringRef RHS) const { |

165 | return (Length == RHS.Length && |

166 | compareMemory(Lhs: Data, Rhs: RHS.Data, Length: RHS.Length) == 0); |

167 | } |

168 | |

169 | /// Check for string equality, ignoring case. |

170 | [[nodiscard]] bool equals_insensitive(StringRef RHS) const { |

171 | return Length == RHS.Length && compare_insensitive(RHS) == 0; |

172 | } |

173 | |

174 | /// compare - Compare two strings; the result is negative, zero, or positive |

175 | /// if this string is lexicographically less than, equal to, or greater than |

176 | /// the \p RHS. |

177 | [[nodiscard]] int compare(StringRef RHS) const { |

178 | // Check the prefix for a mismatch. |

179 | if (int Res = compareMemory(Lhs: Data, Rhs: RHS.Data, Length: std::min(a: Length, b: RHS.Length))) |

180 | return Res < 0 ? -1 : 1; |

181 | |

182 | // Otherwise the prefixes match, so we only need to check the lengths. |

183 | if (Length == RHS.Length) |

184 | return 0; |

185 | return Length < RHS.Length ? -1 : 1; |

186 | } |

187 | |

188 | /// Compare two strings, ignoring case. |

189 | [[nodiscard]] int compare_insensitive(StringRef RHS) const; |

190 | |

191 | /// compare_numeric - Compare two strings, treating sequences of digits as |

192 | /// numbers. |

193 | [[nodiscard]] int compare_numeric(StringRef RHS) const; |

194 | |

195 | /// Determine the edit distance between this string and another |

196 | /// string. |

197 | /// |

198 | /// \param Other the string to compare this string against. |

199 | /// |

200 | /// \param AllowReplacements whether to allow character |

201 | /// replacements (change one character into another) as a single |

202 | /// operation, rather than as two operations (an insertion and a |

203 | /// removal). |

204 | /// |

205 | /// \param MaxEditDistance If non-zero, the maximum edit distance that |

206 | /// this routine is allowed to compute. If the edit distance will exceed |

207 | /// that maximum, returns \c MaxEditDistance+1. |

208 | /// |

209 | /// \returns the minimum number of character insertions, removals, |

210 | /// or (if \p AllowReplacements is \c true) replacements needed to |

211 | /// transform one of the given strings into the other. If zero, |

212 | /// the strings are identical. |

213 | [[nodiscard]] unsigned edit_distance(StringRef Other, |

214 | bool AllowReplacements = true, |

215 | unsigned MaxEditDistance = 0) const; |

216 | |

217 | [[nodiscard]] unsigned |

218 | edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, |

219 | unsigned MaxEditDistance = 0) const; |

220 | |

221 | /// str - Get the contents as an std::string. |

222 | [[nodiscard]] std::string str() const { |

223 | if (!Data) return std::string(); |

224 | return std::string(Data, Length); |

225 | } |

226 | |

227 | /// @} |

228 | /// @name Operator Overloads |

229 | /// @{ |

230 | |

231 | [[nodiscard]] char operator[](size_t Index) const { |

232 | assert(Index < Length && "Invalid index!"); |

233 | return Data[Index]; |

234 | } |

235 | |

236 | /// Disallow accidental assignment from a temporary std::string. |

237 | /// |

238 | /// The declaration here is extra complicated so that `stringRef = {}` |

239 | /// and `stringRef = "abc"` continue to select the move assignment operator. |

240 | template <typename T> |

241 | std::enable_if_t<std::is_same<T, std::string>::value, StringRef> & |

242 | operator=(T &&Str) = delete; |

243 | |

244 | /// @} |

245 | /// @name Type Conversions |

246 | /// @{ |

247 | |

248 | operator std::string_view() const { |

249 | return std::string_view(data(), size()); |

250 | } |

251 | |

252 | /// @} |

253 | /// @name String Predicates |

254 | /// @{ |

255 | |

256 | /// Check if this string starts with the given \p Prefix. |

257 | [[nodiscard]] bool starts_with(StringRef Prefix) const { |

258 | return Length >= Prefix.Length && |

259 | compareMemory(Lhs: Data, Rhs: Prefix.Data, Length: Prefix.Length) == 0; |

260 | } |

261 | [[nodiscard]] bool startswith(StringRef Prefix) const { |

262 | return starts_with(Prefix); |

263 | } |

264 | |

265 | /// Check if this string starts with the given \p Prefix, ignoring case. |

266 | [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const; |

267 | [[nodiscard]] bool startswith_insensitive(StringRef Prefix) const { |

268 | return starts_with_insensitive(Prefix); |

269 | } |

270 | |

271 | /// Check if this string ends with the given \p Suffix. |

272 | [[nodiscard]] bool ends_with(StringRef Suffix) const { |

273 | return Length >= Suffix.Length && |

274 | compareMemory(Lhs: end() - Suffix.Length, Rhs: Suffix.Data, Length: Suffix.Length) == |

275 | 0; |

276 | } |

277 | [[nodiscard]] bool endswith(StringRef Suffix) const { |

278 | return ends_with(Suffix); |

279 | } |

280 | |

281 | /// Check if this string ends with the given \p Suffix, ignoring case. |

282 | [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const; |

283 | [[nodiscard]] bool endswith_insensitive(StringRef Suffix) const { |

284 | return ends_with_insensitive(Suffix); |

285 | } |

286 | |

287 | /// @} |

288 | /// @name String Searching |

289 | /// @{ |

290 | |

291 | /// Search for the first character \p C in the string. |

292 | /// |

293 | /// \returns The index of the first occurrence of \p C, or npos if not |

294 | /// found. |

295 | [[nodiscard]] size_t find(char C, size_t From = 0) const { |

296 | return std::string_view(*this).find(c: C, pos: From); |

297 | } |

298 | |

299 | /// Search for the first character \p C in the string, ignoring case. |

300 | /// |

301 | /// \returns The index of the first occurrence of \p C, or npos if not |

302 | /// found. |

303 | [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const; |

304 | |

305 | /// Search for the first character satisfying the predicate \p F |

306 | /// |

307 | /// \returns The index of the first character satisfying \p F starting from |

308 | /// \p From, or npos if not found. |

309 | [[nodiscard]] size_t find_if(function_ref<bool(char)> F, |

310 | size_t From = 0) const { |

311 | StringRef S = drop_front(N: From); |

312 | while (!S.empty()) { |

313 | if (F(S.front())) |

314 | return size() - S.size(); |

315 | S = S.drop_front(); |

316 | } |

317 | return npos; |

318 | } |

319 | |

320 | /// Search for the first character not satisfying the predicate \p F |

321 | /// |

322 | /// \returns The index of the first character not satisfying \p F starting |

323 | /// from \p From, or npos if not found. |

324 | [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F, |

325 | size_t From = 0) const { |

326 | return find_if(F: [F](char c) { return !F(c); }, From); |

327 | } |

328 | |

329 | /// Search for the first string \p Str in the string. |

330 | /// |

331 | /// \returns The index of the first occurrence of \p Str, or npos if not |

332 | /// found. |

333 | [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const; |

334 | |

335 | /// Search for the first string \p Str in the string, ignoring case. |

336 | /// |

337 | /// \returns The index of the first occurrence of \p Str, or npos if not |

338 | /// found. |

339 | [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const; |

340 | |

341 | /// Search for the last character \p C in the string. |

342 | /// |

343 | /// \returns The index of the last occurrence of \p C, or npos if not |

344 | /// found. |

345 | [[nodiscard]] size_t rfind(char C, size_t From = npos) const { |

346 | size_t I = std::min(a: From, b: Length); |

347 | while (I) { |

348 | --I; |

349 | if (Data[I] == C) |

350 | return I; |

351 | } |

352 | return npos; |

353 | } |

354 | |

355 | /// Search for the last character \p C in the string, ignoring case. |

356 | /// |

357 | /// \returns The index of the last occurrence of \p C, or npos if not |

358 | /// found. |

359 | [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const; |

360 | |

361 | /// Search for the last string \p Str in the string. |

362 | /// |

363 | /// \returns The index of the last occurrence of \p Str, or npos if not |

364 | /// found. |

365 | [[nodiscard]] size_t rfind(StringRef Str) const; |

366 | |

367 | /// Search for the last string \p Str in the string, ignoring case. |

368 | /// |

369 | /// \returns The index of the last occurrence of \p Str, or npos if not |

370 | /// found. |

371 | [[nodiscard]] size_t rfind_insensitive(StringRef Str) const; |

372 | |

373 | /// Find the first character in the string that is \p C, or npos if not |

374 | /// found. Same as find. |

375 | [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const { |

376 | return find(C, From); |

377 | } |

378 | |

379 | /// Find the first character in the string that is in \p Chars, or npos if |

380 | /// not found. |

381 | /// |

382 | /// Complexity: O(size() + Chars.size()) |

383 | [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const; |

384 | |

385 | /// Find the first character in the string that is not \p C or npos if not |

386 | /// found. |

387 | [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const; |

388 | |

389 | /// Find the first character in the string that is not in the string |

390 | /// \p Chars, or npos if not found. |

391 | /// |

392 | /// Complexity: O(size() + Chars.size()) |

393 | [[nodiscard]] size_t find_first_not_of(StringRef Chars, |

394 | size_t From = 0) const; |

395 | |

396 | /// Find the last character in the string that is \p C, or npos if not |

397 | /// found. |

398 | [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const { |

399 | return rfind(C, From); |

400 | } |

401 | |

402 | /// Find the last character in the string that is in \p Chars, or npos if not |

403 | /// found. |

404 | /// |

405 | /// Complexity: O(size() + Chars.size()) |

406 | [[nodiscard]] size_t find_last_of(StringRef Chars, |

407 | size_t From = npos) const; |

408 | |

409 | /// Find the last character in the string that is not \p C, or npos if not |

410 | /// found. |

411 | [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const; |

412 | |

413 | /// Find the last character in the string that is not in \p Chars, or |

414 | /// npos if not found. |

415 | /// |

416 | /// Complexity: O(size() + Chars.size()) |

417 | [[nodiscard]] size_t find_last_not_of(StringRef Chars, |

418 | size_t From = npos) const; |

419 | |

420 | /// Return true if the given string is a substring of *this, and false |

421 | /// otherwise. |

422 | [[nodiscard]] bool contains(StringRef Other) const { |

423 | return find(Str: Other) != npos; |

424 | } |

425 | |

426 | /// Return true if the given character is contained in *this, and false |

427 | /// otherwise. |

428 | [[nodiscard]] bool contains(char C) const { |

429 | return find_first_of(C) != npos; |

430 | } |

431 | |

432 | /// Return true if the given string is a substring of *this, and false |

433 | /// otherwise. |

434 | [[nodiscard]] bool contains_insensitive(StringRef Other) const { |

435 | return find_insensitive(Str: Other) != npos; |

436 | } |

437 | |

438 | /// Return true if the given character is contained in *this, and false |

439 | /// otherwise. |

440 | [[nodiscard]] bool contains_insensitive(char C) const { |

441 | return find_insensitive(C) != npos; |

442 | } |

443 | |

444 | /// @} |

445 | /// @name Helpful Algorithms |

446 | /// @{ |

447 | |

448 | /// Return the number of occurrences of \p C in the string. |

449 | [[nodiscard]] size_t count(char C) const { |

450 | size_t Count = 0; |

451 | for (size_t I = 0; I != Length; ++I) |

452 | if (Data[I] == C) |

453 | ++Count; |

454 | return Count; |

455 | } |

456 | |

457 | /// Return the number of non-overlapped occurrences of \p Str in |

458 | /// the string. |

459 | size_t count(StringRef Str) const; |

460 | |

461 | /// Parse the current string as an integer of the specified radix. If |

462 | /// \p Radix is specified as zero, this does radix autosensing using |

463 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

464 | /// |

465 | /// If the string is invalid or if only a subset of the string is valid, |

466 | /// this returns true to signify the error. The string is considered |

467 | /// erroneous if empty or if it overflows T. |

468 | template <typename T> bool getAsInteger(unsigned Radix, T &Result) const { |

469 | if constexpr (std::numeric_limits<T>::is_signed) { |

470 | long long LLVal; |

471 | if (getAsSignedInteger(Str: *this, Radix, Result&: LLVal) || |

472 | static_cast<T>(LLVal) != LLVal) |

473 | return true; |

474 | Result = LLVal; |

475 | } else { |

476 | unsigned long long ULLVal; |

477 | // The additional cast to unsigned long long is required to avoid the |

478 | // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type |

479 | // 'unsigned __int64' when instantiating getAsInteger with T = bool. |

480 | if (getAsUnsignedInteger(Str: *this, Radix, Result&: ULLVal) || |

481 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

482 | return true; |

483 | Result = ULLVal; |

484 | } |

485 | return false; |

486 | } |

487 | |

488 | /// Parse the current string as an integer of the specified radix. If |

489 | /// \p Radix is specified as zero, this does radix autosensing using |

490 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

491 | /// |

492 | /// If the string does not begin with a number of the specified radix, |

493 | /// this returns true to signify the error. The string is considered |

494 | /// erroneous if empty or if it overflows T. |

495 | /// The portion of the string representing the discovered numeric value |

496 | /// is removed from the beginning of the string. |

497 | template <typename T> bool consumeInteger(unsigned Radix, T &Result) { |

498 | if constexpr (std::numeric_limits<T>::is_signed) { |

499 | long long LLVal; |

500 | if (consumeSignedInteger(Str&: *this, Radix, Result&: LLVal) || |

501 | static_cast<long long>(static_cast<T>(LLVal)) != LLVal) |

502 | return true; |

503 | Result = LLVal; |

504 | } else { |

505 | unsigned long long ULLVal; |

506 | if (consumeUnsignedInteger(Str&: *this, Radix, Result&: ULLVal) || |

507 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

508 | return true; |

509 | Result = ULLVal; |

510 | } |

511 | return false; |

512 | } |

513 | |

514 | /// Parse the current string as an integer of the specified \p Radix, or of |

515 | /// an autosensed radix if the \p Radix given is 0. The current value in |

516 | /// \p Result is discarded, and the storage is changed to be wide enough to |

517 | /// store the parsed integer. |

518 | /// |

519 | /// \returns true if the string does not solely consist of a valid |

520 | /// non-empty number in the appropriate base. |

521 | /// |

522 | /// APInt::fromString is superficially similar but assumes the |

523 | /// string is well-formed in the given radix. |

524 | bool getAsInteger(unsigned Radix, APInt &Result) const; |

525 | |

526 | /// Parse the current string as an IEEE double-precision floating |

527 | /// point value. The string must be a well-formed double. |

528 | /// |

529 | /// If \p AllowInexact is false, the function will fail if the string |

530 | /// cannot be represented exactly. Otherwise, the function only fails |

531 | /// in case of an overflow or underflow, or an invalid floating point |

532 | /// representation. |

533 | bool getAsDouble(double &Result, bool AllowInexact = true) const; |

534 | |

535 | /// @} |

536 | /// @name String Operations |

537 | /// @{ |

538 | |

539 | /// Convert the given ASCII string to lowercase. |

540 | [[nodiscard]] std::string lower() const; |

541 | |

542 | /// Convert the given ASCII string to uppercase. |

543 | [[nodiscard]] std::string upper() const; |

544 | |

545 | /// @} |

546 | /// @name Substring Operations |

547 | /// @{ |

548 | |

549 | /// Return a reference to the substring from [Start, Start + N). |

550 | /// |

551 | /// \param Start The index of the starting character in the substring; if |

552 | /// the index is npos or greater than the length of the string then the |

553 | /// empty substring will be returned. |

554 | /// |

555 | /// \param N The number of characters to included in the substring. If N |

556 | /// exceeds the number of characters remaining in the string, the string |

557 | /// suffix (starting with \p Start) will be returned. |

558 | [[nodiscard]] constexpr StringRef substr(size_t Start, |

559 | size_t N = npos) const { |

560 | Start = std::min(a: Start, b: Length); |

561 | return StringRef(Data + Start, std::min(a: N, b: Length - Start)); |

562 | } |

563 | |

564 | /// Return a StringRef equal to 'this' but with only the first \p N |

565 | /// elements remaining. If \p N is greater than the length of the |

566 | /// string, the entire string is returned. |

567 | [[nodiscard]] StringRef take_front(size_t N = 1) const { |

568 | if (N >= size()) |

569 | return *this; |

570 | return drop_back(N: size() - N); |

571 | } |

572 | |

573 | /// Return a StringRef equal to 'this' but with only the last \p N |

574 | /// elements remaining. If \p N is greater than the length of the |

575 | /// string, the entire string is returned. |

576 | [[nodiscard]] StringRef take_back(size_t N = 1) const { |

577 | if (N >= size()) |

578 | return *this; |

579 | return drop_front(N: size() - N); |

580 | } |

581 | |

582 | /// Return the longest prefix of 'this' such that every character |

583 | /// in the prefix satisfies the given predicate. |

584 | [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const { |

585 | return substr(Start: 0, N: find_if_not(F)); |

586 | } |

587 | |

588 | /// Return the longest prefix of 'this' such that no character in |

589 | /// the prefix satisfies the given predicate. |

590 | [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const { |

591 | return substr(Start: 0, N: find_if(F)); |

592 | } |

593 | |

594 | /// Return a StringRef equal to 'this' but with the first \p N elements |

595 | /// dropped. |

596 | [[nodiscard]] StringRef drop_front(size_t N = 1) const { |

597 | assert(size() >= N && "Dropping more elements than exist"); |

598 | return substr(Start: N); |

599 | } |

600 | |

601 | /// Return a StringRef equal to 'this' but with the last \p N elements |

602 | /// dropped. |

603 | [[nodiscard]] StringRef drop_back(size_t N = 1) const { |

604 | assert(size() >= N && "Dropping more elements than exist"); |

605 | return substr(Start: 0, N: size()-N); |

606 | } |

607 | |

608 | /// Return a StringRef equal to 'this', but with all characters satisfying |

609 | /// the given predicate dropped from the beginning of the string. |

610 | [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const { |

611 | return substr(Start: find_if_not(F)); |

612 | } |

613 | |

614 | /// Return a StringRef equal to 'this', but with all characters not |

615 | /// satisfying the given predicate dropped from the beginning of the string. |

616 | [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const { |

617 | return substr(Start: find_if(F)); |

618 | } |

619 | |

620 | /// Returns true if this StringRef has the given prefix and removes that |

621 | /// prefix. |

622 | bool consume_front(StringRef Prefix) { |

623 | if (!starts_with(Prefix)) |

624 | return false; |

625 | |

626 | *this = substr(Start: Prefix.size()); |

627 | return true; |

628 | } |

629 | |

630 | /// Returns true if this StringRef has the given prefix, ignoring case, |

631 | /// and removes that prefix. |

632 | bool consume_front_insensitive(StringRef Prefix) { |

633 | if (!startswith_insensitive(Prefix)) |

634 | return false; |

635 | |

636 | *this = substr(Start: Prefix.size()); |

637 | return true; |

638 | } |

639 | |

640 | /// Returns true if this StringRef has the given suffix and removes that |

641 | /// suffix. |

642 | bool consume_back(StringRef Suffix) { |

643 | if (!ends_with(Suffix)) |

644 | return false; |

645 | |

646 | *this = substr(Start: 0, N: size() - Suffix.size()); |

647 | return true; |

648 | } |

649 | |

650 | /// Returns true if this StringRef has the given suffix, ignoring case, |

651 | /// and removes that suffix. |

652 | bool consume_back_insensitive(StringRef Suffix) { |

653 | if (!endswith_insensitive(Suffix)) |

654 | return false; |

655 | |

656 | *this = substr(Start: 0, N: size() - Suffix.size()); |

657 | return true; |

658 | } |

659 | |

660 | /// Return a reference to the substring from [Start, End). |

661 | /// |

662 | /// \param Start The index of the starting character in the substring; if |

663 | /// the index is npos or greater than the length of the string then the |

664 | /// empty substring will be returned. |

665 | /// |

666 | /// \param End The index following the last character to include in the |

667 | /// substring. If this is npos or exceeds the number of characters |

668 | /// remaining in the string, the string suffix (starting with \p Start) |

669 | /// will be returned. If this is less than \p Start, an empty string will |

670 | /// be returned. |

671 | [[nodiscard]] StringRef slice(size_t Start, size_t End) const { |

672 | Start = std::min(a: Start, b: Length); |

673 | End = std::clamp(val: End, lo: Start, hi: Length); |

674 | return StringRef(Data + Start, End - Start); |

675 | } |

676 | |

677 | /// Split into two substrings around the first occurrence of a separator |

678 | /// character. |

679 | /// |

680 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

681 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

682 | /// maximal. If \p Separator is not in the string, then the result is a |

683 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

684 | /// |

685 | /// \param Separator The character to split on. |

686 | /// \returns The split substrings. |

687 | [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const { |

688 | return split(Separator: StringRef(&Separator, 1)); |

689 | } |

690 | |

691 | /// Split into two substrings around the first occurrence of a separator |

692 | /// string. |

693 | /// |

694 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

695 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

696 | /// maximal. If \p Separator is not in the string, then the result is a |

697 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

698 | /// |

699 | /// \param Separator - The string to split on. |

700 | /// \return - The split substrings. |

701 | [[nodiscard]] std::pair<StringRef, StringRef> |

702 | split(StringRef Separator) const { |

703 | size_t Idx = find(Str: Separator); |

704 | if (Idx == npos) |

705 | return std::make_pair(x: *this, y: StringRef()); |

706 | return std::make_pair(x: slice(Start: 0, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos)); |

707 | } |

708 | |

709 | /// Split into two substrings around the last occurrence of a separator |

710 | /// string. |

711 | /// |

712 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

713 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

714 | /// minimal. If \p Separator is not in the string, then the result is a |

715 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

716 | /// |

717 | /// \param Separator - The string to split on. |

718 | /// \return - The split substrings. |

719 | [[nodiscard]] std::pair<StringRef, StringRef> |

720 | rsplit(StringRef Separator) const { |

721 | size_t Idx = rfind(Str: Separator); |

722 | if (Idx == npos) |

723 | return std::make_pair(x: *this, y: StringRef()); |

724 | return std::make_pair(x: slice(Start: 0, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos)); |

725 | } |

726 | |

727 | /// Split into substrings around the occurrences of a separator string. |

728 | /// |

729 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

730 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

731 | /// elements are added to A. |

732 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

733 | /// still count when considering \p MaxSplit |

734 | /// A useful invariant is that |

735 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

736 | /// |

737 | /// \param A - Where to put the substrings. |

738 | /// \param Separator - The string to split on. |

739 | /// \param MaxSplit - The maximum number of times the string is split. |

740 | /// \param KeepEmpty - True if empty substring should be added. |

741 | void split(SmallVectorImpl<StringRef> &A, |

742 | StringRef Separator, int MaxSplit = -1, |

743 | bool KeepEmpty = true) const; |

744 | |

745 | /// Split into substrings around the occurrences of a separator character. |

746 | /// |

747 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

748 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

749 | /// elements are added to A. |

750 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

751 | /// still count when considering \p MaxSplit |

752 | /// A useful invariant is that |

753 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

754 | /// |

755 | /// \param A - Where to put the substrings. |

756 | /// \param Separator - The string to split on. |

757 | /// \param MaxSplit - The maximum number of times the string is split. |

758 | /// \param KeepEmpty - True if empty substring should be added. |

759 | void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, |

760 | bool KeepEmpty = true) const; |

761 | |

762 | /// Split into two substrings around the last occurrence of a separator |

763 | /// character. |

764 | /// |

765 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

766 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

767 | /// minimal. If \p Separator is not in the string, then the result is a |

768 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

769 | /// |

770 | /// \param Separator - The character to split on. |

771 | /// \return - The split substrings. |

772 | [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const { |

773 | return rsplit(Separator: StringRef(&Separator, 1)); |

774 | } |

775 | |

776 | /// Return string with consecutive \p Char characters starting from the |

777 | /// the left removed. |

778 | [[nodiscard]] StringRef ltrim(char Char) const { |

779 | return drop_front(N: std::min(a: Length, b: find_first_not_of(C: Char))); |

780 | } |

781 | |

782 | /// Return string with consecutive characters in \p Chars starting from |

783 | /// the left removed. |

784 | [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { |

785 | return drop_front(N: std::min(a: Length, b: find_first_not_of(Chars))); |

786 | } |

787 | |

788 | /// Return string with consecutive \p Char characters starting from the |

789 | /// right removed. |

790 | [[nodiscard]] StringRef rtrim(char Char) const { |

791 | return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(C: Char) + 1)); |

792 | } |

793 | |

794 | /// Return string with consecutive characters in \p Chars starting from |

795 | /// the right removed. |

796 | [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { |

797 | return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(Chars) + 1)); |

798 | } |

799 | |

800 | /// Return string with consecutive \p Char characters starting from the |

801 | /// left and right removed. |

802 | [[nodiscard]] StringRef trim(char Char) const { |

803 | return ltrim(Char).rtrim(Char); |

804 | } |

805 | |

806 | /// Return string with consecutive characters in \p Chars starting from |

807 | /// the left and right removed. |

808 | [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const { |

809 | return ltrim(Chars).rtrim(Chars); |

810 | } |

811 | |

812 | /// Detect the line ending style of the string. |

813 | /// |

814 | /// If the string contains a line ending, return the line ending character |

815 | /// sequence that is detected. Otherwise return '\n' for unix line endings. |

816 | /// |

817 | /// \return - The line ending character sequence. |

818 | [[nodiscard]] StringRef detectEOL() const { |

819 | size_t Pos = find(C: '\r'); |

820 | if (Pos == npos) { |

821 | // If there is no carriage return, assume unix |

822 | return "\n"; |

823 | } |

824 | if (Pos + 1 < Length && Data[Pos + 1] == '\n') |

825 | return "\r\n"; // Windows |

826 | if (Pos > 0 && Data[Pos - 1] == '\n') |

827 | return "\n\r"; // You monster! |

828 | return "\r"; // Classic Mac |

829 | } |

830 | /// @} |

831 | }; |

832 | |

833 | /// A wrapper around a string literal that serves as a proxy for constructing |

834 | /// global tables of StringRefs with the length computed at compile time. |

835 | /// In order to avoid the invocation of a global constructor, StringLiteral |

836 | /// should *only* be used in a constexpr context, as such: |

837 | /// |

838 | /// constexpr StringLiteral S("test"); |

839 | /// |

840 | class StringLiteral : public StringRef { |

841 | private: |

842 | constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) { |

843 | } |

844 | |

845 | public: |

846 | template <size_t N> |

847 | constexpr StringLiteral(const char (&Str)[N]) |

848 | #if defined(__clang__) && __has_attribute(enable_if) |

849 | #pragma clang diagnostic push |

850 | #pragma clang diagnostic ignored "-Wgcc-compat" |

851 | __attribute((enable_if(__builtin_strlen(Str) == N - 1, |

852 | "invalid string literal"))) |

853 | #pragma clang diagnostic pop |

854 | #endif |

855 | : StringRef(Str, N - 1) { |

856 | } |

857 | |

858 | // Explicit construction for strings like "foo\0bar". |

859 | template <size_t N> |

860 | static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) { |

861 | return StringLiteral(Str, N - 1); |

862 | } |

863 | }; |

864 | |

865 | /// @name StringRef Comparison Operators |

866 | /// @{ |

867 | |

868 | inline bool operator==(StringRef LHS, StringRef RHS) { |

869 | return LHS.equals(RHS); |

870 | } |

871 | |

872 | inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } |

873 | |

874 | inline bool operator<(StringRef LHS, StringRef RHS) { |

875 | return LHS.compare(RHS) < 0; |

876 | } |

877 | |

878 | inline bool operator<=(StringRef LHS, StringRef RHS) { |

879 | return LHS.compare(RHS) <= 0; |

880 | } |

881 | |

882 | inline bool operator>(StringRef LHS, StringRef RHS) { |

883 | return LHS.compare(RHS) > 0; |

884 | } |

885 | |

886 | inline bool operator>=(StringRef LHS, StringRef RHS) { |

887 | return LHS.compare(RHS) >= 0; |

888 | } |

889 | |

890 | inline std::string &operator+=(std::string &buffer, StringRef string) { |

891 | return buffer.append(s: string.data(), n: string.size()); |

892 | } |

893 | |

894 | /// @} |

895 | |

896 | /// Compute a hash_code for the StringRef \p S (declaration only; the definition is not in this part of the file). |

897 | [[nodiscard]] hash_code hash_value(StringRef S); |

898 | |

899 | // Provide DenseMapInfo for StringRefs. |

900 | template <> struct DenseMapInfo<StringRef, void> { |

901 | static inline StringRef getEmptyKey() { |

902 | return StringRef( |

903 | reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0); |

904 | } |

905 | |

906 | static inline StringRef getTombstoneKey() { |

907 | return StringRef( |

908 | reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0); |

909 | } |

910 | |

911 | static unsigned getHashValue(StringRef Val); |

912 | |

913 | static bool isEqual(StringRef LHS, StringRef RHS) { |

914 | if (RHS.data() == getEmptyKey().data()) |

915 | return LHS.data() == getEmptyKey().data(); |

916 | if (RHS.data() == getTombstoneKey().data()) |

917 | return LHS.data() == getTombstoneKey().data(); |

918 | return LHS == RHS; |

919 | } |

920 | }; |

921 | |

922 | } // end namespace llvm |

923 | |

924 | #endif // LLVM_ADT_STRINGREF_H |

925 |