1 | //===-- ConstString.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLDB_UTILITY_CONSTSTRING_H |
10 | #define LLDB_UTILITY_CONSTSTRING_H |
11 | |
12 | #include "llvm/ADT/DenseMapInfo.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/FormatVariadic.h" |
15 | |
16 | #include <cstddef> |
17 | #include <string_view> |
18 | |
// Forward declarations: this header only names these types in function
// signatures, so their full definitions are not needed here.
namespace lldb_private {
class Stream;
} // namespace lldb_private

namespace llvm {
class raw_ostream;
} // namespace llvm
25 | |
26 | namespace lldb_private { |
27 | |
28 | /// \class ConstString ConstString.h "lldb/Utility/ConstString.h" |
29 | /// A uniqued constant string class. |
30 | /// |
31 | /// Provides an efficient way to store strings as uniqued strings. After the |
32 | /// strings are uniqued, finding strings that are equal to one another is very |
33 | /// fast as just the pointers need to be compared. It also allows for many |
34 | /// common strings from many different sources to be shared to keep the memory |
35 | /// footprint low. |
36 | /// |
37 | /// No reference counting is done on strings that are added to the string |
38 | /// pool, once strings are added they are in the string pool for the life of |
39 | /// the program. |
40 | class ConstString { |
41 | public: |
42 | /// Default constructor |
43 | /// |
44 | /// Initializes the string to an empty string. |
45 | ConstString() = default; |
46 | |
47 | explicit ConstString(llvm::StringRef s); |
48 | |
49 | /// Construct with C String value |
50 | /// |
51 | /// Constructs this object with a C string by looking to see if the |
52 | /// C string already exists in the global string pool. If it doesn't |
53 | /// exist, it is added to the string pool. |
54 | /// |
55 | /// \param[in] cstr |
56 | /// A NULL terminated C string to add to the string pool. |
57 | explicit ConstString(const char *cstr); |
58 | |
59 | /// Construct with C String value with max length |
60 | /// |
61 | /// Constructs this object with a C string with a length. If \a max_cstr_len |
62 | /// is greater than the actual length of the string, the string length will |
63 | /// be truncated. This allows substrings to be created without the need to |
64 | /// NULL terminate the string as it is passed into this function. |
65 | /// |
66 | /// \param[in] cstr |
67 | /// A pointer to the first character in the C string. The C |
68 | /// string can be NULL terminated in a buffer that contains |
69 | /// more characters than the length of the string, or the |
70 | /// string can be part of another string and a new substring |
71 | /// can be created. |
72 | /// |
73 | /// \param[in] max_cstr_len |
74 | /// The max length of \a cstr. If the string length of \a cstr |
75 | /// is less than \a max_cstr_len, then the string will be |
76 | /// truncated. If the string length of \a cstr is greater than |
77 | /// \a max_cstr_len, then only max_cstr_len bytes will be used |
78 | /// from \a cstr. |
79 | explicit ConstString(const char *cstr, size_t max_cstr_len); |
80 | |
81 | /// Convert to bool operator. |
82 | /// |
83 | /// This allows code to check a ConstString object to see if it contains a |
84 | /// valid string using code such as: |
85 | /// |
86 | /// \code |
87 | /// ConstString str(...); |
88 | /// if (str) |
89 | /// { ... |
90 | /// \endcode |
91 | /// |
92 | /// \return |
93 | /// /b True this object contains a valid non-empty C string, \b |
94 | /// false otherwise. |
95 | explicit operator bool() const { return !IsEmpty(); } |
96 | |
97 | /// Equal to operator |
98 | /// |
99 | /// Returns true if this string is equal to the string in \a rhs. This |
100 | /// operation is very fast as it results in a pointer comparison since all |
101 | /// strings are in a uniqued in a global string pool. |
102 | /// |
103 | /// \param[in] rhs |
104 | /// Another string object to compare this object to. |
105 | /// |
106 | /// \return |
107 | /// true if this object is equal to \a rhs. |
108 | /// false if this object is not equal to \a rhs. |
109 | bool operator==(ConstString rhs) const { |
110 | // We can do a pointer compare to compare these strings since they must |
111 | // come from the same pool in order to be equal. |
112 | return m_string == rhs.m_string; |
113 | } |
114 | |
115 | /// Equal to operator against a non-ConstString value. |
116 | /// |
117 | /// Returns true if this string is equal to the string in \a rhs. This |
118 | /// overload is usually slower than comparing against a ConstString value. |
119 | /// However, if the rhs string not already a ConstString and it is impractical |
120 | /// to turn it into a non-temporary variable, then this overload is faster. |
121 | /// |
122 | /// \param[in] rhs |
123 | /// Another string object to compare this object to. |
124 | /// |
125 | /// \return |
126 | /// \b true if this object is equal to \a rhs. |
127 | /// \b false if this object is not equal to \a rhs. |
128 | bool operator==(const char *rhs) const { |
129 | // ConstString differentiates between empty strings and nullptr strings, but |
130 | // StringRef doesn't. Therefore we have to do this check manually now. |
131 | if (m_string == nullptr && rhs != nullptr) |
132 | return false; |
133 | if (m_string != nullptr && rhs == nullptr) |
134 | return false; |
135 | |
136 | return GetStringRef() == rhs; |
137 | } |
138 | |
139 | /// Not equal to operator |
140 | /// |
141 | /// Returns true if this string is not equal to the string in \a rhs. This |
142 | /// operation is very fast as it results in a pointer comparison since all |
143 | /// strings are in a uniqued in a global string pool. |
144 | /// |
145 | /// \param[in] rhs |
146 | /// Another string object to compare this object to. |
147 | /// |
148 | /// \return |
149 | /// \b true if this object is not equal to \a rhs. |
150 | /// \b false if this object is equal to \a rhs. |
151 | bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; } |
152 | |
153 | /// Not equal to operator against a non-ConstString value. |
154 | /// |
155 | /// Returns true if this string is not equal to the string in \a rhs. This |
156 | /// overload is usually slower than comparing against a ConstString value. |
157 | /// However, if the rhs string not already a ConstString and it is impractical |
158 | /// to turn it into a non-temporary variable, then this overload is faster. |
159 | /// |
160 | /// \param[in] rhs |
161 | /// Another string object to compare this object to. |
162 | /// |
163 | /// \return \b true if this object is not equal to \a rhs, false otherwise. |
164 | bool operator!=(const char *rhs) const { return !(*this == rhs); } |
165 | |
166 | bool operator<(ConstString rhs) const; |
167 | |
168 | // Implicitly convert \class ConstString instances to \class StringRef. |
169 | operator llvm::StringRef() const { return GetStringRef(); } |
170 | |
171 | // Implicitly convert \class ConstString instances to \class std::string_view. |
172 | operator std::string_view() const { |
173 | return std::string_view(m_string, GetLength()); |
174 | } |
175 | |
176 | // Explicitly convert \class ConstString instances to \class std::string. |
177 | explicit operator std::string() const { return GetString(); } |
178 | |
179 | /// Get the string value as a C string. |
180 | /// |
181 | /// Get the value of the contained string as a NULL terminated C string |
182 | /// value. |
183 | /// |
184 | /// If \a value_if_empty is nullptr, then nullptr will be returned. |
185 | /// |
186 | /// \return Returns \a value_if_empty if the string is empty, otherwise |
187 | /// the C string value contained in this object. |
188 | const char *AsCString(const char *value_if_empty = nullptr) const { |
189 | return (IsEmpty() ? value_if_empty : m_string); |
190 | } |
191 | |
192 | /// Get the string value as a llvm::StringRef |
193 | /// |
194 | /// \return |
195 | /// Returns a new llvm::StringRef object filled in with the |
196 | /// needed data. |
197 | llvm::StringRef GetStringRef() const { |
198 | return llvm::StringRef(m_string, GetLength()); |
199 | } |
200 | |
201 | /// Get the string value as a std::string |
202 | std::string GetString() const { return std::string(m_string, GetLength()); } |
203 | |
204 | /// Get the string value as a C string. |
205 | /// |
206 | /// Get the value of the contained string as a NULL terminated C string |
207 | /// value. Similar to the ConstString::AsCString() function, yet this |
208 | /// function will always return nullptr if the string is not valid. So this |
209 | /// function is a direct accessor to the string pointer value. |
210 | /// |
211 | /// \return |
212 | /// Returns nullptr the string is invalid, otherwise the C string |
213 | /// value contained in this object. |
214 | const char *GetCString() const { return m_string; } |
215 | |
216 | /// Get the length in bytes of string value. |
217 | /// |
218 | /// The string pool stores the length of the string, so we can avoid calling |
219 | /// strlen() on the pointer value with this function. |
220 | /// |
221 | /// \return |
222 | /// Returns the number of bytes that this string occupies in |
223 | /// memory, not including the NULL termination byte. |
224 | size_t GetLength() const; |
225 | |
226 | /// Clear this object's state. |
227 | /// |
228 | /// Clear any contained string and reset the value to the empty string |
229 | /// value. |
230 | void Clear() { m_string = nullptr; } |
231 | |
232 | /// Equal to operator |
233 | /// |
234 | /// Returns true if this string is equal to the string in \a rhs. If case |
235 | /// sensitive equality is tested, this operation is very fast as it results |
236 | /// in a pointer comparison since all strings are in a uniqued in a global |
237 | /// string pool. |
238 | /// |
239 | /// \param[in] lhs |
240 | /// The Left Hand Side const ConstString object reference. |
241 | /// |
242 | /// \param[in] rhs |
243 | /// The Right Hand Side const ConstString object reference. |
244 | /// |
245 | /// \param[in] case_sensitive |
246 | /// Case sensitivity. If true, case sensitive equality |
247 | /// will be tested, otherwise character case will be ignored |
248 | /// |
249 | /// \return \b true if this object is equal to \a rhs, \b false otherwise. |
250 | static bool Equals(ConstString lhs, ConstString rhs, |
251 | const bool case_sensitive = true); |
252 | |
253 | /// Compare two string objects. |
254 | /// |
255 | /// Compares the C string values contained in \a lhs and \a rhs and returns |
256 | /// an integer result. |
257 | /// |
258 | /// NOTE: only call this function when you want a true string |
259 | /// comparison. If you want string equality use the, use the == operator as |
260 | /// it is much more efficient. Also if you want string inequality, use the |
261 | /// != operator for the same reasons. |
262 | /// |
263 | /// \param[in] lhs |
264 | /// The Left Hand Side const ConstString object reference. |
265 | /// |
266 | /// \param[in] rhs |
267 | /// The Right Hand Side const ConstString object reference. |
268 | /// |
269 | /// \param[in] case_sensitive |
270 | /// Case sensitivity of compare. If true, case sensitive compare |
271 | /// will be performed, otherwise character case will be ignored |
272 | /// |
273 | /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs |
274 | static int Compare(ConstString lhs, ConstString rhs, |
275 | const bool case_sensitive = true); |
276 | |
277 | /// Dump the object description to a stream. |
278 | /// |
279 | /// Dump the string value to the stream \a s. If the contained string is |
280 | /// empty, print \a value_if_empty to the stream instead. If \a |
281 | /// value_if_empty is nullptr, then nothing will be dumped to the stream. |
282 | /// |
283 | /// \param[in] s |
284 | /// The stream that will be used to dump the object description. |
285 | /// |
286 | /// \param[in] value_if_empty |
287 | /// The value to dump if the string is empty. If nullptr, nothing |
288 | /// will be output to the stream. |
289 | void Dump(Stream *s, const char *value_if_empty = nullptr) const; |
290 | |
291 | /// Dump the object debug description to a stream. |
292 | /// |
293 | /// \param[in] s |
294 | /// The stream that will be used to dump the object description. |
295 | void DumpDebug(Stream *s) const; |
296 | |
297 | /// Test for empty string. |
298 | /// |
299 | /// \return |
300 | /// \b true if the contained string is empty. |
301 | /// \b false if the contained string is not empty. |
302 | bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } |
303 | |
304 | /// Test for null string. |
305 | /// |
306 | /// \return |
307 | /// \b true if there is no string associated with this instance. |
308 | /// \b false if there is a string associated with this instance. |
309 | bool IsNull() const { return m_string == nullptr; } |
310 | |
311 | /// Set the C string value. |
312 | /// |
313 | /// Set the string value in the object by uniquing the \a cstr string value |
314 | /// in our global string pool. |
315 | /// |
316 | /// If the C string already exists in the global string pool, it finds the |
317 | /// current entry and returns the existing value. If it doesn't exist, it is |
318 | /// added to the string pool. |
319 | /// |
320 | /// \param[in] cstr |
321 | /// A NULL terminated C string to add to the string pool. |
322 | void SetCString(const char *cstr); |
323 | |
324 | void SetString(llvm::StringRef s); |
325 | |
326 | /// Set the C string value and its mangled counterpart. |
327 | /// |
328 | /// Object files and debug symbols often use mangled string to represent the |
329 | /// linkage name for a symbol, function or global. The string pool can |
330 | /// efficiently store these values and their counterparts so when we run |
331 | /// into another instance of a mangled name, we can avoid calling the name |
332 | /// demangler over and over on the same strings and then trying to unique |
333 | /// them. |
334 | /// |
335 | /// \param[in] demangled |
336 | /// The demangled string to correlate with the \a mangled name. |
337 | /// |
338 | /// \param[in] mangled |
339 | /// The already uniqued mangled ConstString to correlate the |
340 | /// soon to be uniqued version of \a demangled. |
341 | void SetStringWithMangledCounterpart(llvm::StringRef demangled, |
342 | ConstString mangled); |
343 | |
344 | /// Retrieve the mangled or demangled counterpart for a mangled or demangled |
345 | /// ConstString. |
346 | /// |
347 | /// Object files and debug symbols often use mangled string to represent the |
348 | /// linkage name for a symbol, function or global. The string pool can |
349 | /// efficiently store these values and their counterparts so when we run |
350 | /// into another instance of a mangled name, we can avoid calling the name |
351 | /// demangler over and over on the same strings and then trying to unique |
352 | /// them. |
353 | /// |
354 | /// \param[in] counterpart |
355 | /// A reference to a ConstString object that might get filled in |
356 | /// with the demangled/mangled counterpart. |
357 | /// |
358 | /// \return |
359 | /// /b True if \a counterpart was filled in with the counterpart |
360 | /// /b false otherwise. |
361 | bool GetMangledCounterpart(ConstString &counterpart) const; |
362 | |
363 | /// Set the C string value with length. |
364 | /// |
365 | /// Set the string value in the object by uniquing \a cstr_len bytes |
366 | /// starting at the \a cstr string value in our global string pool. If trim |
367 | /// is true, then \a cstr_len indicates a maximum length of the CString and |
368 | /// if the actual length of the string is less, then it will be trimmed. |
369 | /// |
370 | /// If the C string already exists in the global string pool, it finds the |
371 | /// current entry and returns the existing value. If it doesn't exist, it is |
372 | /// added to the string pool. |
373 | /// |
374 | /// \param[in] cstr |
375 | /// A NULL terminated C string to add to the string pool. |
376 | /// |
377 | /// \param[in] cstr_len |
378 | /// The maximum length of the C string. |
379 | void SetCStringWithLength(const char *cstr, size_t cstr_len); |
380 | |
381 | /// Set the C string value with the minimum length between \a fixed_cstr_len |
382 | /// and the actual length of the C string. This can be used for data |
383 | /// structures that have a fixed length to store a C string where the string |
384 | /// might not be NULL terminated if the string takes the entire buffer. |
385 | void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); |
386 | |
387 | /// Get the memory cost of this object. |
388 | /// |
389 | /// Return the size in bytes that this object takes in memory. This returns |
390 | /// the size in bytes of this object, which does not include any the shared |
391 | /// string values it may refer to. |
392 | /// |
393 | /// \return |
394 | /// The number of bytes that this object occupies in memory. |
395 | size_t MemorySize() const { return sizeof(ConstString); } |
396 | |
397 | struct MemoryStats { |
398 | size_t GetBytesTotal() const { return bytes_total; } |
399 | size_t GetBytesUsed() const { return bytes_used; } |
400 | size_t GetBytesUnused() const { return bytes_total - bytes_used; } |
401 | size_t bytes_total = 0; |
402 | size_t bytes_used = 0; |
403 | }; |
404 | |
405 | static MemoryStats GetMemoryStats(); |
406 | |
407 | protected: |
408 | template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo; |
409 | /// Only used by DenseMapInfo. |
410 | static ConstString FromStringPoolPointer(const char *ptr) { |
411 | ConstString s; |
412 | s.m_string = ptr; |
413 | return s; |
414 | }; |
415 | |
416 | const char *m_string = nullptr; |
417 | }; |
418 | |
419 | /// Stream the string value \a str to the stream \a s |
420 | Stream &operator<<(Stream &s, ConstString str); |
421 | |
422 | } // namespace lldb_private |
423 | |
424 | namespace llvm { |
425 | template <> struct format_provider<lldb_private::ConstString> { |
426 | static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, |
427 | llvm::StringRef Options); |
428 | }; |
429 | |
430 | /// DenseMapInfo implementation. |
431 | /// \{ |
432 | template <> struct DenseMapInfo<lldb_private::ConstString> { |
433 | static inline lldb_private::ConstString getEmptyKey() { |
434 | return lldb_private::ConstString::FromStringPoolPointer( |
435 | ptr: DenseMapInfo<const char *>::getEmptyKey()); |
436 | } |
437 | static inline lldb_private::ConstString getTombstoneKey() { |
438 | return lldb_private::ConstString::FromStringPoolPointer( |
439 | ptr: DenseMapInfo<const char *>::getTombstoneKey()); |
440 | } |
441 | static unsigned getHashValue(lldb_private::ConstString val) { |
442 | return DenseMapInfo<const char *>::getHashValue(PtrVal: val.m_string); |
443 | } |
444 | static bool isEqual(lldb_private::ConstString LHS, |
445 | lldb_private::ConstString RHS) { |
446 | return LHS == RHS; |
447 | } |
448 | }; |
449 | /// \} |
450 | |
451 | inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) { |
452 | os << s.GetStringRef(); |
453 | return os; |
454 | } |
455 | } // namespace llvm |
456 | |
457 | #endif // LLDB_UTILITY_CONSTSTRING_H |
458 | |