1 | //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_ADT_TWINE_H |
10 | #define LLVM_ADT_TWINE_H |
11 | |
12 | #include "llvm/ADT/SmallVector.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/ErrorHandling.h" |
15 | #include <cassert> |
16 | #include <cstdint> |
17 | #include <string> |
18 | #include <string_view> |
19 | |
20 | namespace llvm { |
21 | |
22 | class formatv_object_base; |
23 | class raw_ostream; |
24 | |
25 | /// Twine - A lightweight data structure for efficiently representing the |
26 | /// concatenation of temporary values as strings. |
27 | /// |
28 | /// A Twine is a kind of rope, it represents a concatenated string using a |
29 | /// binary-tree, where the string is the preorder of the nodes. Since the |
30 | /// Twine can be efficiently rendered into a buffer when its result is used, |
31 | /// it avoids the cost of generating temporary values for intermediate string |
32 | /// results -- particularly in cases when the Twine result is never |
33 | /// required. By explicitly tracking the type of leaf nodes, we can also avoid |
34 | /// the creation of temporary strings for conversions operations (such as |
35 | /// appending an integer to a string). |
36 | /// |
37 | /// A Twine is not intended for use directly and should not be stored, its |
38 | /// implementation relies on the ability to store pointers to temporary stack |
39 | /// objects which may be deallocated at the end of a statement. Twines should |
40 | /// only be used as const references in arguments, when an API wishes |
41 | /// to accept possibly-concatenated strings. |
42 | /// |
43 | /// Twines support a special 'null' value, which always concatenates to form |
44 | /// itself, and renders as an empty string. This can be returned from APIs to |
45 | /// effectively nullify any concatenations performed on the result. |
46 | /// |
47 | /// \b Implementation |
48 | /// |
49 | /// Given the nature of a Twine, it is not possible for the Twine's |
50 | /// concatenation method to construct interior nodes; the result must be |
51 | /// represented inside the returned value. For this reason a Twine object |
52 | /// actually holds two values, the left- and right-hand sides of a |
53 | /// concatenation. We also have nullary Twine objects, which are effectively |
54 | /// sentinel values that represent empty strings. |
55 | /// |
56 | /// Thus, a Twine can effectively have zero, one, or two children. The \see |
57 | /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for |
58 | /// testing the number of children. |
59 | /// |
60 | /// We maintain a number of invariants on Twine objects (FIXME: Why): |
61 | /// - Nullary twines are always represented with their Kind on the left-hand |
62 | /// side, and the Empty kind on the right-hand side. |
63 | /// - Unary twines are always represented with the value on the left-hand |
64 | /// side, and the Empty kind on the right-hand side. |
65 | /// - If a Twine has another Twine as a child, that child should always be |
66 | /// binary (otherwise it could have been folded into the parent). |
67 | /// |
68 | /// These invariants are check by \see isValid(). |
69 | /// |
70 | /// \b Efficiency Considerations |
71 | /// |
72 | /// The Twine is designed to yield efficient and small code for common |
73 | /// situations. For this reason, the concat() method is inlined so that |
74 | /// concatenations of leaf nodes can be optimized into stores directly into a |
75 | /// single stack allocated object. |
76 | /// |
77 | /// In practice, not all compilers can be trusted to optimize concat() fully, |
78 | /// so we provide two additional methods (and accompanying operator+ |
79 | /// overloads) to guarantee that particularly important cases (cstring plus |
80 | /// StringRef) codegen as desired. |
81 | class Twine { |
82 | /// NodeKind - Represent the type of an argument. |
83 | enum NodeKind : unsigned char { |
84 | /// An empty string; the result of concatenating anything with it is also |
85 | /// empty. |
86 | NullKind, |
87 | |
88 | /// The empty string. |
89 | EmptyKind, |
90 | |
91 | /// A pointer to a Twine instance. |
92 | TwineKind, |
93 | |
94 | /// A pointer to a C string instance. |
95 | CStringKind, |
96 | |
97 | /// A pointer to an std::string instance. |
98 | StdStringKind, |
99 | |
100 | /// A Pointer and Length representation. Used for std::string_view, |
101 | /// StringRef, and SmallString. Can't use a StringRef here |
102 | /// because they are not trivally constructible. |
103 | PtrAndLengthKind, |
104 | |
105 | /// A pointer and length representation that's also null-terminated. |
106 | /// Guaranteed to be constructed from a compile-time string literal. |
107 | StringLiteralKind, |
108 | |
109 | /// A pointer to a formatv_object_base instance. |
110 | FormatvObjectKind, |
111 | |
112 | /// A char value, to render as a character. |
113 | CharKind, |
114 | |
115 | /// An unsigned int value, to render as an unsigned decimal integer. |
116 | DecUIKind, |
117 | |
118 | /// An int value, to render as a signed decimal integer. |
119 | DecIKind, |
120 | |
121 | /// A pointer to an unsigned long value, to render as an unsigned decimal |
122 | /// integer. |
123 | DecULKind, |
124 | |
125 | /// A pointer to a long value, to render as a signed decimal integer. |
126 | DecLKind, |
127 | |
128 | /// A pointer to an unsigned long long value, to render as an unsigned |
129 | /// decimal integer. |
130 | DecULLKind, |
131 | |
132 | /// A pointer to a long long value, to render as a signed decimal integer. |
133 | DecLLKind, |
134 | |
135 | /// A pointer to a uint64_t value, to render as an unsigned hexadecimal |
136 | /// integer. |
137 | UHexKind |
138 | }; |
139 | |
140 | union Child |
141 | { |
142 | const Twine *twine; |
143 | const char *cString; |
144 | const std::string *stdString; |
145 | struct { |
146 | const char *ptr; |
147 | size_t length; |
148 | } ptrAndLength; |
149 | const formatv_object_base *formatvObject; |
150 | char character; |
151 | unsigned int decUI; |
152 | int decI; |
153 | const unsigned long *decUL; |
154 | const long *decL; |
155 | const unsigned long long *decULL; |
156 | const long long *decLL; |
157 | const uint64_t *uHex; |
158 | }; |
159 | |
160 | /// LHS - The prefix in the concatenation, which may be uninitialized for |
161 | /// Null or Empty kinds. |
162 | Child LHS; |
163 | |
164 | /// RHS - The suffix in the concatenation, which may be uninitialized for |
165 | /// Null or Empty kinds. |
166 | Child RHS; |
167 | |
168 | /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). |
169 | NodeKind LHSKind = EmptyKind; |
170 | |
171 | /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). |
172 | NodeKind RHSKind = EmptyKind; |
173 | |
174 | /// Construct a nullary twine; the kind must be NullKind or EmptyKind. |
175 | explicit Twine(NodeKind Kind) : LHSKind(Kind) { |
176 | assert(isNullary() && "Invalid kind!" ); |
177 | } |
178 | |
179 | /// Construct a binary twine. |
180 | explicit Twine(const Twine &LHS, const Twine &RHS) |
181 | : LHSKind(TwineKind), RHSKind(TwineKind) { |
182 | this->LHS.twine = &LHS; |
183 | this->RHS.twine = &RHS; |
184 | assert(isValid() && "Invalid twine!" ); |
185 | } |
186 | |
187 | /// Construct a twine from explicit values. |
188 | explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) |
189 | : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { |
190 | assert(isValid() && "Invalid twine!" ); |
191 | } |
192 | |
193 | /// Check for the null twine. |
194 | bool isNull() const { |
195 | return getLHSKind() == NullKind; |
196 | } |
197 | |
198 | /// Check for the empty twine. |
199 | bool isEmpty() const { |
200 | return getLHSKind() == EmptyKind; |
201 | } |
202 | |
203 | /// Check if this is a nullary twine (null or empty). |
204 | bool isNullary() const { |
205 | return isNull() || isEmpty(); |
206 | } |
207 | |
208 | /// Check if this is a unary twine. |
209 | bool isUnary() const { |
210 | return getRHSKind() == EmptyKind && !isNullary(); |
211 | } |
212 | |
213 | /// Check if this is a binary twine. |
214 | bool isBinary() const { |
215 | return getLHSKind() != NullKind && getRHSKind() != EmptyKind; |
216 | } |
217 | |
218 | /// Check if this is a valid twine (satisfying the invariants on |
219 | /// order and number of arguments). |
220 | bool isValid() const { |
221 | // Nullary twines always have Empty on the RHS. |
222 | if (isNullary() && getRHSKind() != EmptyKind) |
223 | return false; |
224 | |
225 | // Null should never appear on the RHS. |
226 | if (getRHSKind() == NullKind) |
227 | return false; |
228 | |
229 | // The RHS cannot be non-empty if the LHS is empty. |
230 | if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) |
231 | return false; |
232 | |
233 | // A twine child should always be binary. |
234 | if (getLHSKind() == TwineKind && |
235 | !LHS.twine->isBinary()) |
236 | return false; |
237 | if (getRHSKind() == TwineKind && |
238 | !RHS.twine->isBinary()) |
239 | return false; |
240 | |
241 | return true; |
242 | } |
243 | |
244 | /// Get the NodeKind of the left-hand side. |
245 | NodeKind getLHSKind() const { return LHSKind; } |
246 | |
247 | /// Get the NodeKind of the right-hand side. |
248 | NodeKind getRHSKind() const { return RHSKind; } |
249 | |
250 | /// Print one child from a twine. |
251 | void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; |
252 | |
253 | /// Print the representation of one child from a twine. |
254 | void printOneChildRepr(raw_ostream &OS, Child Ptr, |
255 | NodeKind Kind) const; |
256 | |
257 | public: |
258 | /// @name Constructors |
259 | /// @{ |
260 | |
261 | /// Construct from an empty string. |
262 | /*implicit*/ Twine() { |
263 | assert(isValid() && "Invalid twine!" ); |
264 | } |
265 | |
266 | Twine(const Twine &) = default; |
267 | |
268 | /// Construct from a C string. |
269 | /// |
270 | /// We take care here to optimize "" into the empty twine -- this will be |
271 | /// optimized out for string constants. This allows Twine arguments have |
272 | /// default "" values, without introducing unnecessary string constants. |
273 | /*implicit*/ Twine(const char *Str) { |
274 | if (Str[0] != '\0') { |
275 | LHS.cString = Str; |
276 | LHSKind = CStringKind; |
277 | } else |
278 | LHSKind = EmptyKind; |
279 | |
280 | assert(isValid() && "Invalid twine!" ); |
281 | } |
282 | /// Delete the implicit conversion from nullptr as Twine(const char *) |
283 | /// cannot take nullptr. |
284 | /*implicit*/ Twine(std::nullptr_t) = delete; |
285 | |
286 | /// Construct from an std::string. |
287 | /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { |
288 | LHS.stdString = &Str; |
289 | assert(isValid() && "Invalid twine!" ); |
290 | } |
291 | |
292 | /// Construct from an std::string_view by converting it to a pointer and |
293 | /// length. This handles string_views on a pure API basis, and avoids |
294 | /// storing one (or a pointer to one) inside a Twine, which avoids problems |
295 | /// when mixing code compiled under various C++ standards. |
296 | /*implicit*/ Twine(const std::string_view &Str) |
297 | : LHSKind(PtrAndLengthKind) { |
298 | LHS.ptrAndLength.ptr = Str.data(); |
299 | LHS.ptrAndLength.length = Str.length(); |
300 | assert(isValid() && "Invalid twine!" ); |
301 | } |
302 | |
303 | /// Construct from a StringRef. |
304 | /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) { |
305 | LHS.ptrAndLength.ptr = Str.data(); |
306 | LHS.ptrAndLength.length = Str.size(); |
307 | assert(isValid() && "Invalid twine!" ); |
308 | } |
309 | |
310 | /// Construct from a StringLiteral. |
311 | /*implicit*/ Twine(const StringLiteral &Str) |
312 | : LHSKind(StringLiteralKind) { |
313 | LHS.ptrAndLength.ptr = Str.data(); |
314 | LHS.ptrAndLength.length = Str.size(); |
315 | assert(isValid() && "Invalid twine!" ); |
316 | } |
317 | |
318 | /// Construct from a SmallString. |
319 | /*implicit*/ Twine(const SmallVectorImpl<char> &Str) |
320 | : LHSKind(PtrAndLengthKind) { |
321 | LHS.ptrAndLength.ptr = Str.data(); |
322 | LHS.ptrAndLength.length = Str.size(); |
323 | assert(isValid() && "Invalid twine!" ); |
324 | } |
325 | |
326 | /// Construct from a formatv_object_base. |
327 | /*implicit*/ Twine(const formatv_object_base &Fmt) |
328 | : LHSKind(FormatvObjectKind) { |
329 | LHS.formatvObject = &Fmt; |
330 | assert(isValid() && "Invalid twine!" ); |
331 | } |
332 | |
333 | /// Construct from a char. |
334 | explicit Twine(char Val) : LHSKind(CharKind) { |
335 | LHS.character = Val; |
336 | } |
337 | |
338 | /// Construct from a signed char. |
339 | explicit Twine(signed char Val) : LHSKind(CharKind) { |
340 | LHS.character = static_cast<char>(Val); |
341 | } |
342 | |
343 | /// Construct from an unsigned char. |
344 | explicit Twine(unsigned char Val) : LHSKind(CharKind) { |
345 | LHS.character = static_cast<char>(Val); |
346 | } |
347 | |
348 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
349 | explicit Twine(unsigned Val) : LHSKind(DecUIKind) { |
350 | LHS.decUI = Val; |
351 | } |
352 | |
353 | /// Construct a twine to print \p Val as a signed decimal integer. |
354 | explicit Twine(int Val) : LHSKind(DecIKind) { |
355 | LHS.decI = Val; |
356 | } |
357 | |
358 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
359 | explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { |
360 | LHS.decUL = &Val; |
361 | } |
362 | |
363 | /// Construct a twine to print \p Val as a signed decimal integer. |
364 | explicit Twine(const long &Val) : LHSKind(DecLKind) { |
365 | LHS.decL = &Val; |
366 | } |
367 | |
368 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
369 | explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { |
370 | LHS.decULL = &Val; |
371 | } |
372 | |
373 | /// Construct a twine to print \p Val as a signed decimal integer. |
374 | explicit Twine(const long long &Val) : LHSKind(DecLLKind) { |
375 | LHS.decLL = &Val; |
376 | } |
377 | |
378 | // FIXME: Unfortunately, to make sure this is as efficient as possible we |
379 | // need extra binary constructors from particular types. We can't rely on |
380 | // the compiler to be smart enough to fold operator+()/concat() down to the |
381 | // right thing. Yet. |
382 | |
383 | /// Construct as the concatenation of a C string and a StringRef. |
384 | /*implicit*/ Twine(const char *LHS, const StringRef &RHS) |
385 | : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) { |
386 | this->LHS.cString = LHS; |
387 | this->RHS.ptrAndLength.ptr = RHS.data(); |
388 | this->RHS.ptrAndLength.length = RHS.size(); |
389 | assert(isValid() && "Invalid twine!" ); |
390 | } |
391 | |
392 | /// Construct as the concatenation of a StringRef and a C string. |
393 | /*implicit*/ Twine(const StringRef &LHS, const char *RHS) |
394 | : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) { |
395 | this->LHS.ptrAndLength.ptr = LHS.data(); |
396 | this->LHS.ptrAndLength.length = LHS.size(); |
397 | this->RHS.cString = RHS; |
398 | assert(isValid() && "Invalid twine!" ); |
399 | } |
400 | |
401 | /// Since the intended use of twines is as temporary objects, assignments |
402 | /// when concatenating might cause undefined behavior or stack corruptions |
403 | Twine &operator=(const Twine &) = delete; |
404 | |
405 | /// Create a 'null' string, which is an empty string that always |
406 | /// concatenates to form another empty string. |
407 | static Twine createNull() { |
408 | return Twine(NullKind); |
409 | } |
410 | |
411 | /// @} |
412 | /// @name Numeric Conversions |
413 | /// @{ |
414 | |
415 | // Construct a twine to print \p Val as an unsigned hexadecimal integer. |
416 | static Twine utohexstr(const uint64_t &Val) { |
417 | Child LHS, RHS; |
418 | LHS.uHex = &Val; |
419 | RHS.twine = nullptr; |
420 | return Twine(LHS, UHexKind, RHS, EmptyKind); |
421 | } |
422 | |
423 | /// @} |
424 | /// @name Predicate Operations |
425 | /// @{ |
426 | |
427 | /// Check if this twine is trivially empty; a false return value does not |
428 | /// necessarily mean the twine is empty. |
429 | bool isTriviallyEmpty() const { |
430 | return isNullary(); |
431 | } |
432 | |
433 | /// Check if this twine is guaranteed to refer to single string literal. |
434 | bool isSingleStringLiteral() const { |
435 | return isUnary() && getLHSKind() == StringLiteralKind; |
436 | } |
437 | |
438 | /// Return true if this twine can be dynamically accessed as a single |
439 | /// StringRef value with getSingleStringRef(). |
440 | bool isSingleStringRef() const { |
441 | if (getRHSKind() != EmptyKind) return false; |
442 | |
443 | switch (getLHSKind()) { |
444 | case EmptyKind: |
445 | case CStringKind: |
446 | case StdStringKind: |
447 | case PtrAndLengthKind: |
448 | case StringLiteralKind: |
449 | return true; |
450 | default: |
451 | return false; |
452 | } |
453 | } |
454 | |
455 | /// @} |
456 | /// @name String Operations |
457 | /// @{ |
458 | |
459 | Twine concat(const Twine &Suffix) const; |
460 | |
461 | /// @} |
462 | /// @name Output & Conversion. |
463 | /// @{ |
464 | |
465 | /// Return the twine contents as a std::string. |
466 | std::string str() const; |
467 | |
468 | /// Append the concatenated string into the given SmallString or SmallVector. |
469 | void toVector(SmallVectorImpl<char> &Out) const; |
470 | |
471 | /// This returns the twine as a single StringRef. This method is only valid |
472 | /// if isSingleStringRef() is true. |
473 | StringRef getSingleStringRef() const { |
474 | assert(isSingleStringRef() &&"This cannot be had as a single stringref!" ); |
475 | switch (getLHSKind()) { |
476 | default: llvm_unreachable("Out of sync with isSingleStringRef" ); |
477 | case EmptyKind: |
478 | return StringRef(); |
479 | case CStringKind: |
480 | return StringRef(LHS.cString); |
481 | case StdStringKind: |
482 | return StringRef(*LHS.stdString); |
483 | case PtrAndLengthKind: |
484 | case StringLiteralKind: |
485 | return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length); |
486 | } |
487 | } |
488 | |
489 | /// This returns the twine as a single StringRef if it can be |
490 | /// represented as such. Otherwise the twine is written into the given |
491 | /// SmallVector and a StringRef to the SmallVector's data is returned. |
492 | StringRef toStringRef(SmallVectorImpl<char> &Out) const { |
493 | if (isSingleStringRef()) |
494 | return getSingleStringRef(); |
495 | toVector(Out); |
496 | return StringRef(Out.data(), Out.size()); |
497 | } |
498 | |
499 | /// This returns the twine as a single null terminated StringRef if it |
500 | /// can be represented as such. Otherwise the twine is written into the |
501 | /// given SmallVector and a StringRef to the SmallVector's data is returned. |
502 | /// |
503 | /// The returned StringRef's size does not include the null terminator. |
504 | StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; |
505 | |
506 | /// Write the concatenated string represented by this twine to the |
507 | /// stream \p OS. |
508 | void print(raw_ostream &OS) const; |
509 | |
510 | /// Dump the concatenated string represented by this twine to stderr. |
511 | void dump() const; |
512 | |
513 | /// Write the representation of this twine to the stream \p OS. |
514 | void printRepr(raw_ostream &OS) const; |
515 | |
516 | /// Dump the representation of this twine to stderr. |
517 | void dumpRepr() const; |
518 | |
519 | /// @} |
520 | }; |
521 | |
522 | /// @name Twine Inline Implementations |
523 | /// @{ |
524 | |
525 | inline Twine Twine::concat(const Twine &Suffix) const { |
526 | // Concatenation with null is null. |
527 | if (isNull() || Suffix.isNull()) |
528 | return Twine(NullKind); |
529 | |
530 | // Concatenation with empty yields the other side. |
531 | if (isEmpty()) |
532 | return Suffix; |
533 | if (Suffix.isEmpty()) |
534 | return *this; |
535 | |
536 | // Otherwise we need to create a new node, taking care to fold in unary |
537 | // twines. |
538 | Child NewLHS, NewRHS; |
539 | NewLHS.twine = this; |
540 | NewRHS.twine = &Suffix; |
541 | NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; |
542 | if (isUnary()) { |
543 | NewLHS = LHS; |
544 | NewLHSKind = getLHSKind(); |
545 | } |
546 | if (Suffix.isUnary()) { |
547 | NewRHS = Suffix.LHS; |
548 | NewRHSKind = Suffix.getLHSKind(); |
549 | } |
550 | |
551 | return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); |
552 | } |
553 | |
554 | inline Twine operator+(const Twine &LHS, const Twine &RHS) { |
555 | return LHS.concat(Suffix: RHS); |
556 | } |
557 | |
558 | /// Additional overload to guarantee simplified codegen; this is equivalent to |
559 | /// concat(). |
560 | |
561 | inline Twine operator+(const char *LHS, const StringRef &RHS) { |
562 | return Twine(LHS, RHS); |
563 | } |
564 | |
565 | /// Additional overload to guarantee simplified codegen; this is equivalent to |
566 | /// concat(). |
567 | |
568 | inline Twine operator+(const StringRef &LHS, const char *RHS) { |
569 | return Twine(LHS, RHS); |
570 | } |
571 | |
572 | inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { |
573 | RHS.print(OS); |
574 | return OS; |
575 | } |
576 | |
577 | /// @} |
578 | |
579 | } // end namespace llvm |
580 | |
581 | #endif // LLVM_ADT_TWINE_H |
582 | |