1 | //===- Format.h - Utilities for String Format -------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares utilities for formatting strings. They are specially |
10 | // tailored to the needs of TableGen'ing op definitions and rewrite rules, |
11 | // so they are not expected to be used as widely applicable utilities. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef MLIR_TABLEGEN_FORMAT_H_ |
16 | #define MLIR_TABLEGEN_FORMAT_H_ |
17 | |
18 | #include "mlir/Support/LLVM.h" |
19 | #include "llvm/ADT/DenseMap.h" |
20 | #include "llvm/ADT/StringMap.h" |
21 | #include "llvm/Support/FormatVariadic.h" |
22 | |
23 | namespace mlir { |
24 | namespace tblgen { |
25 | |
26 | /// Format context containing substitutions for special placeholders. |
27 | /// |
28 | /// This context divides special placeholders into two categories: builtin ones |
29 | /// and custom ones. |
30 | /// |
31 | /// Builtin placeholders are baked into `FmtContext` and each one of them has a |
32 | /// dedicated setter. They can be used in all dialects. Their names follow the |
33 | /// convention of `$_<name>`. The rationale of the leading underscore is to |
34 | /// avoid confusion and name collision: op arguments/attributes/results are |
35 | /// named as $<name>, and we can potentially support referencing those entities |
36 | /// directly in the format template in the future. |
37 | // |
38 | /// Custom ones are registered by dialect-specific TableGen backends and use the |
39 | /// same unified setter. |
40 | class FmtContext { |
41 | public: |
42 | // Placeholder kinds |
43 | enum class PHKind : char { |
44 | None, |
45 | Custom, // For custom placeholders |
46 | Builder, // For the $_builder placeholder |
47 | Self, // For the $_self placeholder |
48 | }; |
49 | |
50 | FmtContext() = default; |
51 | |
52 | // Create a format context with a list of substitutions. |
53 | FmtContext(ArrayRef<std::pair<StringRef, StringRef>> subs); |
54 | |
55 | // Setter for custom placeholders |
56 | FmtContext &addSubst(StringRef placeholder, const Twine &subst); |
57 | |
58 | // Setters for builtin placeholders |
59 | FmtContext &withBuilder(Twine subst); |
60 | FmtContext &withSelf(Twine subst); |
61 | |
62 | std::optional<StringRef> getSubstFor(PHKind placeholder) const; |
63 | std::optional<StringRef> getSubstFor(StringRef placeholder) const; |
64 | |
65 | static PHKind getPlaceHolderKind(StringRef str); |
66 | |
67 | private: |
68 | struct PHKindInfo : DenseMapInfo<PHKind> { |
69 | using CharInfo = DenseMapInfo<char>; |
70 | |
71 | static inline PHKind getEmptyKey() { |
72 | return static_cast<PHKind>(CharInfo::getEmptyKey()); |
73 | } |
74 | static inline PHKind getTombstoneKey() { |
75 | return static_cast<PHKind>(CharInfo::getTombstoneKey()); |
76 | } |
77 | static unsigned getHashValue(const PHKind &val) { |
78 | return CharInfo::getHashValue(Val: static_cast<char>(val)); |
79 | } |
80 | |
81 | static bool isEqual(const PHKind &lhs, const PHKind &rhs) { |
82 | return lhs == rhs; |
83 | } |
84 | }; |
85 | |
86 | llvm::SmallDenseMap<PHKind, std::string, 4, PHKindInfo> builtinSubstMap; |
87 | llvm::StringMap<std::string> customSubstMap; |
88 | }; |
89 | |
90 | /// Struct representing a replacement segment for the formatted string. It can |
91 | /// be a segment of the formatting template (for `Literal`) or a replacement |
92 | /// parameter (for `PositionalPH`, `PositionalRangePH` and `SpecialPH`). |
93 | struct FmtReplacement { |
94 | enum class Type { |
95 | Empty, |
96 | Literal, |
97 | PositionalPH, |
98 | PositionalRangePH, |
99 | SpecialPH |
100 | }; |
101 | |
102 | FmtReplacement() = default; |
103 | explicit FmtReplacement(StringRef literal) |
104 | : type(Type::Literal), spec(literal) {} |
105 | FmtReplacement(StringRef spec, size_t index) |
106 | : type(Type::PositionalPH), spec(spec), index(index) {} |
107 | FmtReplacement(StringRef spec, size_t index, size_t end) |
108 | : type(Type::PositionalRangePH), spec(spec), index(index), end(end) {} |
109 | FmtReplacement(StringRef spec, FmtContext::PHKind placeholder) |
110 | : type(Type::SpecialPH), spec(spec), placeholder(placeholder) {} |
111 | |
112 | Type type = Type::Empty; |
113 | StringRef spec; |
114 | size_t index = 0; |
115 | size_t end = kUnset; |
116 | FmtContext::PHKind placeholder = FmtContext::PHKind::None; |
117 | |
118 | static constexpr size_t kUnset = -1; |
119 | }; |
120 | |
121 | class FmtObjectBase { |
122 | private: |
123 | static std::pair<FmtReplacement, StringRef> splitFmtSegment(StringRef fmt); |
124 | static std::vector<FmtReplacement> parseFormatString(StringRef fmt); |
125 | |
126 | protected: |
127 | // The parameters are stored in a std::tuple, which does not provide runtime |
128 | // indexing capabilities. In order to enable runtime indexing, we use this |
129 | // structure to put the parameters into a std::vector. Since the parameters |
130 | // are not all the same type, we use some type-erasure by wrapping the |
131 | // parameters in a template class that derives from a non-template superclass. |
132 | // Essentially, we are converting a std::tuple<Derived<Ts...>> to a |
133 | // std::vector<Base*>. |
134 | struct CreateAdapters { |
135 | template <typename... Ts> |
136 | std::vector<llvm::support::detail::format_adapter *> |
137 | operator()(Ts &...items) { |
138 | return std::vector<llvm::support::detail::format_adapter *>{&items...}; |
139 | } |
140 | }; |
141 | |
142 | StringRef fmt; |
143 | const FmtContext *context; |
144 | std::vector<llvm::support::detail::format_adapter *> adapters; |
145 | std::vector<FmtReplacement> replacements; |
146 | |
147 | public: |
148 | FmtObjectBase(StringRef fmt, const FmtContext *ctx, size_t numParams) |
149 | : fmt(fmt), context(ctx), replacements(parseFormatString(fmt)) {} |
150 | |
151 | FmtObjectBase(const FmtObjectBase &that) = delete; |
152 | |
153 | FmtObjectBase(FmtObjectBase &&that) |
154 | : fmt(that.fmt), context(that.context), |
155 | adapters(), // adapters are initialized by FmtObject |
156 | replacements(std::move(that.replacements)) {} |
157 | |
158 | void format(llvm::raw_ostream &s) const; |
159 | |
160 | std::string str() const { |
161 | std::string result; |
162 | llvm::raw_string_ostream s(result); |
163 | format(s); |
164 | return s.str(); |
165 | } |
166 | |
167 | template <unsigned N> |
168 | SmallString<N> sstr() const { |
169 | SmallString<N> result; |
170 | llvm::raw_svector_ostream s(result); |
171 | format(s); |
172 | return result; |
173 | } |
174 | |
175 | template <unsigned N> |
176 | operator SmallString<N>() const { |
177 | return sstr<N>(); |
178 | } |
179 | |
180 | operator std::string() const { return str(); } |
181 | }; |
182 | |
183 | template <typename Tuple> |
184 | class FmtObject : public FmtObjectBase { |
185 | // Storage for the parameter adapters. Since the base class erases the type |
186 | // of the parameters, we have to own the storage for the parameters here, and |
187 | // have the base class store type-erased pointers into this tuple. |
188 | Tuple parameters; |
189 | |
190 | public: |
191 | FmtObject(StringRef fmt, const FmtContext *ctx, Tuple &¶ms) |
192 | : FmtObjectBase(fmt, ctx, std::tuple_size<Tuple>::value), |
193 | parameters(std::move(params)) { |
194 | adapters.reserve(n: std::tuple_size<Tuple>::value); |
195 | adapters = std::apply(CreateAdapters(), parameters); |
196 | } |
197 | |
198 | FmtObject(FmtObject const &that) = delete; |
199 | |
200 | FmtObject(FmtObject &&that) |
201 | : FmtObjectBase(std::move(that)), parameters(std::move(that.parameters)) { |
202 | adapters.reserve(n: that.adapters.size()); |
203 | adapters = std::apply(CreateAdapters(), parameters); |
204 | } |
205 | }; |
206 | |
207 | class FmtStrVecObject : public FmtObjectBase { |
208 | public: |
209 | using StrFormatAdapter = decltype(llvm::support::detail::build_format_adapter( |
210 | Item: std::declval<std::string>())); |
211 | |
212 | FmtStrVecObject(StringRef fmt, const FmtContext *ctx, |
213 | ArrayRef<std::string> params); |
214 | FmtStrVecObject(FmtStrVecObject const &that) = delete; |
215 | FmtStrVecObject(FmtStrVecObject &&that); |
216 | |
217 | private: |
218 | SmallVector<StrFormatAdapter, 16> parameters; |
219 | }; |
220 | |
221 | /// Formats text by substituting placeholders in format string with replacement |
222 | /// parameters. |
223 | /// |
224 | /// There are two categories of placeholders accepted, both led by a '$' sign: |
225 | /// |
226 | /// 1.a Positional placeholder: $[0-9]+ |
227 | /// 1.b Positional range placeholder: $[0-9]+... |
228 | /// 2. Special placeholder: $[a-zA-Z_][a-zA-Z0-9_]* |
229 | /// |
230 | /// Replacement parameters for positional placeholders are supplied as the |
231 | /// `vals` parameter pack with 1:1 mapping. That is, $0 will be replaced by the |
232 | /// first parameter in `vals`, $1 by the second one, and so on. Note that you |
233 | /// can use the positional placeholders in any order and repeat any times, for |
234 | /// example, "$2 $1 $1 $0" is accepted. |
235 | /// |
236 | /// Replace parameters for positional range placeholders are supplied as if |
237 | /// positional placeholders were specified with commas separating them. |
238 | /// |
239 | /// Replacement parameters for special placeholders are supplied using the `ctx` |
240 | /// format context. |
241 | /// |
242 | /// The `fmt` is recorded as a `StringRef` inside the returned `FmtObject`. |
243 | /// The caller needs to make sure the underlying data is available when the |
244 | /// `FmtObject` is used. |
245 | /// |
246 | /// `ctx` accepts a nullptr if there is no special placeholder is used. |
247 | /// |
248 | /// If no substitution is provided for a placeholder or any error happens during |
249 | /// format string parsing or replacement, the placeholder will be outputted |
250 | /// as-is with an additional marker '<no-subst-found>', to aid debugging. |
251 | /// |
252 | /// To print a '$' literally, escape it with '$$'. |
253 | /// |
254 | /// This utility function is inspired by LLVM formatv(), with modifications |
255 | /// specially tailored for TableGen C++ generation usage: |
256 | /// |
257 | /// 1. This utility use '$' instead of '{' and '}' for denoting the placeholder |
258 | /// because '{' and '}' are frequently used in C++ code. |
259 | /// 2. This utility does not support format layout because it is rarely needed |
260 | /// in C++ code generation. |
261 | template <typename... Ts> |
262 | inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&...vals) |
263 | -> FmtObject< |
264 | decltype(std::make_tuple(llvm::support::detail::build_format_adapter( |
265 | std::forward<Ts>(vals))...))> { |
266 | using ParamTuple = decltype(std::make_tuple( |
267 | llvm::support::detail::build_format_adapter(std::forward<Ts>(vals))...)); |
268 | return FmtObject<ParamTuple>( |
269 | fmt, ctx, |
270 | std::make_tuple(llvm::support::detail::build_format_adapter( |
271 | std::forward<Ts>(vals))...)); |
272 | } |
273 | |
274 | inline FmtStrVecObject tgfmt(StringRef fmt, const FmtContext *ctx, |
275 | ArrayRef<std::string> params) { |
276 | return FmtStrVecObject(fmt, ctx, params); |
277 | } |
278 | |
279 | } // namespace tblgen |
280 | } // namespace mlir |
281 | |
282 | #endif // MLIR_TABLEGEN_FORMAT_H_ |
283 | |