| 1 | //===- Format.h - Utilities for String Format -------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file declares utilities for formatting strings. They are specially |
| 10 | // tailored to the needs of TableGen'ing op definitions and rewrite rules, |
| 11 | // so they are not expected to be used as widely applicable utilities. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef MLIR_TABLEGEN_FORMAT_H_ |
| 16 | #define MLIR_TABLEGEN_FORMAT_H_ |
| 17 | |
| 18 | #include "mlir/Support/LLVM.h" |
| 19 | #include "llvm/ADT/DenseMap.h" |
| 20 | #include "llvm/ADT/StringMap.h" |
| 21 | #include "llvm/Support/FormatVariadic.h" |
| 22 | |
| 23 | namespace mlir { |
| 24 | namespace tblgen { |
| 25 | |
| 26 | /// Format context containing substitutions for special placeholders. |
| 27 | /// |
| 28 | /// This context divides special placeholders into two categories: builtin ones |
| 29 | /// and custom ones. |
| 30 | /// |
| 31 | /// Builtin placeholders are baked into `FmtContext` and each one of them has a |
| 32 | /// dedicated setter. They can be used in all dialects. Their names follow the |
| 33 | /// convention of `$_<name>`. The rationale of the leading underscore is to |
| 34 | /// avoid confusion and name collision: op arguments/attributes/results are |
| 35 | /// named as $<name>, and we can potentially support referencing those entities |
| 36 | /// directly in the format template in the future. |
| 37 | // |
| 38 | /// Custom ones are registered by dialect-specific TableGen backends and use the |
| 39 | /// same unified setter. |
| 40 | class FmtContext { |
| 41 | public: |
| 42 | // Placeholder kinds |
| 43 | enum class PHKind : char { |
| 44 | None, |
| 45 | Custom, // For custom placeholders |
| 46 | Builder, // For the $_builder placeholder |
| 47 | Self, // For the $_self placeholder |
| 48 | }; |
| 49 | |
| 50 | FmtContext() = default; |
| 51 | |
| 52 | // Create a format context with a list of substitutions. |
| 53 | FmtContext(ArrayRef<std::pair<StringRef, StringRef>> subs); |
| 54 | |
| 55 | // Setter for custom placeholders |
| 56 | FmtContext &addSubst(StringRef placeholder, const Twine &subst); |
| 57 | |
| 58 | // Setters for builtin placeholders |
| 59 | FmtContext &withBuilder(Twine subst); |
| 60 | FmtContext &withSelf(Twine subst); |
| 61 | |
| 62 | std::optional<StringRef> getSubstFor(PHKind placeholder) const; |
| 63 | std::optional<StringRef> getSubstFor(StringRef placeholder) const; |
| 64 | |
| 65 | static PHKind getPlaceHolderKind(StringRef str); |
| 66 | |
| 67 | private: |
| 68 | struct PHKindInfo : DenseMapInfo<PHKind> { |
| 69 | using CharInfo = DenseMapInfo<char>; |
| 70 | |
| 71 | static inline PHKind getEmptyKey() { |
| 72 | return static_cast<PHKind>(CharInfo::getEmptyKey()); |
| 73 | } |
| 74 | static inline PHKind getTombstoneKey() { |
| 75 | return static_cast<PHKind>(CharInfo::getTombstoneKey()); |
| 76 | } |
| 77 | static unsigned getHashValue(const PHKind &val) { |
| 78 | return CharInfo::getHashValue(Val: static_cast<char>(val)); |
| 79 | } |
| 80 | |
| 81 | static bool isEqual(const PHKind &lhs, const PHKind &rhs) { |
| 82 | return lhs == rhs; |
| 83 | } |
| 84 | }; |
| 85 | |
| 86 | llvm::SmallDenseMap<PHKind, std::string, 4, PHKindInfo> builtinSubstMap; |
| 87 | llvm::StringMap<std::string> customSubstMap; |
| 88 | }; |
| 89 | |
| 90 | /// Struct representing a replacement segment for the formatted string. It can |
| 91 | /// be a segment of the formatting template (for `Literal`) or a replacement |
| 92 | /// parameter (for `PositionalPH`, `PositionalRangePH` and `SpecialPH`). |
| 93 | struct FmtReplacement { |
| 94 | enum class Type { |
| 95 | Empty, |
| 96 | Literal, |
| 97 | PositionalPH, |
| 98 | PositionalRangePH, |
| 99 | SpecialPH |
| 100 | }; |
| 101 | |
| 102 | FmtReplacement() = default; |
| 103 | explicit FmtReplacement(StringRef literal) |
| 104 | : type(Type::Literal), spec(literal) {} |
| 105 | FmtReplacement(StringRef spec, size_t index) |
| 106 | : type(Type::PositionalPH), spec(spec), index(index) {} |
| 107 | FmtReplacement(StringRef spec, size_t index, size_t end) |
| 108 | : type(Type::PositionalRangePH), spec(spec), index(index), end(end) {} |
| 109 | FmtReplacement(StringRef spec, FmtContext::PHKind placeholder) |
| 110 | : type(Type::SpecialPH), spec(spec), placeholder(placeholder) {} |
| 111 | |
| 112 | Type type = Type::Empty; |
| 113 | StringRef spec; |
| 114 | size_t index = 0; |
| 115 | size_t end = kUnset; |
| 116 | FmtContext::PHKind placeholder = FmtContext::PHKind::None; |
| 117 | |
| 118 | static constexpr size_t kUnset = -1; |
| 119 | }; |
| 120 | |
| 121 | class FmtObjectBase { |
| 122 | private: |
| 123 | static std::pair<FmtReplacement, StringRef> splitFmtSegment(StringRef fmt); |
| 124 | static std::vector<FmtReplacement> parseFormatString(StringRef fmt); |
| 125 | |
| 126 | protected: |
| 127 | // The parameters are stored in a std::tuple, which does not provide runtime |
| 128 | // indexing capabilities. In order to enable runtime indexing, we use this |
| 129 | // structure to put the parameters into a std::vector. Since the parameters |
| 130 | // are not all the same type, we use some type-erasure by wrapping the |
| 131 | // parameters in a template class that derives from a non-template superclass. |
| 132 | // Essentially, we are converting a std::tuple<Derived<Ts...>> to a |
| 133 | // std::vector<Base*>. |
| 134 | struct CreateAdapters { |
| 135 | template <typename... Ts> |
| 136 | std::vector<llvm::support::detail::format_adapter *> |
| 137 | operator()(Ts &...items) { |
| 138 | return std::vector<llvm::support::detail::format_adapter *>{&items...}; |
| 139 | } |
| 140 | }; |
| 141 | |
| 142 | StringRef fmt; |
| 143 | const FmtContext *context; |
| 144 | std::vector<llvm::support::detail::format_adapter *> adapters; |
| 145 | std::vector<FmtReplacement> replacements; |
| 146 | |
| 147 | public: |
| 148 | FmtObjectBase(StringRef fmt, const FmtContext *ctx, size_t numParams) |
| 149 | : fmt(fmt), context(ctx), replacements(parseFormatString(fmt)) {} |
| 150 | |
| 151 | FmtObjectBase(const FmtObjectBase &that) = delete; |
| 152 | |
| 153 | FmtObjectBase(FmtObjectBase &&that) |
| 154 | : fmt(that.fmt), context(that.context), |
| 155 | adapters(), // adapters are initialized by FmtObject |
| 156 | replacements(std::move(that.replacements)) {} |
| 157 | |
| 158 | void format(llvm::raw_ostream &s) const; |
| 159 | |
| 160 | std::string str() const { |
| 161 | std::string result; |
| 162 | llvm::raw_string_ostream s(result); |
| 163 | format(s); |
| 164 | return s.str(); |
| 165 | } |
| 166 | |
| 167 | template <unsigned N> |
| 168 | SmallString<N> sstr() const { |
| 169 | SmallString<N> result; |
| 170 | llvm::raw_svector_ostream s(result); |
| 171 | format(s); |
| 172 | return result; |
| 173 | } |
| 174 | |
| 175 | template <unsigned N> |
| 176 | operator SmallString<N>() const { |
| 177 | return sstr<N>(); |
| 178 | } |
| 179 | |
| 180 | operator std::string() const { return str(); } |
| 181 | }; |
| 182 | |
| 183 | template <typename Tuple> |
| 184 | class FmtObject : public FmtObjectBase { |
| 185 | // Storage for the parameter adapters. Since the base class erases the type |
| 186 | // of the parameters, we have to own the storage for the parameters here, and |
| 187 | // have the base class store type-erased pointers into this tuple. |
| 188 | Tuple parameters; |
| 189 | |
| 190 | public: |
| 191 | FmtObject(StringRef fmt, const FmtContext *ctx, Tuple &¶ms) |
| 192 | : FmtObjectBase(fmt, ctx, std::tuple_size<Tuple>::value), |
| 193 | parameters(std::move(params)) { |
| 194 | adapters.reserve(n: std::tuple_size<Tuple>::value); |
| 195 | adapters = std::apply(CreateAdapters(), parameters); |
| 196 | } |
| 197 | |
| 198 | FmtObject(FmtObject const &that) = delete; |
| 199 | |
| 200 | FmtObject(FmtObject &&that) |
| 201 | : FmtObjectBase(std::move(that)), parameters(std::move(that.parameters)) { |
| 202 | adapters.reserve(n: that.adapters.size()); |
| 203 | adapters = std::apply(CreateAdapters(), parameters); |
| 204 | } |
| 205 | }; |
| 206 | |
| 207 | class FmtStrVecObject : public FmtObjectBase { |
| 208 | public: |
| 209 | using StrFormatAdapter = decltype(llvm::support::detail::build_format_adapter( |
| 210 | Item: std::declval<std::string>())); |
| 211 | |
| 212 | FmtStrVecObject(StringRef fmt, const FmtContext *ctx, |
| 213 | ArrayRef<std::string> params); |
| 214 | FmtStrVecObject(FmtStrVecObject const &that) = delete; |
| 215 | FmtStrVecObject(FmtStrVecObject &&that); |
| 216 | |
| 217 | private: |
| 218 | SmallVector<StrFormatAdapter, 16> parameters; |
| 219 | }; |
| 220 | |
| 221 | /// Formats text by substituting placeholders in format string with replacement |
| 222 | /// parameters. |
| 223 | /// |
| 224 | /// There are two categories of placeholders accepted, both led by a '$' sign: |
| 225 | /// |
| 226 | /// 1.a Positional placeholder: $[0-9]+ |
| 227 | /// 1.b Positional range placeholder: $[0-9]+... |
| 228 | /// 2. Special placeholder: $[a-zA-Z_][a-zA-Z0-9_]* |
| 229 | /// |
| 230 | /// Replacement parameters for positional placeholders are supplied as the |
| 231 | /// `vals` parameter pack with 1:1 mapping. That is, $0 will be replaced by the |
| 232 | /// first parameter in `vals`, $1 by the second one, and so on. Note that you |
| 233 | /// can use the positional placeholders in any order and repeat any times, for |
| 234 | /// example, "$2 $1 $1 $0" is accepted. |
| 235 | /// |
| 236 | /// Replace parameters for positional range placeholders are supplied as if |
| 237 | /// positional placeholders were specified with commas separating them. |
| 238 | /// |
| 239 | /// Replacement parameters for special placeholders are supplied using the `ctx` |
| 240 | /// format context. |
| 241 | /// |
| 242 | /// The `fmt` is recorded as a `StringRef` inside the returned `FmtObject`. |
| 243 | /// The caller needs to make sure the underlying data is available when the |
| 244 | /// `FmtObject` is used. |
| 245 | /// |
| 246 | /// `ctx` accepts a nullptr if there is no special placeholder is used. |
| 247 | /// |
| 248 | /// If no substitution is provided for a placeholder or any error happens during |
| 249 | /// format string parsing or replacement, the placeholder will be outputted |
| 250 | /// as-is with an additional marker '<no-subst-found>', to aid debugging. |
| 251 | /// |
| 252 | /// To print a '$' literally, escape it with '$$'. |
| 253 | /// |
| 254 | /// This utility function is inspired by LLVM formatv(), with modifications |
| 255 | /// specially tailored for TableGen C++ generation usage: |
| 256 | /// |
| 257 | /// 1. This utility use '$' instead of '{' and '}' for denoting the placeholder |
| 258 | /// because '{' and '}' are frequently used in C++ code. |
| 259 | /// 2. This utility does not support format layout because it is rarely needed |
| 260 | /// in C++ code generation. |
| 261 | template <typename... Ts> |
| 262 | inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&...vals) |
| 263 | -> FmtObject< |
| 264 | decltype(std::make_tuple(llvm::support::detail::build_format_adapter( |
| 265 | std::forward<Ts>(vals))...))> { |
| 266 | using ParamTuple = decltype(std::make_tuple( |
| 267 | llvm::support::detail::build_format_adapter(std::forward<Ts>(vals))...)); |
| 268 | return FmtObject<ParamTuple>( |
| 269 | fmt, ctx, |
| 270 | std::make_tuple(llvm::support::detail::build_format_adapter( |
| 271 | std::forward<Ts>(vals))...)); |
| 272 | } |
| 273 | |
| 274 | inline FmtStrVecObject tgfmt(StringRef fmt, const FmtContext *ctx, |
| 275 | ArrayRef<std::string> params) { |
| 276 | return FmtStrVecObject(fmt, ctx, params); |
| 277 | } |
| 278 | |
| 279 | } // namespace tblgen |
| 280 | } // namespace mlir |
| 281 | |
| 282 | #endif // MLIR_TABLEGEN_FORMAT_H_ |
| 283 | |