1//===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains common classes for building custom assembly format parsers
10// and generators.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
15#define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
16
17#include "mlir/Support/LLVM.h"
18#include "mlir/Support/LogicalResult.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/StringSet.h"
21#include "llvm/Support/Allocator.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/SMLoc.h"
24#include <vector>
25
26namespace llvm {
27class SourceMgr;
28} // namespace llvm
29
30namespace mlir {
31namespace tblgen {
32
33//===----------------------------------------------------------------------===//
34// FormatToken
35//===----------------------------------------------------------------------===//
36
37/// This class represents a specific token in the input format.
38class FormatToken {
39public:
40 /// Basic token kinds.
41 enum Kind {
42 // Markers.
43 eof,
44 error,
45
46 // Tokens with no info.
47 l_paren,
48 r_paren,
49 caret,
50 colon,
51 comma,
52 equal,
53 less,
54 greater,
55 question,
56 star,
57 pipe,
58
59 // Keywords.
60 keyword_start,
61 kw_attr_dict,
62 kw_attr_dict_w_keyword,
63 kw_prop_dict,
64 kw_custom,
65 kw_functional_type,
66 kw_oilist,
67 kw_operands,
68 kw_params,
69 kw_qualified,
70 kw_ref,
71 kw_regions,
72 kw_results,
73 kw_struct,
74 kw_successors,
75 kw_type,
76 keyword_end,
77
78 // String valued tokens.
79 identifier,
80 literal,
81 variable,
82 string,
83 };
84
85 FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
86
87 /// Return the bytes that make up this token.
88 StringRef getSpelling() const { return spelling; }
89
90 /// Return the kind of this token.
91 Kind getKind() const { return kind; }
92
93 /// Return a location for this token.
94 SMLoc getLoc() const;
95
96 /// Returns true if the token is of the given kind.
97 bool is(Kind kind) { return getKind() == kind; }
98
99 /// Return if this token is a keyword.
100 bool isKeyword() const {
101 return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end;
102 }
103
104private:
105 /// Discriminator that indicates the kind of token this is.
106 Kind kind;
107
108 /// A reference to the entire token contents; this is always a pointer into
109 /// a memory buffer owned by the source manager.
110 StringRef spelling;
111};
112
113//===----------------------------------------------------------------------===//
114// FormatLexer
115//===----------------------------------------------------------------------===//
116
117/// This class implements a simple lexer for operation assembly format strings.
118class FormatLexer {
119public:
120 FormatLexer(llvm::SourceMgr &mgr, SMLoc loc);
121
122 /// Lex the next token and return it.
123 FormatToken lexToken();
124
125 /// Emit an error to the lexer with the given location and message.
126 FormatToken emitError(SMLoc loc, const Twine &msg);
127 FormatToken emitError(const char *loc, const Twine &msg);
128
129 FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg, const Twine &note);
130
131private:
132 /// Return the next character in the stream.
133 int getNextChar();
134
135 /// Lex an identifier, literal, variable, or string.
136 FormatToken lexIdentifier(const char *tokStart);
137 FormatToken lexLiteral(const char *tokStart);
138 FormatToken lexVariable(const char *tokStart);
139 FormatToken lexString(const char *tokStart);
140
141 /// Create a token with the current pointer and a start pointer.
142 FormatToken formToken(FormatToken::Kind kind, const char *tokStart) {
143 return FormatToken(kind, StringRef(tokStart, curPtr - tokStart));
144 }
145
146 /// The source manager containing the format string.
147 llvm::SourceMgr &mgr;
148 /// Location of the format string.
149 SMLoc loc;
150 /// Buffer containing the format string.
151 StringRef curBuffer;
152 /// Current pointer in the buffer.
153 const char *curPtr;
154};
155
156//===----------------------------------------------------------------------===//
157// FormatElement
158//===----------------------------------------------------------------------===//
159
160/// This class represents a single format element.
161///
162/// If you squint and take a close look, you can see the outline of a `Format`
163/// dialect.
class FormatElement {
public:
  virtual ~FormatElement();

  // Top-level categories of format elements.
  enum Kind {
    Literal,
    String,
    Variable,
    Whitespace,
    Directive,
    Optional
  };

  /// LLVM-style RTTI hook. Any element is trivially a `FormatElement`, so the
  /// argument is not inspected.
  static bool classof(const FormatElement *) { return true; }

  /// Return the category this element was constructed with.
  Kind getKind() const { return kind; }

protected:
  /// Only derived classes may construct an element; they supply their kind.
  FormatElement(Kind kind) : kind(kind) {}

private:
  /// Category of this element, fixed at construction.
  Kind kind;
};
185
186/// The base class for all format elements. This class implements common methods
187/// for LLVM-style RTTI.
188template <FormatElement::Kind ElementKind>
189class FormatElementBase : public FormatElement {
190public:
191 /// Support LLVM-style RTTI.
192 static bool classof(const FormatElement *el) {
193 return ElementKind == el->getKind();
194 }
195
196protected:
197 /// Create a format element with the given kind.
198 FormatElementBase() : FormatElement(ElementKind) {}
199};
200
201/// This class represents a literal element. A literal is either one of the
202/// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
203/// `literal`).
204class LiteralElement : public FormatElementBase<FormatElement::Literal> {
205public:
206 /// Create a literal element with the given spelling.
207 explicit LiteralElement(StringRef spelling) : spelling(spelling) {}
208
209 /// Get the spelling of the literal.
210 StringRef getSpelling() const { return spelling; }
211
212private:
213 /// The spelling of the variable, i.e. the string contained within the
214 /// backticks.
215 StringRef spelling;
216};
217
218/// This class represents a raw string that can contain arbitrary C++ code.
219class StringElement : public FormatElementBase<FormatElement::String> {
220public:
221 /// Create a string element with the given contents.
222 explicit StringElement(std::string value) : value(std::move(value)) {}
223
224 /// Get the value of the string element.
225 StringRef getValue() const { return value; }
226
227private:
228 /// The contents of the string.
229 std::string value;
230};
231
232/// This class represents a variable element. A variable refers to some part of
233/// the object being parsed, e.g. an attribute or operand on an operation or a
234/// parameter on an attribute.
235class VariableElement : public FormatElementBase<FormatElement::Variable> {
236public:
237 /// These are the kinds of variables.
238 enum Kind {
239 Attribute,
240 Operand,
241 Region,
242 Result,
243 Successor,
244 Parameter,
245 Property
246 };
247
248 /// Get the kind of variable.
249 Kind getKind() const { return kind; }
250
251protected:
252 /// Create a variable with a kind.
253 VariableElement(Kind kind) : kind(kind) {}
254
255private:
256 /// The kind of variable.
257 Kind kind;
258};
259
260/// Base class for variable elements. This class implements common methods for
261/// LLVM-style RTTI.
262template <VariableElement::Kind VariableKind>
263class VariableElementBase : public VariableElement {
264public:
265 /// An element is of this class if it is a variable and has the same variable
266 /// type.
267 static bool classof(const FormatElement *el) {
268 if (auto *varEl = dyn_cast<VariableElement>(Val: el))
269 return VariableKind == varEl->getKind();
270 return false;
271 }
272
273protected:
274 /// Create a variable element with the given variable kind.
275 VariableElementBase() : VariableElement(VariableKind) {}
276};
277
278/// This class represents a whitespace element, e.g. a newline or space. It is a
279/// literal that is printed but never parsed. When the value is empty, i.e. ``,
280/// a space is elided where one would have been printed automatically.
281class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> {
282public:
283 /// Create a whitespace element.
284 explicit WhitespaceElement(StringRef value) : value(value) {}
285
286 /// Get the whitespace value.
287 StringRef getValue() const { return value; }
288
289private:
290 /// The value of the whitespace element. Can be empty.
291 StringRef value;
292};
293
294class DirectiveElement : public FormatElementBase<FormatElement::Directive> {
295public:
296 /// These are the kinds of directives.
297 enum Kind {
298 AttrDict,
299 PropDict,
300 Custom,
301 FunctionalType,
302 OIList,
303 Operands,
304 Ref,
305 Regions,
306 Results,
307 Successors,
308 Type,
309 Params,
310 Struct
311 };
312
313 /// Get the directive kind.
314 Kind getKind() const { return kind; }
315
316protected:
317 /// Create a directive element with a kind.
318 DirectiveElement(Kind kind) : kind(kind) {}
319
320private:
321 /// The directive kind.
322 Kind kind;
323};
324
325/// Base class for directive elements. This class implements common methods for
326/// LLVM-style RTTI.
327template <DirectiveElement::Kind DirectiveKind>
328class DirectiveElementBase : public DirectiveElement {
329public:
330 /// Create a directive element with the specified kind.
331 DirectiveElementBase() : DirectiveElement(DirectiveKind) {}
332
333 /// A format element is of this class if it is a directive element and has the
334 /// same kind.
335 static bool classof(const FormatElement *el) {
336 if (auto *directiveEl = dyn_cast<DirectiveElement>(Val: el))
337 return DirectiveKind == directiveEl->getKind();
338 return false;
339 }
340};
341
342/// This class represents a custom format directive that is implemented by the
343/// user in C++. The directive accepts a list of arguments that is passed to the
344/// C++ function.
345class CustomDirective : public DirectiveElementBase<DirectiveElement::Custom> {
346public:
347 /// Create a custom directive with a name and list of arguments.
348 CustomDirective(StringRef name, std::vector<FormatElement *> &&arguments)
349 : name(name), arguments(std::move(arguments)) {}
350
351 /// Get the custom directive name.
352 StringRef getName() const { return name; }
353
354 /// Get the arguments to the custom directive.
355 ArrayRef<FormatElement *> getArguments() const { return arguments; }
356
357private:
358 /// The name of the custom directive. The name is used to call two C++
359 /// methods: `parse{name}` and `print{name}` with the given arguments.
360 StringRef name;
361 /// The arguments with which to call the custom functions. These are either
362 /// variables (for which the functions are responsible for populating) or
363 /// references to variables.
364 std::vector<FormatElement *> arguments;
365};
366
367/// This class represents a reference directive. This directive can be used to
368/// reference but not bind a previously bound variable or format object. Its
369/// current only use is to pass variables as arguments to the custom directive.
370class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> {
371public:
372 /// Create a reference directive with the single referenced child.
373 RefDirective(FormatElement *arg) : arg(arg) {}
374
375 /// Get the reference argument.
376 FormatElement *getArg() const { return arg; }
377
378private:
379 /// The referenced argument.
380 FormatElement *arg;
381};
382
383/// This class represents a group of elements that are optionally emitted based
384/// on an optional variable "anchor" and a group of elements that are emitted
385/// when the anchor element is not present.
386class OptionalElement : public FormatElementBase<FormatElement::Optional> {
387public:
388 /// Create an optional group with the given child elements.
389 OptionalElement(std::vector<FormatElement *> &&thenElements,
390 std::vector<FormatElement *> &&elseElements,
391 unsigned thenParseStart, unsigned elseParseStart,
392 FormatElement *anchor, bool inverted)
393 : thenElements(std::move(thenElements)),
394 elseElements(std::move(elseElements)), thenParseStart(thenParseStart),
395 elseParseStart(elseParseStart), anchor(anchor), inverted(inverted) {}
396
397 /// Return the `then` elements of the optional group. Drops the first
398 /// `thenParseStart` whitespace elements if `parseable` is true.
399 ArrayRef<FormatElement *> getThenElements(bool parseable = false) const {
400 return llvm::ArrayRef(thenElements)
401 .drop_front(N: parseable ? thenParseStart : 0);
402 }
403
404 /// Return the `else` elements of the optional group. Drops the first
405 /// `elseParseStart` whitespace elements if `parseable` is true.
406 ArrayRef<FormatElement *> getElseElements(bool parseable = false) const {
407 return llvm::ArrayRef(elseElements)
408 .drop_front(N: parseable ? elseParseStart : 0);
409 }
410
411 /// Return the anchor of the optional group.
412 FormatElement *getAnchor() const { return anchor; }
413
414 /// Return true if the optional group is inverted.
415 bool isInverted() const { return inverted; }
416
417private:
418 /// The child elements emitted when the anchor is present.
419 std::vector<FormatElement *> thenElements;
420 /// The child elements emitted when the anchor is not present.
421 std::vector<FormatElement *> elseElements;
422 /// The index of the first element that is parsed in `thenElements`. That is,
423 /// the first non-whitespace element.
424 unsigned thenParseStart;
425 /// The index of the first element that is parsed in `elseElements`. That is,
426 /// the first non-whitespace element.
427 unsigned elseParseStart;
428 /// The anchor element of the optional group.
429 FormatElement *anchor;
430 /// Whether the optional group condition is inverted and the anchor element is
431 /// in the else group.
432 bool inverted;
433};
434
435//===----------------------------------------------------------------------===//
// FormatParser
437//===----------------------------------------------------------------------===//
438
439/// Base class for a parser that implements an assembly format. This class
440/// defines a common assembly format syntax and the creation of format elements.
441/// Subclasses will need to implement parsing for the format elements they
442/// support.
443class FormatParser {
444public:
445 /// Vtable anchor.
446 virtual ~FormatParser();
447
448 /// Parse the assembly format.
449 FailureOr<std::vector<FormatElement *>> parse();
450
451protected:
452 /// The current context of the parser when parsing an element.
453 enum Context {
454 /// The element is being parsed in a "top-level" context, i.e. at the top of
455 /// the format or in an optional group.
456 TopLevelContext,
457 /// The element is being parsed as a custom directive child.
458 CustomDirectiveContext,
459 /// The element is being parsed as a type directive child.
460 TypeDirectiveContext,
461 /// The element is being parsed as a reference directive child.
462 RefDirectiveContext,
463 /// The element is being parsed as a struct directive child.
464 StructDirectiveContext
465 };
466
467 /// Create a format parser with the given source manager and a location.
468 explicit FormatParser(llvm::SourceMgr &mgr, llvm::SMLoc loc)
469 : lexer(mgr, loc), curToken(lexer.lexToken()) {}
470
471 /// Allocate and construct a format element.
472 template <typename FormatElementT, typename... Args>
473 FormatElementT *create(Args &&...args) {
474 // FormatElementT *ptr = allocator.Allocate<FormatElementT>();
475 // ::new (ptr) FormatElementT(std::forward<Args>(args)...);
476 // return ptr;
477 auto mem = std::make_unique<FormatElementT>(std::forward<Args>(args)...);
478 FormatElementT *ptr = mem.get();
479 allocator.push_back(std::move(mem));
480 return ptr;
481 }
482
483 //===--------------------------------------------------------------------===//
484 // Element Parsing
485
486 /// Parse a single element of any kind.
487 FailureOr<FormatElement *> parseElement(Context ctx);
488 /// Parse a literal.
489 FailureOr<FormatElement *> parseLiteral(Context ctx);
490 /// Parse a string.
491 FailureOr<FormatElement *> parseString(Context ctx);
492 /// Parse a variable.
493 FailureOr<FormatElement *> parseVariable(Context ctx);
494 /// Parse a directive.
495 FailureOr<FormatElement *> parseDirective(Context ctx);
496 /// Parse an optional group.
497 FailureOr<FormatElement *> parseOptionalGroup(Context ctx);
498
499 /// Parse a custom directive.
500 FailureOr<FormatElement *> parseCustomDirective(llvm::SMLoc loc, Context ctx);
501
502 /// Parse a format-specific variable kind.
503 virtual FailureOr<FormatElement *>
504 parseVariableImpl(llvm::SMLoc loc, StringRef name, Context ctx) = 0;
505 /// Parse a format-specific directive kind.
506 virtual FailureOr<FormatElement *>
507 parseDirectiveImpl(llvm::SMLoc loc, FormatToken::Kind kind, Context ctx) = 0;
508
509 //===--------------------------------------------------------------------===//
510 // Format Verification
511
512 /// Verify that the format is well-formed.
513 virtual LogicalResult verify(llvm::SMLoc loc,
514 ArrayRef<FormatElement *> elements) = 0;
515 /// Verify the arguments to a custom directive.
516 virtual LogicalResult
517 verifyCustomDirectiveArguments(llvm::SMLoc loc,
518 ArrayRef<FormatElement *> arguments) = 0;
519 /// Verify the elements of an optional group.
520 virtual LogicalResult
521 verifyOptionalGroupElements(llvm::SMLoc loc,
522 ArrayRef<FormatElement *> elements,
523 FormatElement *anchor) = 0;
524
525 //===--------------------------------------------------------------------===//
526 // Lexer Utilities
527
528 /// Emit an error at the given location.
529 LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) {
530 lexer.emitError(loc, msg);
531 return failure();
532 }
533
534 /// Emit an error and a note at the given notation.
535 LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
536 const Twine &note) {
537 lexer.emitErrorAndNote(loc, msg, note);
538 return failure();
539 }
540
541 /// Parse a single token of the expected kind.
542 FailureOr<FormatToken> parseToken(FormatToken::Kind kind, const Twine &msg) {
543 if (!curToken.is(kind))
544 return emitError(loc: curToken.getLoc(), msg);
545 FormatToken tok = curToken;
546 consumeToken();
547 return tok;
548 }
549
550 /// Advance the lexer to the next token.
551 void consumeToken() {
552 assert(!curToken.is(FormatToken::eof) && !curToken.is(FormatToken::error) &&
553 "shouldn't advance past EOF or errors");
554 curToken = lexer.lexToken();
555 }
556
557 /// Get the current token.
558 FormatToken peekToken() { return curToken; }
559
560private:
561 /// The format parser retains ownership of the format elements in a bump
562 /// pointer allocator.
563 // FIXME: FormatElement with `std::vector` need to be converted to use
564 // trailing objects.
565 // llvm::BumpPtrAllocator allocator;
566 std::vector<std::unique_ptr<FormatElement>> allocator;
567 /// The format lexer to use.
568 FormatLexer lexer;
569 /// The current token in the lexer.
570 FormatToken curToken;
571};
572
573//===----------------------------------------------------------------------===//
574// Utility Functions
575//===----------------------------------------------------------------------===//
576
577/// Whether a space needs to be emitted before a literal. E.g., two keywords
578/// back-to-back require a space separator, but a keyword followed by '<' does
579/// not require a space.
580bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);
581
582/// Returns true if the given string can be formatted as a keyword.
583bool canFormatStringAsKeyword(StringRef value,
584 function_ref<void(Twine)> emitError = nullptr);
585
586/// Returns true if the given string is valid format literal element.
587/// If `emitError` is provided, it is invoked with the reason for the failure.
588bool isValidLiteral(StringRef value,
589 function_ref<void(Twine)> emitError = nullptr);
590
591/// Whether a failure in parsing the assembly format should be a fatal error.
592extern llvm::cl::opt<bool> formatErrorIsFatal;
593
594} // namespace tblgen
595} // namespace mlir
596
597#endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
598

// Source: mlir/tools/mlir-tblgen/FormatGen.h