1//===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains common classes for building custom assembly format parsers
10// and generators.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
15#define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
16
17#include "mlir/Support/LLVM.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSet.h"
20#include "llvm/Support/Allocator.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/SMLoc.h"
23#include <vector>
24
25namespace llvm {
26class SourceMgr;
27} // namespace llvm
28
29namespace mlir {
30namespace tblgen {
31
32//===----------------------------------------------------------------------===//
33// FormatToken
34//===----------------------------------------------------------------------===//
35
36/// This class represents a specific token in the input format.
37class FormatToken {
38public:
39 /// Basic token kinds.
40 enum Kind {
41 // Markers.
42 eof,
43 error,
44
45 // Tokens with no info.
46 l_paren,
47 r_paren,
48 caret,
49 colon,
50 comma,
51 equal,
52 less,
53 greater,
54 question,
55 star,
56 pipe,
57
58 // Keywords.
59 keyword_start,
60 kw_attr_dict,
61 kw_attr_dict_w_keyword,
62 kw_prop_dict,
63 kw_custom,
64 kw_functional_type,
65 kw_oilist,
66 kw_operands,
67 kw_params,
68 kw_qualified,
69 kw_ref,
70 kw_regions,
71 kw_results,
72 kw_struct,
73 kw_successors,
74 kw_type,
75 keyword_end,
76
77 // String valued tokens.
78 identifier,
79 literal,
80 variable,
81 string,
82 };
83
84 FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
85
86 /// Return the bytes that make up this token.
87 StringRef getSpelling() const { return spelling; }
88
89 /// Return the kind of this token.
90 Kind getKind() const { return kind; }
91
92 /// Return a location for this token.
93 SMLoc getLoc() const;
94
95 /// Returns true if the token is of the given kind.
96 bool is(Kind kind) { return getKind() == kind; }
97
98 /// Return if this token is a keyword.
99 bool isKeyword() const {
100 return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end;
101 }
102
103private:
104 /// Discriminator that indicates the kind of token this is.
105 Kind kind;
106
107 /// A reference to the entire token contents; this is always a pointer into
108 /// a memory buffer owned by the source manager.
109 StringRef spelling;
110};
111
112//===----------------------------------------------------------------------===//
113// FormatLexer
114//===----------------------------------------------------------------------===//
115
116/// This class implements a simple lexer for operation assembly format strings.
117class FormatLexer {
118public:
119 FormatLexer(llvm::SourceMgr &mgr, SMLoc loc);
120
121 /// Lex the next token and return it.
122 FormatToken lexToken();
123
124 /// Emit an error to the lexer with the given location and message.
125 FormatToken emitError(SMLoc loc, const Twine &msg);
126 FormatToken emitError(const char *loc, const Twine &msg);
127
128 FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg, const Twine &note);
129
130private:
131 /// Return the next character in the stream.
132 int getNextChar();
133
134 /// Lex an identifier, literal, variable, or string.
135 FormatToken lexIdentifier(const char *tokStart);
136 FormatToken lexLiteral(const char *tokStart);
137 FormatToken lexVariable(const char *tokStart);
138 FormatToken lexString(const char *tokStart);
139
140 /// Create a token with the current pointer and a start pointer.
141 FormatToken formToken(FormatToken::Kind kind, const char *tokStart) {
142 return FormatToken(kind, StringRef(tokStart, curPtr - tokStart));
143 }
144
145 /// The source manager containing the format string.
146 llvm::SourceMgr &mgr;
147 /// Location of the format string.
148 SMLoc loc;
149 /// Buffer containing the format string.
150 StringRef curBuffer;
151 /// Current pointer in the buffer.
152 const char *curPtr;
153};
154
155//===----------------------------------------------------------------------===//
156// FormatElement
157//===----------------------------------------------------------------------===//
158
/// The root of the format element hierarchy: one element of a parsed format.
///
/// If you squint and take a close look, you can see the outline of a `Format`
/// dialect.
class FormatElement {
public:
  // The top-level kinds of format elements.
  enum Kind { Literal, String, Variable, Whitespace, Directive, Optional };

  virtual ~FormatElement();

  /// Support LLVM-style RTTI. Any `FormatElement` trivially is one.
  static bool classof(const FormatElement *) { return true; }

  /// Get the element kind.
  Kind getKind() const { return kind; }

protected:
  /// Create a format element with the given kind. Only subclasses can
  /// construct elements.
  FormatElement(Kind kind) : kind(kind) {}

private:
  /// The kind of the element.
  Kind kind;
};
184
185/// The base class for all format elements. This class implements common methods
186/// for LLVM-style RTTI.
187template <FormatElement::Kind ElementKind>
188class FormatElementBase : public FormatElement {
189public:
190 /// Support LLVM-style RTTI.
191 static bool classof(const FormatElement *el) {
192 return ElementKind == el->getKind();
193 }
194
195protected:
196 /// Create a format element with the given kind.
197 FormatElementBase() : FormatElement(ElementKind) {}
198};
199
200/// This class represents a literal element. A literal is either one of the
201/// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
202/// `literal`).
203class LiteralElement : public FormatElementBase<FormatElement::Literal> {
204public:
205 /// Create a literal element with the given spelling.
206 explicit LiteralElement(StringRef spelling) : spelling(spelling) {}
207
208 /// Get the spelling of the literal.
209 StringRef getSpelling() const { return spelling; }
210
211private:
212 /// The spelling of the variable, i.e. the string contained within the
213 /// backticks.
214 StringRef spelling;
215};
216
217/// This class represents a raw string that can contain arbitrary C++ code.
218class StringElement : public FormatElementBase<FormatElement::String> {
219public:
220 /// Create a string element with the given contents.
221 explicit StringElement(std::string value) : value(std::move(value)) {}
222
223 /// Get the value of the string element.
224 StringRef getValue() const { return value; }
225
226private:
227 /// The contents of the string.
228 std::string value;
229};
230
231/// This class represents a variable element. A variable refers to some part of
232/// the object being parsed, e.g. an attribute or operand on an operation or a
233/// parameter on an attribute.
234class VariableElement : public FormatElementBase<FormatElement::Variable> {
235public:
236 /// These are the kinds of variables.
237 enum Kind {
238 Attribute,
239 Operand,
240 Region,
241 Result,
242 Successor,
243 Parameter,
244 Property
245 };
246
247 /// Get the kind of variable.
248 Kind getKind() const { return kind; }
249
250protected:
251 /// Create a variable with a kind.
252 VariableElement(Kind kind) : kind(kind) {}
253
254private:
255 /// The kind of variable.
256 Kind kind;
257};
258
259/// Base class for variable elements. This class implements common methods for
260/// LLVM-style RTTI.
261template <VariableElement::Kind VariableKind>
262class VariableElementBase : public VariableElement {
263public:
264 /// An element is of this class if it is a variable and has the same variable
265 /// type.
266 static bool classof(const FormatElement *el) {
267 if (auto *varEl = dyn_cast<VariableElement>(Val: el))
268 return VariableKind == varEl->getKind();
269 return false;
270 }
271
272protected:
273 /// Create a variable element with the given variable kind.
274 VariableElementBase() : VariableElement(VariableKind) {}
275};
276
277/// This class represents a whitespace element, e.g. a newline or space. It is a
278/// literal that is printed but never parsed. When the value is empty, i.e. ``,
279/// a space is elided where one would have been printed automatically.
280class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> {
281public:
282 /// Create a whitespace element.
283 explicit WhitespaceElement(StringRef value) : value(value) {}
284
285 /// Get the whitespace value.
286 StringRef getValue() const { return value; }
287
288private:
289 /// The value of the whitespace element. Can be empty.
290 StringRef value;
291};
292
293class DirectiveElement : public FormatElementBase<FormatElement::Directive> {
294public:
295 /// These are the kinds of directives.
296 enum Kind {
297 AttrDict,
298 PropDict,
299 Custom,
300 FunctionalType,
301 OIList,
302 Operands,
303 Ref,
304 Regions,
305 Results,
306 Successors,
307 Type,
308 Params,
309 Struct
310 };
311
312 /// Get the directive kind.
313 Kind getKind() const { return kind; }
314
315protected:
316 /// Create a directive element with a kind.
317 DirectiveElement(Kind kind) : kind(kind) {}
318
319private:
320 /// The directive kind.
321 Kind kind;
322};
323
324/// Base class for directive elements. This class implements common methods for
325/// LLVM-style RTTI.
326template <DirectiveElement::Kind DirectiveKind>
327class DirectiveElementBase : public DirectiveElement {
328public:
329 /// Create a directive element with the specified kind.
330 DirectiveElementBase() : DirectiveElement(DirectiveKind) {}
331
332 /// A format element is of this class if it is a directive element and has the
333 /// same kind.
334 static bool classof(const FormatElement *el) {
335 if (auto *directiveEl = dyn_cast<DirectiveElement>(Val: el))
336 return DirectiveKind == directiveEl->getKind();
337 return false;
338 }
339};
340
341/// Base class for a directive that contains references to elements of type `T`
342/// in a vector.
343template <DirectiveElement::Kind DirectiveKind, typename T>
344class VectorDirectiveBase : public DirectiveElementBase<DirectiveKind> {
345public:
346 using Base = VectorDirectiveBase<DirectiveKind, T>;
347
348 VectorDirectiveBase(std::vector<T> &&elems) : elems(std::move(elems)) {}
349
350 /// Get the elements contained in this directive.
351 ArrayRef<T> getElements() const { return elems; }
352
353 /// Get the number of elements.
354 unsigned getNumElements() const { return elems.size(); }
355
356 /// Take all of the elements from this directive.
357 std::vector<T> takeElements() { return std::move(elems); }
358
359protected:
360 /// The elements captured by this directive.
361 std::vector<T> elems;
362};
363
364/// This class represents a custom format directive that is implemented by the
365/// user in C++. The directive accepts a list of arguments that is passed to the
366/// C++ function.
367class CustomDirective
368 : public VectorDirectiveBase<DirectiveElement::Custom, FormatElement *> {
369public:
370 using Base::Base;
371 /// Create a custom directive with a name and list of arguments.
372 CustomDirective(StringRef name, std::vector<FormatElement *> &&arguments)
373 : Base(std::move(arguments)), name(name) {}
374
375 /// Get the custom directive name.
376 StringRef getName() const { return name; }
377
378 template <typename T>
379 FailureOr<T *> getFrontAs() const {
380 if (getNumElements() != 1)
381 return failure();
382 if (T *elem = dyn_cast<T>(getElements()[0]))
383 return elem;
384 return failure();
385 }
386
387private:
388 /// The name of the custom directive. The name is used to call two C++
389 /// methods: `parse{name}` and `print{name}` with the given arguments.
390 StringRef name;
391};
392
393/// This class represents a reference directive. This directive can be used to
394/// reference but not bind a previously bound variable or format object. Its
395/// current only use is to pass variables as arguments to the custom directive.
396class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> {
397public:
398 /// Create a reference directive with the single referenced child.
399 RefDirective(FormatElement *arg) : arg(arg) {}
400
401 /// Get the reference argument.
402 FormatElement *getArg() const { return arg; }
403
404private:
405 /// The referenced argument.
406 FormatElement *arg;
407};
408
409/// This class represents a group of elements that are optionally emitted based
410/// on an optional variable "anchor" and a group of elements that are emitted
411/// when the anchor element is not present.
412class OptionalElement : public FormatElementBase<FormatElement::Optional> {
413public:
414 /// Create an optional group with the given child elements.
415 OptionalElement(std::vector<FormatElement *> &&thenElements,
416 std::vector<FormatElement *> &&elseElements,
417 unsigned thenParseStart, unsigned elseParseStart,
418 FormatElement *anchor, bool inverted)
419 : thenElements(std::move(thenElements)),
420 elseElements(std::move(elseElements)), thenParseStart(thenParseStart),
421 elseParseStart(elseParseStart), anchor(anchor), inverted(inverted) {}
422
423 /// Return the `then` elements of the optional group. Drops the first
424 /// `thenParseStart` whitespace elements if `parseable` is true.
425 ArrayRef<FormatElement *> getThenElements(bool parseable = false) const {
426 return llvm::ArrayRef(thenElements)
427 .drop_front(N: parseable ? thenParseStart : 0);
428 }
429
430 /// Return the `else` elements of the optional group. Drops the first
431 /// `elseParseStart` whitespace elements if `parseable` is true.
432 ArrayRef<FormatElement *> getElseElements(bool parseable = false) const {
433 return llvm::ArrayRef(elseElements)
434 .drop_front(N: parseable ? elseParseStart : 0);
435 }
436
437 /// Return the anchor of the optional group.
438 FormatElement *getAnchor() const { return anchor; }
439
440 /// Return true if the optional group is inverted.
441 bool isInverted() const { return inverted; }
442
443private:
444 /// The child elements emitted when the anchor is present.
445 std::vector<FormatElement *> thenElements;
446 /// The child elements emitted when the anchor is not present.
447 std::vector<FormatElement *> elseElements;
448 /// The index of the first element that is parsed in `thenElements`. That is,
449 /// the first non-whitespace element.
450 unsigned thenParseStart;
451 /// The index of the first element that is parsed in `elseElements`. That is,
452 /// the first non-whitespace element.
453 unsigned elseParseStart;
454 /// The anchor element of the optional group.
455 FormatElement *anchor;
456 /// Whether the optional group condition is inverted and the anchor element is
457 /// in the else group.
458 bool inverted;
459};
460
461//===----------------------------------------------------------------------===//
// FormatParser
463//===----------------------------------------------------------------------===//
464
465/// Base class for a parser that implements an assembly format. This class
466/// defines a common assembly format syntax and the creation of format elements.
467/// Subclasses will need to implement parsing for the format elements they
468/// support.
469class FormatParser {
470public:
471 /// Vtable anchor.
472 virtual ~FormatParser();
473
474 /// Parse the assembly format.
475 FailureOr<std::vector<FormatElement *>> parse();
476
477protected:
478 /// The current context of the parser when parsing an element.
479 enum Context {
480 /// The element is being parsed in a "top-level" context, i.e. at the top of
481 /// the format or in an optional group.
482 TopLevelContext,
483 /// The element is being parsed as a custom directive child.
484 CustomDirectiveContext,
485 /// The element is being parsed as a type directive child.
486 TypeDirectiveContext,
487 /// The element is being parsed as a reference directive child.
488 RefDirectiveContext,
489 /// The element is being parsed as a struct directive child.
490 StructDirectiveContext
491 };
492
493 /// Create a format parser with the given source manager and a location.
494 explicit FormatParser(llvm::SourceMgr &mgr, llvm::SMLoc loc)
495 : lexer(mgr, loc), curToken(lexer.lexToken()) {}
496
497 /// Allocate and construct a format element.
498 template <typename FormatElementT, typename... Args>
499 FormatElementT *create(Args &&...args) {
500 // FormatElementT *ptr = allocator.Allocate<FormatElementT>();
501 // ::new (ptr) FormatElementT(std::forward<Args>(args)...);
502 // return ptr;
503 auto mem = std::make_unique<FormatElementT>(std::forward<Args>(args)...);
504 FormatElementT *ptr = mem.get();
505 allocator.push_back(std::move(mem));
506 return ptr;
507 }
508
509 //===--------------------------------------------------------------------===//
510 // Element Parsing
511
512 /// Parse a single element of any kind.
513 FailureOr<FormatElement *> parseElement(Context ctx);
514 /// Parse a literal.
515 FailureOr<FormatElement *> parseLiteral(Context ctx);
516 /// Parse a string.
517 FailureOr<FormatElement *> parseString(Context ctx);
518 /// Parse a variable.
519 FailureOr<FormatElement *> parseVariable(Context ctx);
520 /// Parse a directive.
521 FailureOr<FormatElement *> parseDirective(Context ctx);
522 /// Parse an optional group.
523 FailureOr<FormatElement *> parseOptionalGroup(Context ctx);
524 /// Parse a custom directive.
525 FailureOr<FormatElement *> parseCustomDirective(llvm::SMLoc loc, Context ctx);
526 /// Parse a ref directive.
527 FailureOr<FormatElement *> parseRefDirective(SMLoc loc, Context context);
528 /// Parse a qualified directive.
529 FailureOr<FormatElement *> parseQualifiedDirective(SMLoc loc, Context ctx);
530
531 /// Parse a format-specific variable kind.
532 virtual FailureOr<FormatElement *>
533 parseVariableImpl(llvm::SMLoc loc, StringRef name, Context ctx) = 0;
534 /// Parse a format-specific directive kind.
535 virtual FailureOr<FormatElement *>
536 parseDirectiveImpl(llvm::SMLoc loc, FormatToken::Kind kind, Context ctx) = 0;
537
538 //===--------------------------------------------------------------------===//
539 // Format Verification
540
541 /// Verify that the format is well-formed.
542 virtual LogicalResult verify(llvm::SMLoc loc,
543 ArrayRef<FormatElement *> elements) = 0;
544 /// Verify the arguments to a custom directive.
545 virtual LogicalResult
546 verifyCustomDirectiveArguments(llvm::SMLoc loc,
547 ArrayRef<FormatElement *> arguments) = 0;
548 /// Verify the elements of an optional group.
549 virtual LogicalResult
550 verifyOptionalGroupElements(llvm::SMLoc loc,
551 ArrayRef<FormatElement *> elements,
552 FormatElement *anchor) = 0;
553
554 /// Mark 'element' as qualified. If 'element' cannot be qualified an error
555 /// should be emitted and failure returned.
556 virtual LogicalResult markQualified(llvm::SMLoc loc,
557 FormatElement *element) = 0;
558
559 //===--------------------------------------------------------------------===//
560 // Lexer Utilities
561
562 /// Emit an error at the given location.
563 LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) {
564 lexer.emitError(loc, msg);
565 return failure();
566 }
567
568 /// Emit an error and a note at the given notation.
569 LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
570 const Twine &note) {
571 lexer.emitErrorAndNote(loc, msg, note);
572 return failure();
573 }
574
575 /// Parse a single token of the expected kind.
576 FailureOr<FormatToken> parseToken(FormatToken::Kind kind, const Twine &msg) {
577 if (!curToken.is(kind))
578 return emitError(loc: curToken.getLoc(), msg);
579 FormatToken tok = curToken;
580 consumeToken();
581 return tok;
582 }
583
584 /// Advance the lexer to the next token.
585 void consumeToken() {
586 assert(!curToken.is(FormatToken::eof) && !curToken.is(FormatToken::error) &&
587 "shouldn't advance past EOF or errors");
588 curToken = lexer.lexToken();
589 }
590
591 /// Get the current token.
592 FormatToken peekToken() { return curToken; }
593
594private:
595 /// The format parser retains ownership of the format elements in a bump
596 /// pointer allocator.
597 // FIXME: FormatElement with `std::vector` need to be converted to use
598 // trailing objects.
599 // llvm::BumpPtrAllocator allocator;
600 std::vector<std::unique_ptr<FormatElement>> allocator;
601 /// The format lexer to use.
602 FormatLexer lexer;
603 /// The current token in the lexer.
604 FormatToken curToken;
605};
606
607//===----------------------------------------------------------------------===//
608// Utility Functions
609//===----------------------------------------------------------------------===//
610
611/// Whether a space needs to be emitted before a literal. E.g., two keywords
612/// back-to-back require a space separator, but a keyword followed by '<' does
613/// not require a space.
614bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);
615
616/// Returns true if the given string can be formatted as a keyword.
617bool canFormatStringAsKeyword(StringRef value,
618 function_ref<void(Twine)> emitError = nullptr);
619
620/// Returns true if the given string is valid format literal element.
621/// If `emitError` is provided, it is invoked with the reason for the failure.
622bool isValidLiteral(StringRef value,
623 function_ref<void(Twine)> emitError = nullptr);
624
625/// Whether a failure in parsing the assembly format should be a fatal error.
626extern llvm::cl::opt<bool> formatErrorIsFatal;
627
628} // namespace tblgen
629} // namespace mlir
630
631#endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
632

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of mlir/tools/mlir-tblgen/FormatGen.h