1 | //===--- Main.cpp - Compile BNF grammar -----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This is a tool to compile a BNF grammar. It is used by the build system to
// generate the data needed to statically construct the core pieces (Grammar,
// LRTable, etc.) of the LR parser.
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "clang-pseudo/grammar/Grammar.h" |
16 | #include "llvm/ADT/StringExtras.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | #include "llvm/Support/FileSystem.h" |
19 | #include "llvm/Support/FormatVariadic.h" |
20 | #include "llvm/Support/MemoryBuffer.h" |
21 | #include "llvm/Support/ToolOutputFile.h" |
22 | #include <algorithm> |
23 | |
24 | using llvm::cl::desc; |
25 | using llvm::cl::init; |
26 | using llvm::cl::opt; |
27 | using llvm::cl::Required; |
28 | using llvm::cl::value_desc; |
29 | using llvm::cl::values; |
30 | |
31 | namespace { |
32 | enum EmitType { |
33 | EmitSymbolList, |
34 | EmitGrammarContent, |
35 | }; |
36 | |
37 | opt<std::string> Grammar("grammar" , desc("Parse a BNF grammar file." ), |
38 | Required); |
39 | opt<EmitType> |
40 | Emit(desc("which information to emit:" ), |
41 | values(clEnumValN(EmitSymbolList, "emit-symbol-list" , |
42 | "Print nonterminal symbols (default)" ), |
43 | clEnumValN(EmitGrammarContent, "emit-grammar-content" , |
44 | "Print the BNF grammar content as a string" ))); |
45 | |
46 | opt<std::string> OutputFilename("o" , init(Val: "-" ), desc("Output" ), |
47 | value_desc("file" )); |
48 | |
49 | std::string readOrDie(llvm::StringRef Path) { |
50 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = |
51 | llvm::MemoryBuffer::getFile(Filename: Path); |
52 | if (std::error_code EC = Text.getError()) { |
53 | llvm::errs() << "Error: can't read grammar file '" << Path |
54 | << "': " << EC.message() << "\n" ; |
55 | ::exit(status: 1); |
56 | } |
57 | return Text.get()->getBuffer().str(); |
58 | } |
59 | } // namespace |
60 | |
61 | namespace clang { |
62 | namespace pseudo { |
63 | namespace { |
64 | |
65 | // Mangles a symbol name into a valid identifier. |
66 | // |
67 | // These follow names in the grammar fairly closely: |
68 | // nonterminal: `ptr-declarator` becomes `ptr_declarator`; |
69 | // punctuator: `,` becomes `COMMA`; |
70 | // keyword: `INT` becomes `INT`; |
71 | // terminal: `IDENTIFIER` becomes `IDENTIFIER`; |
72 | std::string mangleSymbol(SymbolID SID, const Grammar &G) { |
73 | static auto &TokNames = *new std::vector<std::string>{ |
74 | #define TOK(X) llvm::StringRef(#X).upper(), |
75 | #define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(), |
76 | #include "clang/Basic/TokenKinds.def" |
77 | }; |
78 | if (isToken(ID: SID)) |
79 | return TokNames[symbolToToken(SID)]; |
80 | std::string Name = G.symbolName(SID).str(); |
81 | // translation-unit -> translation_unit |
82 | std::replace(Name.begin(), Name.end(), '-', '_'); |
83 | return Name; |
84 | } |
85 | |
86 | // Mangles the RHS of a rule definition into a valid identifier. |
87 | // |
88 | // These are unique only for a fixed LHS. |
89 | // e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`, |
90 | // it is `ptr_operator__ptr_declarator`. |
91 | std::string mangleRule(RuleID RID, const Grammar &G) { |
92 | const auto &R = G.lookupRule(RID); |
93 | std::string MangleName = mangleSymbol(R.seq().front(), G); |
94 | for (SymbolID S : R.seq().drop_front()) { |
95 | MangleName.append("__" ); |
96 | MangleName.append(mangleSymbol(S, G)); |
97 | } |
98 | return MangleName; |
99 | } |
100 | |
101 | } // namespace |
102 | } // namespace pseudo |
103 | } // namespace clang |
104 | |
105 | int main(int argc, char *argv[]) { |
106 | llvm::cl::ParseCommandLineOptions(argc, argv, Overview: "" ); |
107 | |
108 | std::string GrammarText = readOrDie(Path: Grammar); |
109 | std::vector<std::string> Diags; |
110 | auto G = clang::pseudo::Grammar::parseBNF(BNF: GrammarText, Diags); |
111 | |
112 | if (!Diags.empty()) { |
113 | llvm::errs() << llvm::join(R&: Diags, Separator: "\n" ); |
114 | return 1; |
115 | } |
116 | |
117 | std::error_code EC; |
118 | llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None}; |
119 | if (EC) { |
120 | llvm::errs() << EC.message() << '\n'; |
121 | return 1; |
122 | } |
123 | |
124 | switch (Emit) { |
125 | case EmitSymbolList: |
126 | Out.os() << R"cpp( |
127 | #ifndef NONTERMINAL |
128 | #define NONTERMINAL(NAME, ID) |
129 | #endif |
130 | #ifndef RULE |
131 | #define RULE(LHS, RHS, ID) |
132 | #endif |
133 | #ifndef EXTENSION |
134 | #define EXTENSION(NAME, ID) |
135 | #endif |
136 | )cpp" ; |
137 | for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size(); |
138 | ++ID) { |
139 | Out.os() << llvm::formatv(Fmt: "NONTERMINAL({0}, {1})\n" , |
140 | Vals: clang::pseudo::mangleSymbol(SID: ID, G), Vals&: ID); |
141 | for (const clang::pseudo::Rule &R : G.rulesFor(SID: ID)) { |
142 | clang::pseudo::RuleID RID = &R - G.table().Rules.data(); |
143 | Out.os() << llvm::formatv(Fmt: "RULE({0}, {1}, {2})\n" , |
144 | Vals: clang::pseudo::mangleSymbol(SID: R.Target, G), |
145 | Vals: clang::pseudo::mangleRule(RID, G), Vals&: RID); |
146 | } |
147 | } |
148 | for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/; |
149 | EID < G.table().AttributeValues.size(); ++EID) { |
150 | llvm::StringRef Name = G.table().AttributeValues[EID]; |
151 | assert(!Name.empty()); |
152 | Out.os() << llvm::formatv(Fmt: "EXTENSION({0}, {1})\n" , Vals&: Name, Vals&: EID); |
153 | } |
154 | Out.os() << R"cpp( |
155 | #undef NONTERMINAL |
156 | #undef RULE |
157 | #undef EXTENSION |
158 | )cpp" ; |
159 | break; |
160 | case EmitGrammarContent: |
161 | for (llvm::StringRef Line : llvm::split(Str: GrammarText, Separator: '\n')) { |
162 | Out.os() << '"'; |
163 | Out.os().write_escaped(Str: (Line + "\n" ).str()); |
164 | Out.os() << "\"\n" ; |
165 | } |
166 | break; |
167 | } |
168 | |
169 | Out.keep(); |
170 | |
171 | return 0; |
172 | } |
173 | |