//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a tool to compile a BNF grammar. It is used by the build system to
// generate the data needed to statically construct core pieces (Grammar,
// LRTable etc) of the LR parser.
//
//===----------------------------------------------------------------------===//
14
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>

using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
using llvm::cl::Required;
using llvm::cl::value_desc;
using llvm::cl::values;
30
31namespace {
32enum EmitType {
33 EmitSymbolList,
34 EmitGrammarContent,
35};
36
37opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
38 Required);
39opt<EmitType>
40 Emit(desc("which information to emit:"),
41 values(clEnumValN(EmitSymbolList, "emit-symbol-list",
42 "Print nonterminal symbols (default)"),
43 clEnumValN(EmitGrammarContent, "emit-grammar-content",
44 "Print the BNF grammar content as a string")));
45
46opt<std::string> OutputFilename("o", init(Val: "-"), desc("Output"),
47 value_desc("file"));
48
49std::string readOrDie(llvm::StringRef Path) {
50 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
51 llvm::MemoryBuffer::getFile(Filename: Path);
52 if (std::error_code EC = Text.getError()) {
53 llvm::errs() << "Error: can't read grammar file '" << Path
54 << "': " << EC.message() << "\n";
55 ::exit(status: 1);
56 }
57 return Text.get()->getBuffer().str();
58}
59} // namespace
60
61namespace clang {
62namespace pseudo {
63namespace {
64
65// Mangles a symbol name into a valid identifier.
66//
67// These follow names in the grammar fairly closely:
68// nonterminal: `ptr-declarator` becomes `ptr_declarator`;
69// punctuator: `,` becomes `COMMA`;
70// keyword: `INT` becomes `INT`;
71// terminal: `IDENTIFIER` becomes `IDENTIFIER`;
72std::string mangleSymbol(SymbolID SID, const Grammar &G) {
73 static auto &TokNames = *new std::vector<std::string>{
74#define TOK(X) llvm::StringRef(#X).upper(),
75#define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
76#include "clang/Basic/TokenKinds.def"
77 };
78 if (isToken(ID: SID))
79 return TokNames[symbolToToken(SID)];
80 std::string Name = G.symbolName(SID).str();
81 // translation-unit -> translation_unit
82 std::replace(Name.begin(), Name.end(), '-', '_');
83 return Name;
84}
85
86// Mangles the RHS of a rule definition into a valid identifier.
87//
88// These are unique only for a fixed LHS.
89// e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
90// it is `ptr_operator__ptr_declarator`.
91std::string mangleRule(RuleID RID, const Grammar &G) {
92 const auto &R = G.lookupRule(RID);
93 std::string MangleName = mangleSymbol(R.seq().front(), G);
94 for (SymbolID S : R.seq().drop_front()) {
95 MangleName.append("__");
96 MangleName.append(mangleSymbol(S, G));
97 }
98 return MangleName;
99}
100
101} // namespace
102} // namespace pseudo
103} // namespace clang
104
105int main(int argc, char *argv[]) {
106 llvm::cl::ParseCommandLineOptions(argc, argv, Overview: "");
107
108 std::string GrammarText = readOrDie(Path: Grammar);
109 std::vector<std::string> Diags;
110 auto G = clang::pseudo::Grammar::parseBNF(BNF: GrammarText, Diags);
111
112 if (!Diags.empty()) {
113 llvm::errs() << llvm::join(R&: Diags, Separator: "\n");
114 return 1;
115 }
116
117 std::error_code EC;
118 llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None};
119 if (EC) {
120 llvm::errs() << EC.message() << '\n';
121 return 1;
122 }
123
124 switch (Emit) {
125 case EmitSymbolList:
126 Out.os() << R"cpp(
127#ifndef NONTERMINAL
128#define NONTERMINAL(NAME, ID)
129#endif
130#ifndef RULE
131#define RULE(LHS, RHS, ID)
132#endif
133#ifndef EXTENSION
134#define EXTENSION(NAME, ID)
135#endif
136)cpp";
137 for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
138 ++ID) {
139 Out.os() << llvm::formatv(Fmt: "NONTERMINAL({0}, {1})\n",
140 Vals: clang::pseudo::mangleSymbol(SID: ID, G), Vals&: ID);
141 for (const clang::pseudo::Rule &R : G.rulesFor(SID: ID)) {
142 clang::pseudo::RuleID RID = &R - G.table().Rules.data();
143 Out.os() << llvm::formatv(Fmt: "RULE({0}, {1}, {2})\n",
144 Vals: clang::pseudo::mangleSymbol(SID: R.Target, G),
145 Vals: clang::pseudo::mangleRule(RID, G), Vals&: RID);
146 }
147 }
148 for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
149 EID < G.table().AttributeValues.size(); ++EID) {
150 llvm::StringRef Name = G.table().AttributeValues[EID];
151 assert(!Name.empty());
152 Out.os() << llvm::formatv(Fmt: "EXTENSION({0}, {1})\n", Vals&: Name, Vals&: EID);
153 }
154 Out.os() << R"cpp(
155#undef NONTERMINAL
156#undef RULE
157#undef EXTENSION
158)cpp";
159 break;
160 case EmitGrammarContent:
161 for (llvm::StringRef Line : llvm::split(Str: GrammarText, Separator: '\n')) {
162 Out.os() << '"';
163 Out.os().write_escaped(Str: (Line + "\n").str());
164 Out.os() << "\"\n";
165 }
166 break;
167 }
168
169 Out.keep();
170
171 return 0;
172}
173

// Source: clang-tools-extra/pseudo/gen/Main.cpp