//===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "flang/Parser/preprocessor.h"
10
11#include "prescan.h"
12#include "flang/Common/idioms.h"
13#include "flang/Parser/characters.h"
14#include "flang/Parser/message.h"
15#include "llvm/Support/raw_ostream.h"
16#include <algorithm>
17#include <cinttypes>
18#include <cstddef>
19#include <ctime>
20#include <map>
21#include <memory>
22#include <optional>
23#include <set>
24#include <utility>
25#include <vector>
26
27namespace Fortran::parser {
28
29Definition::Definition(
30 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
31 : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
32
33Definition::Definition(const std::vector<std::string> &argNames,
34 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
35 bool isVariadic)
36 : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames},
37 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
38
39Definition::Definition(const std::string &predefined, AllSources &sources)
40 : isPredefined_{true},
41 replacement_{
42 predefined, sources.AddCompilerInsertion(predefined).start()} {}
43
44bool Definition::set_isDisabled(bool disable) {
45 bool was{isDisabled_};
46 isDisabled_ = disable;
47 return was;
48}
49
50void Definition::Print(llvm::raw_ostream &out, const char *macroName) const {
51 if (!isFunctionLike_) {
52 // If it's not a function-like macro, then just print the replacement.
53 out << ' ' << replacement_.ToString();
54 return;
55 }
56
57 size_t argCount{argumentCount()};
58
59 out << '(';
60 for (size_t i{0}; i != argCount; ++i) {
61 if (i != 0) {
62 out << ", ";
63 }
64 out << argNames_[i];
65 }
66 if (isVariadic_) {
67 out << ", ...";
68 }
69 out << ") ";
70
71 for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
72 std::string tok{replacement_.TokenAt(i).ToString()};
73 if (size_t idx{GetArgumentIndex(tok)}; idx < argCount) {
74 out << argNames_[idx];
75 } else {
76 out << tok;
77 }
78 }
79}
80
81static bool IsLegalIdentifierStart(const CharBlock &cpl) {
82 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
83}
84
85TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
86 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
87 std::map<std::string, std::string> args;
88 char argIndex{'A'};
89 for (const std::string &arg : argNames) {
90 CHECK(args.find(arg) == args.end());
91 args[arg] = "~"s + argIndex++;
92 }
93 TokenSequence result;
94 for (std::size_t j{0}; j < tokens; ++j) {
95 CharBlock tok{token.TokenAt(firstToken + j)};
96 if (IsLegalIdentifierStart(tok)) {
97 auto it{args.find(tok.ToString())};
98 if (it != args.end()) {
99 result.Put(it->second, token.GetTokenProvenance(j));
100 continue;
101 }
102 }
103 result.Put(token, firstToken + j, 1);
104 }
105 return result;
106}
107
108std::size_t Definition::GetArgumentIndex(const CharBlock &token) const {
109 if (token.size() >= 2 && token[0] == '~') {
110 return static_cast<size_t>(token[1] - 'A');
111 }
112 return argumentCount();
113}
114
115static TokenSequence Stringify(
116 const TokenSequence &tokens, AllSources &allSources) {
117 TokenSequence result;
118 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
119 result.PutNextTokenChar('"', quoteProvenance);
120 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
121 const CharBlock &token{tokens.TokenAt(j)};
122 std::size_t bytes{token.size()};
123 for (std::size_t k{0}; k < bytes; ++k) {
124 char ch{token[k]};
125 Provenance from{tokens.GetTokenProvenance(j, k)};
126 if (ch == '"' || ch == '\\') {
127 result.PutNextTokenChar(ch, from);
128 }
129 result.PutNextTokenChar(ch, from);
130 }
131 }
132 result.PutNextTokenChar('"', quoteProvenance);
133 result.CloseToken();
134 return result;
135}
136
137constexpr bool IsTokenPasting(CharBlock opr) {
138 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
139}
140
141static bool AnyTokenPasting(const TokenSequence &text) {
142 std::size_t tokens{text.SizeInTokens()};
143 for (std::size_t j{0}; j < tokens; ++j) {
144 if (IsTokenPasting(text.TokenAt(j))) {
145 return true;
146 }
147 }
148 return false;
149}
150
151static TokenSequence TokenPasting(TokenSequence &&text) {
152 if (!AnyTokenPasting(text)) {
153 return std::move(text);
154 }
155 TokenSequence result;
156 std::size_t tokens{text.SizeInTokens()};
157 bool pasting{false};
158 for (std::size_t j{0}; j < tokens; ++j) {
159 if (IsTokenPasting(text.TokenAt(j))) {
160 if (!pasting) {
161 while (!result.empty() &&
162 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
163 result.pop_back();
164 }
165 if (!result.empty()) {
166 result.ReopenLastToken();
167 pasting = true;
168 }
169 }
170 } else if (pasting && text.TokenAt(j).IsBlank()) {
171 } else {
172 result.Put(text, j, 1);
173 pasting = false;
174 }
175 }
176 return result;
177}
178
179TokenSequence Definition::Apply(
180 const std::vector<TokenSequence> &args, Prescanner &prescanner) {
181 TokenSequence result;
182 bool skipping{false};
183 int parenthesesNesting{0};
184 std::size_t tokens{replacement_.SizeInTokens()};
185 for (std::size_t j{0}; j < tokens; ++j) {
186 CharBlock token{replacement_.TokenAt(j)};
187 std::size_t bytes{token.size()};
188 if (skipping) {
189 char ch{token.OnlyNonBlank()};
190 if (ch == '(') {
191 ++parenthesesNesting;
192 } else if (ch == ')') {
193 if (parenthesesNesting > 0) {
194 --parenthesesNesting;
195 }
196 skipping = parenthesesNesting > 0;
197 }
198 continue;
199 }
200 if (bytes == 2 && token[0] == '~') { // argument substitution
201 std::size_t index{GetArgumentIndex(token)};
202 if (index >= args.size()) {
203 continue;
204 }
205 std::size_t prev{j};
206 while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
207 --prev;
208 }
209 if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
210 replacement_.TokenAt(prev - 1)[0] ==
211 '#') { // stringify argument without macro replacement
212 std::size_t resultSize{result.SizeInTokens()};
213 while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) {
214 result.pop_back();
215 --resultSize;
216 }
217 CHECK(resultSize > 0 &&
218 result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
219 result.pop_back();
220 result.Put(Stringify(args[index], prescanner.allSources()));
221 } else {
222 const TokenSequence *arg{&args[index]};
223 std::optional<TokenSequence> replaced;
224 // Don't replace macros in the actual argument if it is preceded or
225 // followed by the token-pasting operator ## in the replacement text.
226 if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
227 auto next{replacement_.SkipBlanks(j + 1)};
228 if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
229 // Apply macro replacement to the actual argument
230 replaced =
231 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
232 if (replaced) {
233 arg = &*replaced;
234 }
235 }
236 }
237 result.Put(DEREF(arg));
238 }
239 } else if (bytes == 11 && isVariadic_ &&
240 token.ToString() == "__VA_ARGS__") {
241 Provenance commaProvenance{
242 prescanner.preprocessor().allSources().CompilerInsertionProvenance(
243 ',')};
244 for (std::size_t k{argumentCount()}; k < args.size(); ++k) {
245 if (k > argumentCount()) {
246 result.Put(","s, commaProvenance);
247 }
248 result.Put(args[k]);
249 }
250 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
251 j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
252 parenthesesNesting == 0) {
253 parenthesesNesting = 1;
254 skipping = args.size() == argumentCount();
255 ++j;
256 } else {
257 if (parenthesesNesting > 0) {
258 char ch{token.OnlyNonBlank()};
259 if (ch == '(') {
260 ++parenthesesNesting;
261 } else if (ch == ')') {
262 if (--parenthesesNesting == 0) {
263 skipping = false;
264 continue;
265 }
266 }
267 }
268 result.Put(replacement_, j);
269 }
270 }
271 return TokenPasting(std::move(result));
272}
273
// Formats `now`, converted to local time, according to the strftime-style
// `format`.  The result is limited by a 16-byte buffer (15 characters plus
// the terminating NUL); when the formatted text does not fit, strftime
// reports 0 and an empty string is returned.  An empty string is also
// returned when the time cannot be converted to local time.
static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  return {buffer, std::strftime(buffer, sizeof buffer, format, local)};
}
279
280Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
281
282void Preprocessor::DefineStandardMacros() {
283 // Capture current local date & time once now to avoid having the values
284 // of __DATE__ or __TIME__ change during compilation.
285 std::time_t now;
286 std::time(&now);
287 Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
288 Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
289 // The values of these predefined macros depend on their invocation sites.
290 Define("__FILE__"s, "__FILE__"s);
291 Define("__LINE__"s, "__LINE__"s);
292}
293
294void Preprocessor::Define(const std::string &macro, const std::string &value) {
295 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
296}
297
298void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
299
300std::optional<TokenSequence> Preprocessor::MacroReplacement(
301 const TokenSequence &input, Prescanner &prescanner,
302 std::optional<std::size_t> *partialFunctionLikeMacro) {
303 // Do quick scan for any use of a defined name.
304 if (definitions_.empty()) {
305 return std::nullopt;
306 }
307 std::size_t tokens{input.SizeInTokens()};
308 std::size_t j{0};
309 for (; j < tokens; ++j) {
310 CharBlock token{input.TokenAt(j)};
311 if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
312 IsNameDefined(token)) {
313 break;
314 }
315 }
316 if (j == tokens) {
317 return std::nullopt; // input contains nothing that would be replaced
318 }
319 TokenSequence result{input, 0, j};
320
321 // After rescanning after macro replacement has failed due to an unclosed
322 // function-like macro call (no left parenthesis yet, or no closing
323 // parenthesis), if tokens remain in the input, append them to the
324 // replacement text and attempt to proceed. Otherwise, return, so that
325 // the caller may try again with remaining tokens in its input.
326 auto CompleteFunctionLikeMacro{
327 [this, &input, &prescanner, &result, &partialFunctionLikeMacro](
328 std::size_t after, const TokenSequence &replacement,
329 std::size_t pFLMOffset) {
330 if (after < input.SizeInTokens()) {
331 result.Put(replacement, 0, pFLMOffset);
332 TokenSequence suffix;
333 suffix.Put(
334 replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
335 suffix.Put(input, after, input.SizeInTokens() - after);
336 auto further{
337 ReplaceMacros(suffix, prescanner, partialFunctionLikeMacro)};
338 if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
339 // still not closed
340 **partialFunctionLikeMacro += result.SizeInTokens();
341 }
342 result.Put(further);
343 return true;
344 } else {
345 if (partialFunctionLikeMacro) {
346 *partialFunctionLikeMacro = pFLMOffset + result.SizeInTokens();
347 }
348 return false;
349 }
350 }};
351
352 for (; j < tokens; ++j) {
353 CharBlock token{input.TokenAt(j)};
354 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
355 result.Put(input, j);
356 continue;
357 }
358 auto it{definitions_.find(token)};
359 if (it == definitions_.end()) {
360 result.Put(input, j);
361 continue;
362 }
363 Definition *def{&it->second};
364 if (def->isDisabled()) {
365 result.Put(input, j);
366 continue;
367 }
368 if (!def->isFunctionLike()) {
369 if (def->isPredefined() && !def->replacement().empty()) {
370 std::string repl;
371 std::string name{def->replacement().TokenAt(0).ToString()};
372 if (name == "__FILE__") {
373 repl = "\""s +
374 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
375 } else if (name == "__LINE__") {
376 std::string buf;
377 llvm::raw_string_ostream ss{buf};
378 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
379 repl = ss.str();
380 }
381 if (!repl.empty()) {
382 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
383 ProvenanceRange call{allSources_.AddMacroCall(
384 insert, input.GetTokenProvenanceRange(j), repl)};
385 result.Put(repl, call.start());
386 continue;
387 }
388 }
389 std::optional<std::size_t> partialFLM;
390 def->set_isDisabled(true);
391 TokenSequence replaced{TokenPasting(
392 ReplaceMacros(def->replacement(), prescanner, &partialFLM))};
393 def->set_isDisabled(false);
394 if (partialFLM &&
395 CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
396 return result;
397 }
398 if (!replaced.empty()) {
399 ProvenanceRange from{def->replacement().GetProvenanceRange()};
400 ProvenanceRange use{input.GetTokenProvenanceRange(j)};
401 ProvenanceRange newRange{
402 allSources_.AddMacroCall(from, use, replaced.ToString())};
403 result.Put(replaced, newRange);
404 }
405 } else {
406 // Possible function-like macro call. Skip spaces and newlines to see
407 // whether '(' is next.
408 std::size_t k{j};
409 bool leftParen{false};
410 while (++k < tokens) {
411 const CharBlock &lookAhead{input.TokenAt(k)};
412 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
413 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
414 break;
415 }
416 }
417 if (!leftParen) {
418 if (partialFunctionLikeMacro) {
419 *partialFunctionLikeMacro = result.SizeInTokens();
420 result.Put(input, j, tokens - j);
421 return result;
422 } else {
423 result.Put(input, j);
424 continue;
425 }
426 }
427 std::vector<std::size_t> argStart{++k};
428 for (int nesting{0}; k < tokens; ++k) {
429 CharBlock token{input.TokenAt(k)};
430 char ch{token.OnlyNonBlank()};
431 if (ch == '(') {
432 ++nesting;
433 } else if (ch == ')') {
434 if (nesting == 0) {
435 break;
436 }
437 --nesting;
438 } else if (ch == ',' && nesting == 0) {
439 argStart.push_back(k + 1);
440 }
441 }
442 if (argStart.size() == 1 && k == argStart[0] &&
443 def->argumentCount() == 0) {
444 // Subtle: () is zero arguments, not one empty argument,
445 // unless one argument was expected.
446 argStart.clear();
447 }
448 if (k >= tokens && partialFunctionLikeMacro) {
449 *partialFunctionLikeMacro = result.SizeInTokens();
450 result.Put(input, j, tokens - j);
451 return result;
452 } else if (k >= tokens || argStart.size() < def->argumentCount() ||
453 (argStart.size() > def->argumentCount() && !def->isVariadic())) {
454 result.Put(input, j);
455 continue;
456 }
457 std::vector<TokenSequence> args;
458 for (std::size_t n{0}; n < argStart.size(); ++n) {
459 std::size_t at{argStart[n]};
460 std::size_t count{
461 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
462 args.emplace_back(TokenSequence(input, at, count));
463 }
464 TokenSequence applied{def->Apply(args, prescanner)};
465 std::optional<std::size_t> partialFLM;
466 def->set_isDisabled(true);
467 TokenSequence replaced{
468 ReplaceMacros(std::move(applied), prescanner, &partialFLM)};
469 def->set_isDisabled(false);
470 if (partialFLM &&
471 CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
472 return result;
473 }
474 if (!replaced.empty()) {
475 ProvenanceRange from{def->replacement().GetProvenanceRange()};
476 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
477 ProvenanceRange newRange{
478 allSources_.AddMacroCall(from, use, replaced.ToString())};
479 result.Put(replaced, newRange);
480 }
481 j = k; // advance to the terminal ')'
482 }
483 }
484 return result;
485}
486
487TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
488 Prescanner &prescanner,
489 std::optional<std::size_t> *partialFunctionLikeMacro) {
490 if (std::optional<TokenSequence> repl{
491 MacroReplacement(tokens, prescanner, partialFunctionLikeMacro)}) {
492 return std::move(*repl);
493 }
494 return tokens;
495}
496
497void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
498 std::size_t tokens{dir.SizeInTokens()};
499 std::size_t j{dir.SkipBlanks(0)};
500 if (j == tokens) {
501 return;
502 }
503 if (dir.TokenAt(j).ToString() != "#") {
504 prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
505 return;
506 }
507 j = dir.SkipBlanks(j + 1);
508 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
509 --tokens;
510 }
511 if (j == tokens) {
512 return;
513 }
514 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
515 LineDirective(dir, j, prescanner);
516 return;
517 }
518 std::size_t dirOffset{j};
519 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
520 j = dir.SkipBlanks(j + 1);
521 CharBlock nameToken;
522 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
523 nameToken = dir.TokenAt(j);
524 }
525 if (dirName == "line") {
526 LineDirective(dir, j, prescanner);
527 } else if (dirName == "define") {
528 if (nameToken.empty()) {
529 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
530 "#define: missing or invalid name"_err_en_US);
531 return;
532 }
533 nameToken = SaveTokenAsName(nameToken);
534 definitions_.erase(nameToken);
535 if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
536 j = dir.SkipBlanks(j + 1);
537 std::vector<std::string> argName;
538 bool isVariadic{false};
539 if (dir.TokenAt(j).OnlyNonBlank() != ')') {
540 while (true) {
541 std::string an{dir.TokenAt(j).ToString()};
542 if (an == "...") {
543 isVariadic = true;
544 } else {
545 if (an.empty() || !IsLegalIdentifierStart(an[0])) {
546 prescanner.Say(dir.GetTokenProvenanceRange(j),
547 "#define: missing or invalid argument name"_err_en_US);
548 return;
549 }
550 argName.push_back(an);
551 }
552 j = dir.SkipBlanks(j + 1);
553 if (j == tokens) {
554 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
555 "#define: malformed argument list"_err_en_US);
556 return;
557 }
558 char punc{dir.TokenAt(j).OnlyNonBlank()};
559 if (punc == ')') {
560 break;
561 }
562 if (isVariadic || punc != ',') {
563 prescanner.Say(dir.GetTokenProvenanceRange(j),
564 "#define: malformed argument list"_err_en_US);
565 return;
566 }
567 j = dir.SkipBlanks(j + 1);
568 if (j == tokens) {
569 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
570 "#define: malformed argument list"_err_en_US);
571 return;
572 }
573 }
574 if (std::set<std::string>(argName.begin(), argName.end()).size() !=
575 argName.size()) {
576 prescanner.Say(dir.GetTokenProvenance(dirOffset),
577 "#define: argument names are not distinct"_err_en_US);
578 return;
579 }
580 }
581 j = dir.SkipBlanks(j + 1);
582 definitions_.emplace(std::make_pair(
583 nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
584 } else {
585 j = dir.SkipBlanks(j + 1);
586 definitions_.emplace(
587 std::make_pair(nameToken, Definition{dir, j, tokens - j}));
588 }
589 } else if (dirName == "undef") {
590 if (nameToken.empty()) {
591 prescanner.Say(
592 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
593 "# missing or invalid name"_err_en_US);
594 } else {
595 if (dir.IsAnythingLeft(++j)) {
596 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
597 "#undef: excess tokens at end of directive"_port_en_US);
598 } else {
599 definitions_.erase(nameToken);
600 }
601 }
602 } else if (dirName == "ifdef" || dirName == "ifndef") {
603 bool doThen{false};
604 if (nameToken.empty()) {
605 prescanner.Say(
606 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
607 "#%s: missing name"_err_en_US, dirName);
608 } else {
609 if (dir.IsAnythingLeft(++j)) {
610 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
611 "#%s: excess tokens at end of directive"_port_en_US, dirName);
612 }
613 doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
614 }
615 if (doThen) {
616 ifStack_.push(CanDeadElseAppear::Yes);
617 } else {
618 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
619 dir.GetTokenProvenance(dirOffset));
620 }
621 } else if (dirName == "if") {
622 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
623 ifStack_.push(CanDeadElseAppear::Yes);
624 } else {
625 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
626 dir.GetTokenProvenanceRange(dirOffset));
627 }
628 } else if (dirName == "else") {
629 if (dir.IsAnythingLeft(j)) {
630 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
631 "#else: excess tokens at end of directive"_port_en_US);
632 } else if (ifStack_.empty()) {
633 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
634 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
635 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
636 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
637 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
638 } else {
639 ifStack_.pop();
640 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
641 dir.GetTokenProvenanceRange(dirOffset));
642 }
643 } else if (dirName == "elif") {
644 if (ifStack_.empty()) {
645 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
646 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
647 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
648 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
649 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
650 } else {
651 ifStack_.pop();
652 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
653 dir.GetTokenProvenanceRange(dirOffset));
654 }
655 } else if (dirName == "endif") {
656 if (dir.IsAnythingLeft(j)) {
657 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
658 "#endif: excess tokens at end of directive"_port_en_US);
659 } else if (ifStack_.empty()) {
660 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
661 "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
662 } else {
663 ifStack_.pop();
664 }
665 } else if (dirName == "error") {
666 prescanner.Say(
667 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
668 "%s"_err_en_US, dir.ToString());
669 } else if (dirName == "warning") {
670 prescanner.Say(
671 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
672 "%s"_warn_en_US, dir.ToString());
673 } else if (dirName == "comment" || dirName == "note") {
674 prescanner.Say(
675 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
676 "%s"_en_US, dir.ToString());
677 } else if (dirName == "include") {
678 if (j == tokens) {
679 prescanner.Say(
680 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
681 "#include: missing name of file to include"_err_en_US);
682 return;
683 }
684 std::optional<std::string> prependPath;
685 TokenSequence path{dir, j, tokens - j};
686 std::string include{path.TokenAt(0).ToString()};
687 if (include != "<" && include.substr(0, 1) != "\"" &&
688 include.substr(0, 1) != "'") {
689 path = ReplaceMacros(path, prescanner);
690 include = path.empty() ? ""s : path.TokenAt(0).ToString();
691 }
692 auto pathTokens{path.SizeInTokens()};
693 std::size_t k{0};
694 if (include == "<") { // #include <foo>
695 k = 1;
696 if (k >= pathTokens) {
697 prescanner.Say(dir.GetIntervalProvenanceRange(j, pathTokens),
698 "#include: file name missing"_err_en_US);
699 return;
700 }
701 while (k < pathTokens && path.TokenAt(k) != ">") {
702 ++k;
703 }
704 if (k >= pathTokens) {
705 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
706 "#include: expected '>' at end of included file"_port_en_US);
707 }
708 TokenSequence braced{path, 1, k - 1};
709 include = braced.ToString();
710 } else if ((include.substr(0, 1) == "\"" || include.substr(0, 1) == "'") &&
711 include.front() == include.back()) {
712 // #include "foo" and #include 'foo'
713 include = include.substr(1, include.size() - 2);
714 // Start search in directory of file containing the directive
715 auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
716 if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
717 prependPath = DirectoryName(currentFile->path());
718 }
719 } else {
720 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
721 "#include %s: expected name of file to include"_err_en_US,
722 path.ToString());
723 return;
724 }
725 if (include.empty()) {
726 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
727 "#include %s: empty include file name"_err_en_US, path.ToString());
728 return;
729 }
730 k = path.SkipBlanks(k + 1);
731 if (k < pathTokens && path.TokenAt(k).ToString() != "!") {
732 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
733 "#include: extra stuff ignored after file name"_port_en_US);
734 }
735 std::string buf;
736 llvm::raw_string_ostream error{buf};
737 const SourceFile *included{
738 allSources_.Open(include, error, std::move(prependPath))};
739 if (!included) {
740 prescanner.Say(dir.GetTokenProvenanceRange(j), "#include: %s"_err_en_US,
741 error.str());
742 } else if (included->bytes() > 0) {
743 ProvenanceRange fileRange{
744 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
745 Prescanner{prescanner}
746 .set_encoding(included->encoding())
747 .Prescan(fileRange);
748 }
749 } else {
750 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
751 "#%s: unknown or unimplemented directive"_err_en_US, dirName);
752 }
753}
754
755void Preprocessor::PrintMacros(llvm::raw_ostream &out) const {
756 // std::set is ordered. Use that to print the macros in an
757 // alphabetical order.
758 std::set<std::string> macroNames;
759 for (const auto &[name, _] : definitions_) {
760 macroNames.insert(name.ToString());
761 }
762
763 for (const std::string &name : macroNames) {
764 out << "#define " << name;
765 definitions_.at(name).Print(out, name.c_str());
766 out << '\n';
767 }
768}
769
770CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
771 names_.push_back(t.ToString());
772 return {names_.back().data(), names_.back().size()};
773}
774
775bool Preprocessor::IsNameDefined(const CharBlock &token) {
776 return definitions_.find(token) != definitions_.end();
777}
778
779bool Preprocessor::IsFunctionLikeDefinition(const CharBlock &token) {
780 auto it{definitions_.find(token)};
781 return it != definitions_.end() && it->second.isFunctionLike();
782}
783
784static std::string GetDirectiveName(
785 const TokenSequence &line, std::size_t *rest) {
786 std::size_t tokens{line.SizeInTokens()};
787 std::size_t j{line.SkipBlanks(0)};
788 if (j == tokens || line.TokenAt(j).ToString() != "#") {
789 *rest = tokens;
790 return "";
791 }
792 j = line.SkipBlanks(j + 1);
793 if (j == tokens) {
794 *rest = tokens;
795 return "";
796 }
797 *rest = line.SkipBlanks(j + 1);
798 return ToLowerCaseLetters(line.TokenAt(j).ToString());
799}
800
801void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
802 IsElseActive isElseActive, Prescanner &prescanner,
803 ProvenanceRange provenanceRange) {
804 int nesting{0};
805 while (!prescanner.IsAtEnd()) {
806 if (!prescanner.IsNextLinePreprocessorDirective()) {
807 prescanner.NextLine();
808 continue;
809 }
810 TokenSequence line{prescanner.TokenizePreprocessorDirective()};
811 std::size_t rest{0};
812 std::string dn{GetDirectiveName(line, &rest)};
813 if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
814 ++nesting;
815 } else if (dn == "endif") {
816 if (nesting-- == 0) {
817 return;
818 }
819 } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
820 if (dn == "else") {
821 ifStack_.push(CanDeadElseAppear::No);
822 return;
823 }
824 if (dn == "elif" &&
825 IsIfPredicateTrue(
826 line, rest, line.SizeInTokens() - rest, prescanner)) {
827 ifStack_.push(CanDeadElseAppear::Yes);
828 return;
829 }
830 }
831 }
832 prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
833}
834
// Precedence level codes used here to accommodate mixed Fortran and C:
// 15: parentheses and constants, logical !, bitwise ~
// 14: unary + and -
// 13: **
// 12: *, /, % (modulus)
// 11: + and -
// 10: << and >>
//  9: bitwise &
//  8: bitwise ^
//  7: bitwise |
//  6: relations (.EQ., ==, &c.)
//  5: .NOT.
//  4: .AND., &&
//  3: .OR., ||
//  2: .EQV. and .NEQV. / .XOR.
//  1: ? :
//  0: ,
852static std::int64_t ExpressionValue(const TokenSequence &token,
853 int minimumPrecedence, std::size_t *atToken,
854 std::optional<Message> *error) {
855 enum Operator {
856 PARENS,
857 CONST,
858 NOTZERO, // !
859 COMPLEMENT, // ~
860 UPLUS,
861 UMINUS,
862 POWER,
863 TIMES,
864 DIVIDE,
865 MODULUS,
866 ADD,
867 SUBTRACT,
868 LEFTSHIFT,
869 RIGHTSHIFT,
870 BITAND,
871 BITXOR,
872 BITOR,
873 LT,
874 LE,
875 EQ,
876 NE,
877 GE,
878 GT,
879 NOT,
880 AND,
881 OR,
882 EQV,
883 NEQV,
884 SELECT,
885 COMMA
886 };
887 static const int precedence[]{
888 15, 15, 15, 15, // (), 6, !, ~
889 14, 14, // unary +, -
890 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
891 9, 8, 7, // &, ^, |
892 6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
893 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
894 1, 0 // ?: and ,
895 };
896 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
897 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
898
899 static std::map<std::string, enum Operator> opNameMap;
900 if (opNameMap.empty()) {
901 opNameMap["("] = PARENS;
902 opNameMap["!"] = NOTZERO;
903 opNameMap["~"] = COMPLEMENT;
904 opNameMap["**"] = POWER;
905 opNameMap["*"] = TIMES;
906 opNameMap["/"] = DIVIDE;
907 opNameMap["%"] = MODULUS;
908 opNameMap["+"] = ADD;
909 opNameMap["-"] = SUBTRACT;
910 opNameMap["<<"] = LEFTSHIFT;
911 opNameMap[">>"] = RIGHTSHIFT;
912 opNameMap["&"] = BITAND;
913 opNameMap["^"] = BITXOR;
914 opNameMap["|"] = BITOR;
915 opNameMap[".lt."] = opNameMap["<"] = LT;
916 opNameMap[".le."] = opNameMap["<="] = LE;
917 opNameMap[".eq."] = opNameMap["=="] = EQ;
918 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
919 opNameMap[".ge."] = opNameMap[">="] = GE;
920 opNameMap[".gt."] = opNameMap[">"] = GT;
921 opNameMap[".not."] = NOT;
922 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
923 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
924 opNameMap[".eqv."] = EQV;
925 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
926 opNameMap["?"] = SELECT;
927 opNameMap[","] = COMMA;
928 }
929
930 std::size_t tokens{token.SizeInTokens()};
931 CHECK(tokens > 0);
932 if (*atToken >= tokens) {
933 *error =
934 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
935 return 0;
936 }
937
938 // Parse and evaluate a primary or a unary operator and its operand.
939 std::size_t opAt{*atToken};
940 std::string t{token.TokenAt(opAt).ToString()};
941 enum Operator op;
942 std::int64_t left{0};
943 if (t == "(") {
944 op = PARENS;
945 } else if (IsDecimalDigit(t[0])) {
946 op = CONST;
947 std::size_t consumed{0};
948 left = std::stoll(str: t, idx: &consumed, base: 0 /*base to be detected*/);
949 if (consumed < t.size()) {
950 *error = Message{token.GetTokenProvenanceRange(opAt),
951 "Uninterpretable numeric constant '%s'"_err_en_US, t};
952 return 0;
953 }
954 } else if (IsLegalIdentifierStart(cpl: t[0])) {
955 // undefined macro name -> zero
956 // TODO: BOZ constants?
957 op = CONST;
958 } else if (t == "+") {
959 op = UPLUS;
960 } else if (t == "-") {
961 op = UMINUS;
962 } else if (t == "." && *atToken + 2 < tokens &&
963 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
964 token.TokenAt(*atToken + 2).ToString() == ".") {
965 op = NOT;
966 *atToken += 2;
967 } else {
968 auto it{opNameMap.find(x: t)};
969 if (it != opNameMap.end()) {
970 op = it->second;
971 } else {
972 *error = Message{token.GetTokenProvenanceRange(opAt),
973 "operand expected in expression"_err_en_US};
974 return 0;
975 }
976 }
977 if (precedence[op] < minimumPrecedence) {
978 *error = Message{token.GetTokenProvenanceRange(opAt),
979 "operator precedence error"_err_en_US};
980 return 0;
981 }
982 ++*atToken;
983 if (op != CONST) {
984 left = ExpressionValue(token, operandPrecedence[op], atToken, error);
985 if (*error) {
986 return 0;
987 }
988 switch (op) {
989 case PARENS:
990 if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
991 ++*atToken;
992 break;
993 }
994 if (*atToken >= tokens) {
995 *error = Message{token.GetProvenanceRange(),
996 "')' missing from expression"_err_en_US};
997 } else {
998 *error = Message{
999 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
1000 }
1001 return 0;
1002 case NOTZERO:
1003 left = !left;
1004 break;
1005 case COMPLEMENT:
1006 left = ~left;
1007 break;
1008 case UPLUS:
1009 break;
1010 case UMINUS:
1011 left = -left;
1012 break;
1013 case NOT:
1014 left = -!left;
1015 break;
1016 default:
1017 CRASH_NO_CASE;
1018 }
1019 }
1020
1021 // Parse and evaluate binary operators and their second operands, if present.
1022 while (*atToken < tokens) {
1023 int advance{1};
1024 t = token.TokenAt(*atToken).ToString();
1025 if (t == "." && *atToken + 2 < tokens &&
1026 token.TokenAt(*atToken + 2).ToString() == ".") {
1027 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
1028 advance = 3;
1029 }
1030 auto it{opNameMap.find(x: t)};
1031 if (it == opNameMap.end()) {
1032 break;
1033 }
1034 op = it->second;
1035 if (op < POWER || precedence[op] < minimumPrecedence) {
1036 break;
1037 }
1038 opAt = *atToken;
1039 *atToken += advance;
1040
1041 std::int64_t right{
1042 ExpressionValue(token, operandPrecedence[op], atToken, error)};
1043 if (*error) {
1044 return 0;
1045 }
1046
1047 switch (op) {
1048 case POWER:
1049 if (left == 0) {
1050 if (right < 0) {
1051 *error = Message{token.GetTokenProvenanceRange(opAt),
1052 "0 ** negative power"_err_en_US};
1053 }
1054 } else if (left != 1 && right != 1) {
1055 if (right <= 0) {
1056 left = !right;
1057 } else {
1058 std::int64_t power{1};
1059 for (; right > 0; --right) {
1060 if ((power * left) / left != power) {
1061 *error = Message{token.GetTokenProvenanceRange(opAt),
1062 "overflow in exponentation"_err_en_US};
1063 left = 1;
1064 }
1065 power *= left;
1066 }
1067 left = power;
1068 }
1069 }
1070 break;
1071 case TIMES:
1072 if (left != 0 && right != 0 && ((left * right) / left) != right) {
1073 *error = Message{token.GetTokenProvenanceRange(opAt),
1074 "overflow in multiplication"_err_en_US};
1075 }
1076 left = left * right;
1077 break;
1078 case DIVIDE:
1079 if (right == 0) {
1080 *error = Message{
1081 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
1082 left = 0;
1083 } else {
1084 left = left / right;
1085 }
1086 break;
1087 case MODULUS:
1088 if (right == 0) {
1089 *error = Message{
1090 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
1091 left = 0;
1092 } else {
1093 left = left % right;
1094 }
1095 break;
1096 case ADD:
1097 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
1098 *error = Message{token.GetTokenProvenanceRange(opAt),
1099 "overflow in addition"_err_en_US};
1100 }
1101 left = left + right;
1102 break;
1103 case SUBTRACT:
1104 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
1105 *error = Message{token.GetTokenProvenanceRange(opAt),
1106 "overflow in subtraction"_err_en_US};
1107 }
1108 left = left - right;
1109 break;
1110 case LEFTSHIFT:
1111 if (right < 0 || right > 64) {
1112 *error = Message{token.GetTokenProvenanceRange(opAt),
1113 "bad left shift count"_err_en_US};
1114 }
1115 left = right >= 64 ? 0 : left << right;
1116 break;
1117 case RIGHTSHIFT:
1118 if (right < 0 || right > 64) {
1119 *error = Message{token.GetTokenProvenanceRange(opAt),
1120 "bad right shift count"_err_en_US};
1121 }
1122 left = right >= 64 ? 0 : left >> right;
1123 break;
1124 case BITAND:
1125 case AND:
1126 left = left & right;
1127 break;
1128 case BITXOR:
1129 left = left ^ right;
1130 break;
1131 case BITOR:
1132 case OR:
1133 left = left | right;
1134 break;
1135 case LT:
1136 left = -(left < right);
1137 break;
1138 case LE:
1139 left = -(left <= right);
1140 break;
1141 case EQ:
1142 left = -(left == right);
1143 break;
1144 case NE:
1145 left = -(left != right);
1146 break;
1147 case GE:
1148 left = -(left >= right);
1149 break;
1150 case GT:
1151 left = -(left > right);
1152 break;
1153 case EQV:
1154 left = -(!left == !right);
1155 break;
1156 case NEQV:
1157 left = -(!left != !right);
1158 break;
1159 case SELECT:
1160 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1161 *error = Message{token.GetTokenProvenanceRange(opAt),
1162 "':' required in selection expression"_err_en_US};
1163 return 0;
1164 } else {
1165 ++*atToken;
1166 std::int64_t third{
1167 ExpressionValue(token, operandPrecedence[op], atToken, error)};
1168 left = left != 0 ? right : third;
1169 }
1170 break;
1171 case COMMA:
1172 left = right;
1173 break;
1174 default:
1175 CRASH_NO_CASE;
1176 }
1177 }
1178 return left;
1179}
1180
1181bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1182 std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1183 TokenSequence expr1{expr, first, exprTokens};
1184 if (expr1.HasBlanks()) {
1185 expr1.RemoveBlanks();
1186 }
1187 TokenSequence expr2;
1188 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1189 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1190 CharBlock name;
1191 if (j + 3 < expr1.SizeInTokens() &&
1192 expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
1193 expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
1194 name = expr1.TokenAt(j + 2);
1195 j += 3;
1196 } else if (j + 1 < expr1.SizeInTokens() &&
1197 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1198 name = expr1.TokenAt(++j);
1199 }
1200 if (!name.empty()) {
1201 char truth{IsNameDefined(name) ? '1' : '0'};
1202 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1203 continue;
1204 }
1205 }
1206 expr2.Put(expr1, j);
1207 }
1208 TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1209 if (expr3.HasBlanks()) {
1210 expr3.RemoveBlanks();
1211 }
1212 if (expr3.empty()) {
1213 prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1214 return false;
1215 }
1216 std::size_t atToken{0};
1217 std::optional<Message> error;
1218 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1219 if (error) {
1220 prescanner.Say(std::move(*error));
1221 } else if (atToken < expr3.SizeInTokens() &&
1222 expr3.TokenAt(atToken).ToString() != "!") {
1223 prescanner.Say(expr3.GetIntervalProvenanceRange(
1224 atToken, expr3.SizeInTokens() - atToken),
1225 atToken == 0 ? "could not parse any expression"_err_en_US
1226 : "excess characters after expression"_err_en_US);
1227 }
1228 return result;
1229}
1230
1231void Preprocessor::LineDirective(
1232 const TokenSequence &dir, std::size_t j, Prescanner &prescanner) {
1233 std::size_t tokens{dir.SizeInTokens()};
1234 const std::string *linePath{nullptr};
1235 std::optional<int> lineNumber;
1236 SourceFile *sourceFile{nullptr};
1237 std::optional<SourcePosition> pos;
1238 for (; j < tokens; j = dir.SkipBlanks(j + 1)) {
1239 std::string tstr{dir.TokenAt(j).ToString()};
1240 Provenance provenance{dir.GetTokenProvenance(j)};
1241 if (!pos) {
1242 pos = allSources_.GetSourcePosition(provenance);
1243 }
1244 if (!sourceFile && pos) {
1245 sourceFile = const_cast<SourceFile *>(&*pos->sourceFile);
1246 }
1247 if (tstr.front() == '"' && tstr.back() == '"') {
1248 tstr = tstr.substr(1, tstr.size() - 2);
1249 if (!tstr.empty() && sourceFile) {
1250 linePath = &sourceFile->SavePath(std::move(tstr));
1251 }
1252 } else if (IsDecimalDigit(tstr[0])) {
1253 if (!lineNumber) { // ignore later column number
1254 int ln{0};
1255 for (char c : tstr) {
1256 if (IsDecimalDigit(c)) {
1257 int nln{10 * ln + c - '0'};
1258 if (nln / 10 == ln && nln % 10 == c - '0') {
1259 ln = nln;
1260 continue;
1261 }
1262 }
1263 prescanner.Say(provenance,
1264 "bad line number '%s' in #line directive"_err_en_US, tstr);
1265 return;
1266 }
1267 lineNumber = ln;
1268 }
1269 } else {
1270 prescanner.Say(
1271 provenance, "bad token '%s' in #line directive"_err_en_US, tstr);
1272 return;
1273 }
1274 }
1275 if (lineNumber && sourceFile) {
1276 CHECK(pos);
1277 if (!linePath) {
1278 linePath = &*pos->path;
1279 }
1280 sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
1281 }
1282}
1283
1284} // namespace Fortran::parser
1285

source code of flang/lib/Parser/preprocessor.cpp