//===-- include/flang/Parser/char-set.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_PARSER_CHAR_SET_H_
#define FORTRAN_PARSER_CHAR_SET_H_

// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
// character set encoding in which the case of letters is lost (even if
// mixed case input reached the parser, which it does not).  These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.

#include <cinttypes>
#include <cstddef>
#include <string>

namespace Fortran::parser {

// A set of characters held as a 64-bit mask, one bit per 6-bit character
// code.  Every operation other than ToString() is constexpr.
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the singleton set containing (the 6-bit encoding of) c.
  // Deliberately not explicit: callers may pass a plain char wherever a
  // SetOfChars is expected, e.g. set.Has('a').
  constexpr SetOfChars(char c) {
    // This is basically the old DECSIX encoding, which maps the
    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
    // in that range are unused in Fortran after preprocessing outside
    // character literals.  We repurpose '^' and '?' for newline and unknown
    // characters (resp.), leaving the others alone in case this code might
    // be useful in preprocessing.
    if (c == '\n') {
      // map newline to '^'
      c = '^';
    } else if (c < 32 || c >= 127) {
      // map other control characters, DEL, and 8-bit characters to '?'
      // (8-bit characters are negative when char is signed and >= 127
      // when it is unsigned, so both cases land here)
      c = '?';
    } else if (c >= 96) {
      // map lower-case letters to upper-case; this also folds the codes
      // 96 and 123..126 (backquote, braces, vertical bar, tilde) onto
      // 64 and 91..94, so '~' shares the '^' slot used for newline
      c -= 32;
    }
    // range is now [32..95]; reduce to [0..63] and use as a shift count
    bits_ = static_cast<std::uint64_t>(1) << (c - 32);
  }

  // Constructs the set of the first n characters of str; n == 0 yields the
  // empty set.  Each character is encoded as by the single-char constructor.
  constexpr SetOfChars(const char str[], std::size_t n) {
    for (std::size_t j{0}; j < n; ++j) {
      bits_ |= SetOfChars{str[j]}.bits_;
    }
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return bits_ == 0; }

  // True when every member of that is also a member of this set (subset
  // test); consequently an empty that is always contained.
  constexpr bool Has(SetOfChars that) const {
    return (that.bits_ & ~bits_) == 0;
  }
  // Members of either set.
  constexpr SetOfChars Union(SetOfChars that) const {
    return SetOfChars{bits_ | that.bits_};
  }
  // Members common to both sets.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    return SetOfChars{bits_ & that.bits_};
  }
  // Members of this set that are not members of that.
  constexpr SetOfChars Difference(SetOfChars that) const {
    return SetOfChars{bits_ & ~that.bits_};
  }

  // Declared here, defined out of line.
  // NOTE(review): presumably renders the members of the set as text --
  // confirm against the implementation.
  std::string ToString() const;

private:
  // Wraps an already-encoded mask; private so that integer arguments from
  // outside the class cannot be mistaken for a raw bit pattern.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
  std::uint64_t bits_{0}; // bit k set <=> the character with 6-bit code k
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_CHAR_SET_H_