1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
#include "stream.h"
#include <string>
#include <utility>
#include <vector>
9 | |
10 | namespace embree |
11 | { |
12 | /*! token class */ |
13 | class Token |
14 | { |
15 | public: |
16 | |
17 | enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL }; |
18 | |
19 | Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {} |
20 | Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {} |
21 | Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {} |
22 | Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {} |
23 | Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {} |
24 | |
25 | static Token Eof() { return Token(); } |
26 | static Token Sym(std::string str) { return Token(str,TY_SYMBOL); } |
27 | static Token Str(std::string str) { return Token(str,TY_STRING); } |
28 | static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); } |
29 | |
30 | char Char() const { |
31 | if (ty == TY_CHAR) return c; |
32 | THROW_RUNTIME_ERROR(loc.str()+": character expected" ); |
33 | } |
34 | |
35 | int Int() const { |
36 | if (ty == TY_INT) return i; |
37 | THROW_RUNTIME_ERROR(loc.str()+": integer expected" ); |
38 | } |
39 | |
40 | float Float(bool cast = true) const { |
41 | if (ty == TY_FLOAT) return f; |
42 | if (ty == TY_INT && cast) return (float)i; |
43 | THROW_RUNTIME_ERROR(loc.str()+": float expected" ); |
44 | } |
45 | |
46 | std::string Identifier() const { |
47 | if (ty == TY_IDENTIFIER) return str; |
48 | THROW_RUNTIME_ERROR(loc.str()+": identifier expected" ); |
49 | } |
50 | |
51 | std::string String() const { |
52 | if (ty == TY_STRING) return str; |
53 | THROW_RUNTIME_ERROR(loc.str()+": string expected" ); |
54 | } |
55 | |
56 | std::string Symbol() const { |
57 | if (ty == TY_SYMBOL) return str; |
58 | THROW_RUNTIME_ERROR(loc.str()+": symbol expected" ); |
59 | } |
60 | |
61 | const ParseLocation& Location() const { return loc; } |
62 | |
63 | friend bool operator==(const Token& a, const Token& b) |
64 | { |
65 | if (a.ty != b.ty) return false; |
66 | if (a.ty == TY_CHAR) return a.c == b.c; |
67 | if (a.ty == TY_INT) return a.i == b.i; |
68 | if (a.ty == TY_FLOAT) return a.f == b.f; |
69 | if (a.ty == TY_IDENTIFIER) return a.str == b.str; |
70 | if (a.ty == TY_STRING) return a.str == b.str; |
71 | if (a.ty == TY_SYMBOL) return a.str == b.str; |
72 | return true; |
73 | } |
74 | |
75 | friend bool operator!=(const Token& a, const Token& b) { |
76 | return !(a == b); |
77 | } |
78 | |
79 | friend bool operator <( const Token& a, const Token& b ) { |
80 | if (a.ty != b.ty) return (int)a.ty < (int)b.ty; |
81 | if (a.ty == TY_CHAR) return a.c < b.c; |
82 | if (a.ty == TY_INT) return a.i < b.i; |
83 | if (a.ty == TY_FLOAT) return a.f < b.f; |
84 | if (a.ty == TY_IDENTIFIER) return a.str < b.str; |
85 | if (a.ty == TY_STRING) return a.str < b.str; |
86 | if (a.ty == TY_SYMBOL) return a.str < b.str; |
87 | return false; |
88 | } |
89 | |
90 | friend std::ostream& operator<<(std::ostream& cout, const Token& t) |
91 | { |
92 | if (t.ty == TY_EOF) return cout << "eof" ; |
93 | if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")" ; |
94 | if (t.ty == TY_INT) return cout << "Int(" << t.i << ")" ; |
95 | if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")" ; |
96 | if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")" ; |
97 | if (t.ty == TY_STRING) return cout << "String(" << t.str << ")" ; |
98 | if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")" ; |
99 | return cout << "unknown" ; |
100 | } |
101 | |
102 | private: |
103 | Type ty; //< the type of the token |
104 | union { |
105 | char c; //< data for char tokens |
106 | int i; //< data for int tokens |
107 | float f; //< data for float tokens |
108 | }; |
109 | std::string str; //< data for string and identifier tokens |
110 | ParseLocation loc; //< the location the token is from |
111 | }; |
112 | |
113 | /*! build full tokenizer that takes list of valid characters and keywords */ |
114 | class TokenStream : public Stream<Token> |
115 | { |
116 | public: |
117 | |
118 | /*! shorthands for common sets of characters */ |
119 | static const std::string alpha; |
120 | static const std::string ALPHA; |
121 | static const std::string numbers; |
122 | static const std::string separators; |
123 | static const std::string stringChars; |
124 | |
125 | public: |
126 | TokenStream(const Ref<Stream<int> >& cin, |
127 | const std::string& alpha, //< valid characters for identifiers |
128 | const std::string& seps, //< characters that act as separators |
129 | const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols |
130 | public: |
131 | ParseLocation location() { return cin->loc(); } |
132 | Token next(); |
133 | bool trySymbol(const std::string& symbol); |
134 | |
135 | private: |
136 | void skipSeparators(); |
137 | bool decDigits(std::string& str); |
138 | bool decDigits1(std::string& str); |
139 | bool trySymbols(Token& token, const ParseLocation& loc); |
140 | bool tryFloat(Token& token, const ParseLocation& loc); |
141 | bool tryInt(Token& token, const ParseLocation& loc); |
142 | bool tryString(Token& token, const ParseLocation& loc); |
143 | bool tryIdentifier(Token& token, const ParseLocation& loc); |
144 | |
145 | Ref<Stream<int> > cin; |
146 | bool isSepMap[256]; |
147 | bool isAlphaMap[256]; |
148 | bool isStringCharMap[256]; |
149 | std::vector<std::string> symbols; |
150 | |
151 | /*! checks if a character is a separator */ |
152 | __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; } |
153 | |
154 | /*! checks if a character is a number */ |
155 | __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; } |
156 | |
157 | /*! checks if a character is valid inside a string */ |
158 | __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; } |
159 | |
160 | /*! checks if a character is legal for an identifier */ |
161 | __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; } |
162 | __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); } |
163 | }; |
164 | } |
165 | |