1 | #ifndef SASS_LEXER_H |
2 | #define SASS_LEXER_H |
3 | |
4 | #include <cstring> |
5 | |
6 | namespace Sass { |
7 | namespace Prelexer { |
8 | |
9 | //#################################### |
10 | // BASIC CHARACTER MATCHERS |
11 | //#################################### |
12 | |
13 | // Match standard control chars |
14 | const char* kwd_at(const char* src); |
15 | const char* kwd_dot(const char* src); |
16 | const char* kwd_comma(const char* src); |
17 | const char* kwd_colon(const char* src); |
18 | const char* kwd_star(const char* src); |
19 | const char* kwd_plus(const char* src); |
20 | const char* kwd_minus(const char* src); |
21 | const char* kwd_slash(const char* src); |
22 | |
23 | //#################################### |
24 | // BASIC CLASS MATCHERS |
25 | //#################################### |
26 | |
27 | // Matches ASCII digits, +, and -. |
28 | bool is_number(char src); |
29 | |
30 | bool is_uri_character(char src); |
31 | bool escapable_character(char src); |
32 | |
33 | // Match a single ctype predicate. |
34 | const char* space(const char* src); |
35 | const char* alpha(const char* src); |
36 | const char* digit(const char* src); |
37 | const char* xdigit(const char* src); |
38 | const char* alnum(const char* src); |
39 | const char* hyphen(const char* src); |
40 | const char* nonascii(const char* src); |
41 | const char* uri_character(const char* src); |
42 | const char* escapable_character(const char* src); |
43 | |
44 | // Match multiple ctype characters. |
45 | const char* spaces(const char* src); |
46 | const char* digits(const char* src); |
47 | const char* hyphens(const char* src); |
48 | |
49 | // Whitespace handling. |
50 | const char* no_spaces(const char* src); |
51 | const char* optional_spaces(const char* src); |
52 | |
53 | // Match any single character (/./). |
54 | const char* any_char(const char* src); |
55 | |
56 | // Assert word boundary (/\b/) |
// This is a zero-width positive lookahead.
58 | const char* word_boundary(const char* src); |
59 | |
60 | // Match a single linebreak (/(?:\n|\r\n?)/). |
61 | const char* re_linebreak(const char* src); |
62 | |
63 | // Assert string boundaries (/\Z|\z|\A/) |
// These are zero-width positive lookaheads.
65 | const char* end_of_line(const char* src); |
66 | |
67 | // Assert end_of_file boundary (/\z/) |
68 | const char* end_of_file(const char* src); |
69 | // const char* start_of_string(const char* src); |
70 | |
// Function-pointer type shared by all prelexers: takes the current input
// position and returns a position (the combinators in this file treat a
// null result as "no match").
using prelexer = const char* (*)(const char*);
73 | |
74 | //#################################### |
75 | // BASIC "REGEX" CONSTRUCTORS |
76 | //#################################### |
77 | |
// Consume one character if it equals `chr`.
// Regex equivalent: /(?:x)/
// Returns the position just past the character, or null when it differs.
template <char chr>
const char* exactly(const char* src) {
  if (*src != chr) return 0;
  return src + 1;
}
84 | |
// Consume the complete literal `str` at the current position.
// Regex equivalent: /(?:literal)/
// Returns the position just past the literal, or null when `str` or `src`
// is null or the input does not start with the literal.
template <const char* str>
const char* exactly(const char* src) {
  if (!str || !src) return 0;
  // Advance through literal and input in lockstep. If the input ends before
  // the literal does, its terminator compares unequal and the scan stops.
  const char* lit = str;
  for (; *lit != 0 && *lit == *src; ++lit) ++src;
  // Only a fully consumed literal counts as a match.
  if (*lit != 0) return 0;
  return src;
}
100 | |
101 | |
// Match a single character literal, ignoring ASCII case.
// Regex equivalent: /(?:x)/i
// `chr` must be given in lower case. An input character matches when it is
// `chr` itself or, if `chr` is a lower-case ASCII letter, its upper-case form.
// Returns the position just past the character, or null on mismatch.
template <char chr>
const char* insensitive(const char* src) {
  // Fold case only for 'a'..'z'. An unconditional `*src + 32 == chr` would
  // also accept unrelated bytes (e.g. '\r' for '-') and, for chr == ' ',
  // the terminating NUL, stepping past the end of the input.
  const bool is_lower_alpha = chr >= 'a' && chr <= 'z';
  return (*src == chr || (is_lower_alpha && *src + 32 == chr)) ? src + 1 : 0;
}
109 | |
// Match the full string literal, ignoring ASCII case.
// Regex equivalent: /(?:literal)/i
// `str` must be given in lower case. Each lower-case ASCII letter in it also
// accepts its upper-case form; every other character must match exactly.
// Returns the position just past the literal, or null when `str` or `src`
// is null or the input does not start with the literal.
template <const char* str>
const char* insensitive(const char* src) {
  if (str == 0 || src == 0) return 0;
  const char* pre = str;
  // If the input is shorter than the literal, its terminator fails to match
  // the next literal character and the scan stops.
  while (*pre) {
    const bool same = *src == *pre;
    // Fold case only for 'a'..'z'. An unconditional `*src + 32 == *pre` would
    // let ' ' in the literal match the input's NUL terminator (running past
    // the end of the input) and let '-' match '\r', and so on.
    const bool folded = *pre >= 'a' && *pre <= 'z' && *src + 32 == *pre;
    if (!same && !folded) break;
    ++src, ++pre;
  }
  // The match succeeded only if the whole literal was consumed.
  return *pre == 0 ? src : 0;
}
126 | |
// Consume one character if it is a member of `char_class`.
// Regex equivalent: /[axy]/
// Returns the position just past the character, or null when the character
// is not in the class (the input terminator is never a member).
template <const char* char_class>
const char* class_char(const char* src) {
  for (const char* member = char_class; *member; ++member) {
    if (*member == *src) return src + 1;
  }
  return 0;
}
135 | |
136 | // Match for members of char class. |
137 | // Regex equivalent: /[axy]+/ |
138 | template <const char* char_class> |
139 | const char* class_chars(const char* src) { |
140 | const char* p = src; |
141 | while (class_char<char_class>(p)) ++p; |
142 | return p == src ? 0 : p; |
143 | } |
144 | |
// Consume one character if it is NOT a member of `neg_char_class`.
// Regex equivalent: /[^axy]/
// Returns the position just past the character, or null when the character
// is in the class or the input is at its terminator.
template <const char* neg_char_class>
const char* neg_class_char(const char* src) {
  if (*src == 0) return 0;
  for (const char* excluded = neg_char_class; *excluded; ++excluded) {
    if (*excluded == *src) return 0;
  }
  return src + 1;
}
154 | |
155 | // Match for members of char class. |
156 | // Regex equivalent: /[^axy]+/ |
157 | template <const char* neg_char_class> |
158 | const char* neg_class_chars(const char* src) { |
159 | const char* p = src; |
160 | while (neg_class_char<neg_char_class>(p)) ++p; |
161 | return p == src ? 0 : p; |
162 | } |
163 | |
// Consume any single character except `chr`.
// Regex equivalent: /[^x]/
// Returns the position just past the character, or null when the character
// is `chr` or the input is at its terminator.
template <const char chr>
const char* any_char_but(const char* src) {
  if (*src == 0 || *src == chr) return 0;
  return src + 1;
}
170 | |
171 | // Succeeds if the matcher fails. |
172 | // Aka. zero-width negative lookahead. |
173 | // Regex equivalent: /(?!literal)/ |
174 | template <prelexer mx> |
175 | const char* negate(const char* src) { |
176 | return mx(src) ? 0 : src; |
177 | } |
178 | |
179 | // Succeeds if the matcher succeeds. |
180 | // Aka. zero-width positive lookahead. |
181 | // Regex equivalent: /(?=literal)/ |
182 | // just hangs around until we need it |
183 | template <prelexer mx> |
184 | const char* lookahead(const char* src) { |
185 | return mx(src) ? src : 0; |
186 | } |
187 | |
188 | // Tries supplied matchers in order. |
189 | // Succeeds if one of them succeeds. |
190 | // Regex equivalent: /(?:FOO|BAR)/ |
191 | template <const prelexer mx> |
192 | const char* alternatives(const char* src) { |
193 | const char* rslt; |
194 | if ((rslt = mx(src))) return rslt; |
195 | return 0; |
196 | } |
197 | template <const prelexer mx1, const prelexer mx2, const prelexer... mxs> |
198 | const char* alternatives(const char* src) { |
199 | const char* rslt; |
200 | if ((rslt = mx1(src))) return rslt; |
201 | return alternatives<mx2, mxs...>(src); |
202 | } |
203 | |
204 | // Tries supplied matchers in order. |
205 | // Succeeds if all of them succeeds. |
206 | // Regex equivalent: /(?:FOO)(?:BAR)/ |
207 | template <const prelexer mx1> |
208 | const char* sequence(const char* src) { |
209 | const char* rslt = src; |
210 | if (!(rslt = mx1(rslt))) return 0; |
211 | return rslt; |
212 | } |
213 | template <const prelexer mx1, const prelexer mx2, const prelexer... mxs> |
214 | const char* sequence(const char* src) { |
215 | const char* rslt = src; |
216 | if (!(rslt = mx1(rslt))) return 0; |
217 | return sequence<mx2, mxs...>(rslt); |
218 | } |
219 | |
220 | |
221 | // Match a pattern or not. Always succeeds. |
222 | // Regex equivalent: /(?:literal)?/ |
223 | template <prelexer mx> |
224 | const char* optional(const char* src) { |
225 | const char* p = mx(src); |
226 | return p ? p : src; |
227 | } |
228 | |
229 | // Match zero or more of the patterns. |
230 | // Regex equivalent: /(?:literal)*/ |
231 | template <prelexer mx> |
232 | const char* zero_plus(const char* src) { |
233 | const char* p = mx(src); |
234 | while (p) src = p, p = mx(src); |
235 | return src; |
236 | } |
237 | |
238 | // Match one or more of the patterns. |
239 | // Regex equivalent: /(?:literal)+/ |
240 | template <prelexer mx> |
241 | const char* one_plus(const char* src) { |
242 | const char* p = mx(src); |
243 | if (!p) return 0; |
244 | while (p) src = p, p = mx(src); |
245 | return src; |
246 | } |
247 | |
248 | // Match mx non-greedy until delimiter. |
249 | // Other prelexers are greedy by default. |
250 | // Regex equivalent: /(?:$mx)*?(?=$delim)\b/ |
251 | template <prelexer mx, prelexer delim> |
252 | const char* non_greedy(const char* src) { |
253 | while (!delim(src)) { |
254 | const char* p = mx(src); |
255 | if (p == src) return 0; |
256 | if (p == 0) return 0; |
257 | src = p; |
258 | } |
259 | return src; |
260 | } |
261 | |
262 | //#################################### |
263 | // ADVANCED "REGEX" CONSTRUCTORS |
264 | //#################################### |
265 | |
266 | // Match with word boundary rule. |
267 | // Regex equivalent: /(?:$mx)\b/i |
268 | template <const char* str> |
269 | const char* keyword(const char* src) { |
270 | return sequence < |
271 | insensitive < str >, |
272 | word_boundary |
273 | >(src); |
274 | } |
275 | |
276 | // Match with word boundary rule. |
277 | // Regex equivalent: /(?:$mx)\b/ |
278 | template <const char* str> |
279 | const char* word(const char* src) { |
280 | return sequence < |
281 | exactly < str >, |
282 | word_boundary |
283 | >(src); |
284 | } |
285 | |
286 | template <char chr> |
287 | const char* loosely(const char* src) { |
288 | return sequence < |
289 | optional_spaces, |
290 | exactly < chr > |
291 | >(src); |
292 | } |
293 | template <const char* str> |
294 | const char* loosely(const char* src) { |
295 | return sequence < |
296 | optional_spaces, |
297 | exactly < str > |
298 | >(src); |
299 | } |
300 | |
301 | } |
302 | } |
303 | |
304 | #endif |
305 | |