1/****************************************************************************
2**
3** Copyright (C) 2015 The Qt Company Ltd.
4** Contact: http://www.qt.io/licensing/
5**
6** This file is part of the QtScript module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qscriptlexer_p.h"
41
42#include "qscriptgrammar_p.h"
43#include <ctype.h>
44#include <stdlib.h>
45#include <stdio.h>
46#include <string.h>
47
48QT_BEGIN_NAMESPACE
49
// Treats a two-character line break ("\r\n" or "\n\r") as one line
// terminator: when the current and the next character form such a pair,
// one extra character is consumed. Expects `current`, `next1` and `shift()`
// to be in scope at the point of use.
#define shiftWindowsLineBreak() \
    do { \
        if ((current == '\r' || current == '\n') \
            && (next1 == '\r' || next1 == '\n') \
            && (current != next1)) \
            shift(1); \
    } while (0)
58
// Floating point type used for script numbers in this file.
typedef double qsreal; // ###

// Defined in another translation unit; lex() uses it to convert the
// buffered text of hexadecimal and octal literals with the given radix.
namespace QScript {
qsreal integerFromString(const char *buf, int size, int radix);
}
64
65QScript::Lexer::Lexer(QScriptEnginePrivate *eng)
66 : driver(eng),
67 yylineno(0),
68 size8(128), size16(128), restrKeyword(false),
69 stackToken(-1), pos(0),
70 code(0), length(0),
71 bol(true),
72 current(0), next1(0), next2(0), next3(0),
73 err(NoError),
74 check_reserved(true),
75 parenthesesState(IgnoreParentheses),
76 prohibitAutomaticSemicolon(false)
77{
78 // allocate space for read buffers
79 buffer8 = new char[size8];
80 buffer16 = new QChar[size16];
81 pattern = 0;
82 flags = 0;
83
84}
85
86QScript::Lexer::~Lexer()
87{
88 delete [] buffer8;
89 delete [] buffer16;
90}
91
92void QScript::Lexer::setCode(const QString &c, int lineno)
93{
94 errmsg = QString();
95 yylineno = lineno;
96 yycolumn = 1;
97 restrKeyword = false;
98 delimited = false;
99 stackToken = -1;
100 pos = 0;
101 code = c.unicode();
102 length = c.length();
103 bol = true;
104
105 // read first characters
106 current = (length > 0) ? code[0].unicode() : 0;
107 next1 = (length > 1) ? code[1].unicode() : 0;
108 next2 = (length > 2) ? code[2].unicode() : 0;
109 next3 = (length > 3) ? code[3].unicode() : 0;
110}
111
112void QScript::Lexer::shift(uint p)
113{
114 while (p--) {
115 ++pos;
116 ++yycolumn;
117 current = next1;
118 next1 = next2;
119 next2 = next3;
120 next3 = (pos + 3 < length) ? code[pos+3].unicode() : 0;
121 }
122}
123
124void QScript::Lexer::setDone(State s)
125{
126 state = s;
127 done = true;
128}
129
130int QScript::Lexer::findReservedWord(const QChar *c, int size) const
131{
132 switch (size) {
133 case 2: {
134 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('o'))
135 return QScriptGrammar::T_DO;
136 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('f'))
137 return QScriptGrammar::T_IF;
138 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n'))
139 return QScriptGrammar::T_IN;
140 } break;
141
142 case 3: {
143 if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('o') && c[2] == QLatin1Char('r'))
144 return QScriptGrammar::T_FOR;
145 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('e') && c[2] == QLatin1Char('w'))
146 return QScriptGrammar::T_NEW;
147 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r') && c[2] == QLatin1Char('y'))
148 return QScriptGrammar::T_TRY;
149 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('a') && c[2] == QLatin1Char('r'))
150 return QScriptGrammar::T_VAR;
151 else if (check_reserved) {
152 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n') && c[2] == QLatin1Char('t'))
153 return QScriptGrammar::T_RESERVED_WORD;
154 }
155 } break;
156
157 case 4: {
158 if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('a')
159 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('e'))
160 return QScriptGrammar::T_CASE;
161 else if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('l')
162 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('e'))
163 return QScriptGrammar::T_ELSE;
164 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
165 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('s'))
166 return QScriptGrammar::T_THIS;
167 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('o')
168 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('d'))
169 return QScriptGrammar::T_VOID;
170 else if (c[0] == QLatin1Char('w') && c[1] == QLatin1Char('i')
171 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('h'))
172 return QScriptGrammar::T_WITH;
173 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r')
174 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('e'))
175 return QScriptGrammar::T_TRUE;
176 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('u')
177 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('l'))
178 return QScriptGrammar::T_NULL;
179 else if (check_reserved) {
180 if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('n')
181 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('m'))
182 return QScriptGrammar::T_RESERVED_WORD;
183 else if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('y')
184 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e'))
185 return QScriptGrammar::T_RESERVED_WORD;
186 else if (c[0] == QLatin1Char('l') && c[1] == QLatin1Char('o')
187 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('g'))
188 return QScriptGrammar::T_RESERVED_WORD;
189 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('h')
190 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('r'))
191 return QScriptGrammar::T_RESERVED_WORD;
192 else if (c[0] == QLatin1Char('g') && c[1] == QLatin1Char('o')
193 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('o'))
194 return QScriptGrammar::T_RESERVED_WORD;
195 }
196 } break;
197
198 case 5: {
199 if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('r')
200 && c[2] == QLatin1Char('e') && c[3] == QLatin1Char('a')
201 && c[4] == QLatin1Char('k'))
202 return QScriptGrammar::T_BREAK;
203 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('a')
204 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('c')
205 && c[4] == QLatin1Char('h'))
206 return QScriptGrammar::T_CATCH;
207 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
208 && c[2] == QLatin1Char('r') && c[3] == QLatin1Char('o')
209 && c[4] == QLatin1Char('w'))
210 return QScriptGrammar::T_THROW;
211 else if (c[0] == QLatin1Char('w') && c[1] == QLatin1Char('h')
212 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('l')
213 && c[4] == QLatin1Char('e'))
214 return QScriptGrammar::T_WHILE;
215 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('o')
216 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('s')
217 && c[4] == QLatin1Char('t'))
218 return QScriptGrammar::T_CONST;
219 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('a')
220 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('s')
221 && c[4] == QLatin1Char('e'))
222 return QScriptGrammar::T_FALSE;
223 else if (check_reserved) {
224 if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('h')
225 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('r')
226 && c[4] == QLatin1Char('t'))
227 return QScriptGrammar::T_RESERVED_WORD;
228 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('u')
229 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('e')
230 && c[4] == QLatin1Char('r'))
231 return QScriptGrammar::T_RESERVED_WORD;
232 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('i')
233 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('a')
234 && c[4] == QLatin1Char('l'))
235 return QScriptGrammar::T_RESERVED_WORD;
236 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('l')
237 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('s')
238 && c[4] == QLatin1Char('s'))
239 return QScriptGrammar::T_RESERVED_WORD;
240 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('l')
241 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('a')
242 && c[4] == QLatin1Char('t'))
243 return QScriptGrammar::T_RESERVED_WORD;
244 }
245 } break;
246
247 case 6: {
248 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
249 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('e')
250 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('e'))
251 return QScriptGrammar::T_DELETE;
252 else if (c[0] == QLatin1Char('r') && c[1] == QLatin1Char('e')
253 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('u')
254 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('n'))
255 return QScriptGrammar::T_RETURN;
256 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('w')
257 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('t')
258 && c[4] == QLatin1Char('c') && c[5] == QLatin1Char('h'))
259 return QScriptGrammar::T_SWITCH;
260 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('y')
261 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('e')
262 && c[4] == QLatin1Char('o') && c[5] == QLatin1Char('f'))
263 return QScriptGrammar::T_TYPEOF;
264 else if (check_reserved) {
265 if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('x')
266 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('o')
267 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('t'))
268 return QScriptGrammar::T_RESERVED_WORD;
269 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('t')
270 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('t')
271 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('c'))
272 return QScriptGrammar::T_RESERVED_WORD;
273 else if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('o')
274 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('b')
275 && c[4] == QLatin1Char('l') && c[5] == QLatin1Char('e'))
276 return QScriptGrammar::T_RESERVED_WORD;
277 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('m')
278 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('o')
279 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('t'))
280 return QScriptGrammar::T_RESERVED_WORD;
281 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('u')
282 && c[2] == QLatin1Char('b') && c[3] == QLatin1Char('l')
283 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('c'))
284 return QScriptGrammar::T_RESERVED_WORD;
285 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('a')
286 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('i')
287 && c[4] == QLatin1Char('v') && c[5] == QLatin1Char('e'))
288 return QScriptGrammar::T_RESERVED_WORD;
289 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
290 && c[2] == QLatin1Char('r') && c[3] == QLatin1Char('o')
291 && c[4] == QLatin1Char('w') && c[5] == QLatin1Char('s'))
292 return QScriptGrammar::T_RESERVED_WORD;
293 }
294 } break;
295
296 case 7: {
297 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
298 && c[2] == QLatin1Char('f') && c[3] == QLatin1Char('a')
299 && c[4] == QLatin1Char('u') && c[5] == QLatin1Char('l')
300 && c[6] == QLatin1Char('t'))
301 return QScriptGrammar::T_DEFAULT;
302 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('i')
303 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('a')
304 && c[4] == QLatin1Char('l') && c[5] == QLatin1Char('l')
305 && c[6] == QLatin1Char('y'))
306 return QScriptGrammar::T_FINALLY;
307 else if (check_reserved) {
308 if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('o')
309 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('l')
310 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('a')
311 && c[6] == QLatin1Char('n'))
312 return QScriptGrammar::T_RESERVED_WORD;
313 else if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('x')
314 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e')
315 && c[4] == QLatin1Char('n') && c[5] == QLatin1Char('d')
316 && c[6] == QLatin1Char('s'))
317 return QScriptGrammar::T_RESERVED_WORD;
318 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('a')
319 && c[2] == QLatin1Char('c') && c[3] == QLatin1Char('k')
320 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('g')
321 && c[6] == QLatin1Char('e'))
322 return QScriptGrammar::T_RESERVED_WORD;
323 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('r')
324 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('v')
325 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('t')
326 && c[6] == QLatin1Char('e'))
327 return QScriptGrammar::T_RESERVED_WORD;
328 }
329 } break;
330
331 case 8: {
332 if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('o')
333 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('t')
334 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('n')
335 && c[6] == QLatin1Char('u') && c[7] == QLatin1Char('e'))
336 return QScriptGrammar::T_CONTINUE;
337 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('u')
338 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('c')
339 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('i')
340 && c[6] == QLatin1Char('o') && c[7] == QLatin1Char('n'))
341 return QScriptGrammar::T_FUNCTION;
342 else if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
343 && c[2] == QLatin1Char('b') && c[3] == QLatin1Char('u')
344 && c[4] == QLatin1Char('g') && c[5] == QLatin1Char('g')
345 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('r'))
346 return QScriptGrammar::T_DEBUGGER;
347 else if (check_reserved) {
348 if (c[0] == QLatin1Char('a') && c[1] == QLatin1Char('b')
349 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('t')
350 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('a')
351 && c[6] == QLatin1Char('c') && c[7] == QLatin1Char('t'))
352 return QScriptGrammar::T_RESERVED_WORD;
353 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('o')
354 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('a')
355 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('i')
356 && c[6] == QLatin1Char('l') && c[7] == QLatin1Char('e'))
357 return QScriptGrammar::T_RESERVED_WORD;
358 }
359 } break;
360
361 case 9: {
362 if (check_reserved) {
363 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n')
364 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e')
365 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('f')
366 && c[6] == QLatin1Char('a') && c[7] == QLatin1Char('c')
367 && c[8] == QLatin1Char('e'))
368 return QScriptGrammar::T_RESERVED_WORD;
369 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r')
370 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('n')
371 && c[4] == QLatin1Char('s') && c[5] == QLatin1Char('i')
372 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('n')
373 && c[8] == QLatin1Char('t'))
374 return QScriptGrammar::T_RESERVED_WORD;
375 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('r')
376 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('t')
377 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('c')
378 && c[6] == QLatin1Char('t') && c[7] == QLatin1Char('e')
379 && c[8] == QLatin1Char('d'))
380 return QScriptGrammar::T_RESERVED_WORD;
381 }
382 } break;
383
384 case 10: {
385 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n')
386 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('t')
387 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('n')
388 && c[6] == QLatin1Char('c') && c[7] == QLatin1Char('e')
389 && c[8] == QLatin1Char('o') && c[9] == QLatin1Char('f'))
390 return QScriptGrammar::T_INSTANCEOF;
391 else if (check_reserved) {
392 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('m')
393 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('l')
394 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('m')
395 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('n')
396 && c[8] == QLatin1Char('t') && c[9] == QLatin1Char('s'))
397 return QScriptGrammar::T_RESERVED_WORD;
398 }
399 } break;
400
401 case 12: {
402 if (check_reserved) {
403 if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('y')
404 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('c')
405 && c[4] == QLatin1Char('h') && c[5] == QLatin1Char('r')
406 && c[6] == QLatin1Char('o') && c[7] == QLatin1Char('n')
407 && c[8] == QLatin1Char('i') && c[9] == QLatin1Char('z')
408 && c[10] == QLatin1Char('e') && c[11] == QLatin1Char('d'))
409 return QScriptGrammar::T_RESERVED_WORD;
410 }
411 } break;
412
413 } // switch
414
415 return -1;
416}
417
418int QScript::Lexer::lex()
419{
420 int token = 0;
421 state = Start;
422 ushort stringType = 0; // either single or double quotes
423 pos8 = pos16 = 0;
424 done = false;
425 terminator = false;
426
427 // did we push a token on the stack previously ?
428 // (after an automatic semicolon insertion)
429 if (stackToken >= 0) {
430 setDone(Other);
431 token = stackToken;
432 stackToken = -1;
433 }
434
435 while (!done) {
436 switch (state) {
437 case Start:
438 if (isWhiteSpace()) {
439 // do nothing
440 } else if (current == '/' && next1 == '/') {
441 recordStartPos();
442 shift(p: 1);
443 state = InSingleLineComment;
444 } else if (current == '/' && next1 == '*') {
445 recordStartPos();
446 shift(p: 1);
447 state = InMultiLineComment;
448 } else if (current == 0) {
449 syncProhibitAutomaticSemicolon();
450 if (!terminator && !delimited && !prohibitAutomaticSemicolon) {
451 // automatic semicolon insertion if program incomplete
452 token = QScriptGrammar::T_SEMICOLON;
453 stackToken = 0;
454 setDone(Other);
455 } else {
456 setDone(Eof);
457 }
458 } else if (isLineTerminator()) {
459 shiftWindowsLineBreak();
460 yylineno++;
461 yycolumn = 0;
462 bol = true;
463 terminator = true;
464 syncProhibitAutomaticSemicolon();
465 if (restrKeyword) {
466 token = QScriptGrammar::T_SEMICOLON;
467 setDone(Other);
468 }
469 } else if (current == '"' || current == '\'') {
470 recordStartPos();
471 state = InString;
472 stringType = current;
473 } else if (isIdentLetter(c: current)) {
474 recordStartPos();
475 record16(c: current);
476 state = InIdentifier;
477 } else if (current == '0') {
478 recordStartPos();
479 record8(c: current);
480 state = InNum0;
481 } else if (isDecimalDigit(c: current)) {
482 recordStartPos();
483 record8(c: current);
484 state = InNum;
485 } else if (current == '.' && isDecimalDigit(c: next1)) {
486 recordStartPos();
487 record8(c: current);
488 state = InDecimal;
489 } else {
490 recordStartPos();
491 token = matchPunctuator(c1: current, c2: next1, c3: next2, c4: next3);
492 if (token != -1) {
493 if (terminator && !delimited && !prohibitAutomaticSemicolon
494 && (token == QScriptGrammar::T_PLUS_PLUS
495 || token == QScriptGrammar::T_MINUS_MINUS)) {
496 // automatic semicolon insertion
497 stackToken = token;
498 token = QScriptGrammar::T_SEMICOLON;
499 }
500 setDone(Other);
501 }
502 else {
503 setDone(Bad);
504 err = IllegalCharacter;
505 errmsg = QLatin1String("Illegal character");
506 }
507 }
508 break;
509 case InString:
510 if (current == stringType) {
511 shift(p: 1);
512 setDone(String);
513 } else if (current == 0 || isLineTerminator()) {
514 setDone(Bad);
515 err = UnclosedStringLiteral;
516 errmsg = QLatin1String("Unclosed string at end of line");
517 } else if (current == '\\') {
518 state = InEscapeSequence;
519 } else {
520 record16(c: current);
521 }
522 break;
523 // Escape Sequences inside of strings
524 case InEscapeSequence:
525 if (isOctalDigit(c: current)) {
526 if (current >= '0' && current <= '3' &&
527 isOctalDigit(c: next1) && isOctalDigit(c: next2)) {
528 record16(c: convertOctal(c1: current, c2: next1, c3: next2));
529 shift(p: 2);
530 state = InString;
531 } else if (isOctalDigit(c: current) &&
532 isOctalDigit(c: next1)) {
533 record16(c: convertOctal(c1: '0', c2: current, c3: next1));
534 shift(p: 1);
535 state = InString;
536 } else if (isOctalDigit(c: current)) {
537 record16(c: convertOctal(c1: '0', c2: '0', c3: current));
538 state = InString;
539 } else {
540 setDone(Bad);
541 err = IllegalEscapeSequence;
542 errmsg = QLatin1String("Illegal escape squence");
543 }
544 } else if (current == 'x')
545 state = InHexEscape;
546 else if (current == 'u')
547 state = InUnicodeEscape;
548 else {
549 if (isLineTerminator()) {
550 shiftWindowsLineBreak();
551 yylineno++;
552 yycolumn = 0;
553 bol = true;
554 } else {
555 record16(c: singleEscape(c: current));
556 }
557 state = InString;
558 }
559 break;
560 case InHexEscape:
561 if (isHexDigit(c: current) && isHexDigit(c: next1)) {
562 state = InString;
563 record16(c: QLatin1Char(convertHex(c1: current, c2: next1)));
564 shift(p: 1);
565 } else if (current == stringType) {
566 record16(c: QLatin1Char('x'));
567 shift(p: 1);
568 setDone(String);
569 } else {
570 record16(c: QLatin1Char('x'));
571 record16(c: current);
572 state = InString;
573 }
574 break;
575 case InUnicodeEscape:
576 if (isHexDigit(c: current) && isHexDigit(c: next1) &&
577 isHexDigit(c: next2) && isHexDigit(c: next3)) {
578 record16(c: convertUnicode(c1: current, c2: next1, c3: next2, c4: next3));
579 shift(p: 3);
580 state = InString;
581 } else if (current == stringType) {
582 record16(c: QLatin1Char('u'));
583 shift(p: 1);
584 setDone(String);
585 } else {
586 setDone(Bad);
587 err = IllegalUnicodeEscapeSequence;
588 errmsg = QLatin1String("Illegal unicode escape sequence");
589 }
590 break;
591 case InSingleLineComment:
592 if (isLineTerminator()) {
593 shiftWindowsLineBreak();
594 yylineno++;
595 yycolumn = 0;
596 terminator = true;
597 bol = true;
598 if (restrKeyword) {
599 token = QScriptGrammar::T_SEMICOLON;
600 setDone(Other);
601 } else
602 state = Start;
603 } else if (current == 0) {
604 setDone(Eof);
605 }
606 break;
607 case InMultiLineComment:
608 if (current == 0) {
609 setDone(Bad);
610 err = UnclosedComment;
611 errmsg = QLatin1String("Unclosed comment at end of file");
612 } else if (isLineTerminator()) {
613 shiftWindowsLineBreak();
614 yylineno++;
615 } else if (current == '*' && next1 == '/') {
616 state = Start;
617 shift(p: 1);
618 }
619 break;
620 case InIdentifier:
621 if (isIdentLetter(c: current) || isDecimalDigit(c: current)) {
622 record16(c: current);
623 break;
624 }
625 setDone(Identifier);
626 break;
627 case InNum0:
628 if (current == 'x' || current == 'X') {
629 record8(c: current);
630 state = InHex;
631 } else if (current == '.') {
632 record8(c: current);
633 state = InDecimal;
634 } else if (current == 'e' || current == 'E') {
635 record8(c: current);
636 state = InExponentIndicator;
637 } else if (isOctalDigit(c: current)) {
638 record8(c: current);
639 state = InOctal;
640 } else if (isDecimalDigit(c: current)) {
641 record8(c: current);
642 state = InDecimal;
643 } else {
644 setDone(Number);
645 }
646 break;
647 case InHex:
648 if (isHexDigit(c: current))
649 record8(c: current);
650 else
651 setDone(Hex);
652 break;
653 case InOctal:
654 if (isOctalDigit(c: current)) {
655 record8(c: current);
656 } else if (isDecimalDigit(c: current)) {
657 record8(c: current);
658 state = InDecimal;
659 } else {
660 setDone(Octal);
661 }
662 break;
663 case InNum:
664 if (isDecimalDigit(c: current)) {
665 record8(c: current);
666 } else if (current == '.') {
667 record8(c: current);
668 state = InDecimal;
669 } else if (current == 'e' || current == 'E') {
670 record8(c: current);
671 state = InExponentIndicator;
672 } else {
673 setDone(Number);
674 }
675 break;
676 case InDecimal:
677 if (isDecimalDigit(c: current)) {
678 record8(c: current);
679 } else if (current == 'e' || current == 'E') {
680 record8(c: current);
681 state = InExponentIndicator;
682 } else {
683 setDone(Number);
684 }
685 break;
686 case InExponentIndicator:
687 if (current == '+' || current == '-') {
688 record8(c: current);
689 } else if (isDecimalDigit(c: current)) {
690 record8(c: current);
691 state = InExponent;
692 } else {
693 setDone(Bad);
694 err = IllegalExponentIndicator;
695 errmsg = QLatin1String("Illegal syntax for exponential number");
696 }
697 break;
698 case InExponent:
699 if (isDecimalDigit(c: current)) {
700 record8(c: current);
701 } else {
702 setDone(Number);
703 }
704 break;
705 default:
706 Q_ASSERT_X(0, "Lexer::lex", "Unhandled state in switch statement");
707 }
708
709 // move on to the next character
710 if (!done)
711 shift(p: 1);
712 if (state != Start && state != InSingleLineComment)
713 bol = false;
714 }
715
716 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
717 if ((state == Number || state == Octal || state == Hex)
718 && isIdentLetter(c: current)) {
719 state = Bad;
720 err = IllegalIdentifier;
721 errmsg = QLatin1String("Identifier cannot start with numeric literal");
722 }
723
724 // terminate string
725 buffer8[pos8] = '\0';
726
727 double dval = 0;
728 if (state == Number) {
729 dval = QByteArray::fromRawData(buffer8, size: pos8).toDouble();
730 } else if (state == Hex) { // scan hex numbers
731 dval = QScript::integerFromString(buf: buffer8, size: pos8, radix: 16);
732 state = Number;
733 } else if (state == Octal) { // scan octal number
734 dval = QScript::integerFromString(buf: buffer8, size: pos8, radix: 8);
735 state = Number;
736 }
737
738 restrKeyword = false;
739 delimited = false;
740
741 switch (parenthesesState) {
742 case IgnoreParentheses:
743 break;
744 case CountParentheses:
745 if (token == QScriptGrammar::T_RPAREN) {
746 --parenthesesCount;
747 if (parenthesesCount == 0)
748 parenthesesState = BalancedParentheses;
749 } else if (token == QScriptGrammar::T_LPAREN) {
750 ++parenthesesCount;
751 }
752 break;
753 case BalancedParentheses:
754 parenthesesState = IgnoreParentheses;
755 break;
756 }
757
758 switch (state) {
759 case Eof:
760 return 0;
761 case Other:
762 if(token == QScriptGrammar::T_RBRACE || token == QScriptGrammar::T_SEMICOLON)
763 delimited = true;
764 return token;
765 case Identifier:
766 if ((token = findReservedWord(c: buffer16, size: pos16)) < 0) {
767 /* TODO: close leak on parse error. same holds true for String */
768 if (driver) {
769 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
770 qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
771 } else
772 qsyylval.ustr = 0;
773 return QScriptGrammar::T_IDENTIFIER;
774 }
775 if (token == QScriptGrammar::T_CONTINUE || token == QScriptGrammar::T_BREAK
776 || token == QScriptGrammar::T_RETURN || token == QScriptGrammar::T_THROW) {
777 restrKeyword = true;
778 } else if (token == QScriptGrammar::T_IF || token == QScriptGrammar::T_FOR
779 || token == QScriptGrammar::T_WHILE || token == QScriptGrammar::T_WITH) {
780 parenthesesState = CountParentheses;
781 parenthesesCount = 0;
782 } else if (token == QScriptGrammar::T_DO) {
783 parenthesesState = BalancedParentheses;
784 }
785 return token;
786 case String:
787 if (driver) {
788 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
789 qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
790 } else
791 qsyylval.ustr = 0;
792 return QScriptGrammar::T_STRING_LITERAL;
793 case Number:
794 qsyylval.dval = dval;
795 return QScriptGrammar::T_NUMERIC_LITERAL;
796 case Bad:
797 return -1;
798 default:
799 Q_ASSERT(!"unhandled numeration value in switch");
800 return -1;
801 }
802}
803
804bool QScript::Lexer::isWhiteSpace() const
805{
806 return (current == ' ' || current == '\t' ||
807 current == 0x0b || current == 0x0c);
808}
809
810bool QScript::Lexer::isLineTerminator() const
811{
812 return (current == '\n' || current == '\r');
813}
814
815bool QScript::Lexer::isIdentLetter(ushort c)
816{
817 /* TODO: allow other legitimate unicode chars */
818 return ((c >= 'a' && c <= 'z')
819 || (c >= 'A' && c <= 'Z')
820 || c == '$'
821 || c == '_');
822}
823
824bool QScript::Lexer::isDecimalDigit(ushort c)
825{
826 return (c >= '0' && c <= '9');
827}
828
829bool QScript::Lexer::isHexDigit(ushort c) const
830{
831 return ((c >= '0' && c <= '9')
832 || (c >= 'a' && c <= 'f')
833 || (c >= 'A' && c <= 'F'));
834}
835
836bool QScript::Lexer::isOctalDigit(ushort c) const
837{
838 return (c >= '0' && c <= '7');
839}
840
841int QScript::Lexer::matchPunctuator(ushort c1, ushort c2,
842 ushort c3, ushort c4)
843{
844 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
845 shift(p: 4);
846 return QScriptGrammar::T_GT_GT_GT_EQ;
847 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
848 shift(p: 3);
849 return QScriptGrammar::T_EQ_EQ_EQ;
850 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
851 shift(p: 3);
852 return QScriptGrammar::T_NOT_EQ_EQ;
853 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
854 shift(p: 3);
855 return QScriptGrammar::T_GT_GT_GT;
856 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
857 shift(p: 3);
858 return QScriptGrammar::T_LT_LT_EQ;
859 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
860 shift(p: 3);
861 return QScriptGrammar::T_GT_GT_EQ;
862 } else if (c1 == '<' && c2 == '=') {
863 shift(p: 2);
864 return QScriptGrammar::T_LE;
865 } else if (c1 == '>' && c2 == '=') {
866 shift(p: 2);
867 return QScriptGrammar::T_GE;
868 } else if (c1 == '!' && c2 == '=') {
869 shift(p: 2);
870 return QScriptGrammar::T_NOT_EQ;
871 } else if (c1 == '+' && c2 == '+') {
872 shift(p: 2);
873 return QScriptGrammar::T_PLUS_PLUS;
874 } else if (c1 == '-' && c2 == '-') {
875 shift(p: 2);
876 return QScriptGrammar::T_MINUS_MINUS;
877 } else if (c1 == '=' && c2 == '=') {
878 shift(p: 2);
879 return QScriptGrammar::T_EQ_EQ;
880 } else if (c1 == '+' && c2 == '=') {
881 shift(p: 2);
882 return QScriptGrammar::T_PLUS_EQ;
883 } else if (c1 == '-' && c2 == '=') {
884 shift(p: 2);
885 return QScriptGrammar::T_MINUS_EQ;
886 } else if (c1 == '*' && c2 == '=') {
887 shift(p: 2);
888 return QScriptGrammar::T_STAR_EQ;
889 } else if (c1 == '/' && c2 == '=') {
890 shift(p: 2);
891 return QScriptGrammar::T_DIVIDE_EQ;
892 } else if (c1 == '&' && c2 == '=') {
893 shift(p: 2);
894 return QScriptGrammar::T_AND_EQ;
895 } else if (c1 == '^' && c2 == '=') {
896 shift(p: 2);
897 return QScriptGrammar::T_XOR_EQ;
898 } else if (c1 == '%' && c2 == '=') {
899 shift(p: 2);
900 return QScriptGrammar::T_REMAINDER_EQ;
901 } else if (c1 == '|' && c2 == '=') {
902 shift(p: 2);
903 return QScriptGrammar::T_OR_EQ;
904 } else if (c1 == '<' && c2 == '<') {
905 shift(p: 2);
906 return QScriptGrammar::T_LT_LT;
907 } else if (c1 == '>' && c2 == '>') {
908 shift(p: 2);
909 return QScriptGrammar::T_GT_GT;
910 } else if (c1 == '&' && c2 == '&') {
911 shift(p: 2);
912 return QScriptGrammar::T_AND_AND;
913 } else if (c1 == '|' && c2 == '|') {
914 shift(p: 2);
915 return QScriptGrammar::T_OR_OR;
916 }
917
918 switch(c1) {
919 case '=': shift(p: 1); return QScriptGrammar::T_EQ;
920 case '>': shift(p: 1); return QScriptGrammar::T_GT;
921 case '<': shift(p: 1); return QScriptGrammar::T_LT;
922 case ',': shift(p: 1); return QScriptGrammar::T_COMMA;
923 case '!': shift(p: 1); return QScriptGrammar::T_NOT;
924 case '~': shift(p: 1); return QScriptGrammar::T_TILDE;
925 case '?': shift(p: 1); return QScriptGrammar::T_QUESTION;
926 case ':': shift(p: 1); return QScriptGrammar::T_COLON;
927 case '.': shift(p: 1); return QScriptGrammar::T_DOT;
928 case '+': shift(p: 1); return QScriptGrammar::T_PLUS;
929 case '-': shift(p: 1); return QScriptGrammar::T_MINUS;
930 case '*': shift(p: 1); return QScriptGrammar::T_STAR;
931 case '/': shift(p: 1); return QScriptGrammar::T_DIVIDE_;
932 case '&': shift(p: 1); return QScriptGrammar::T_AND;
933 case '|': shift(p: 1); return QScriptGrammar::T_OR;
934 case '^': shift(p: 1); return QScriptGrammar::T_XOR;
935 case '%': shift(p: 1); return QScriptGrammar::T_REMAINDER;
936 case '(': shift(p: 1); return QScriptGrammar::T_LPAREN;
937 case ')': shift(p: 1); return QScriptGrammar::T_RPAREN;
938 case '{': shift(p: 1); return QScriptGrammar::T_LBRACE;
939 case '}': shift(p: 1); return QScriptGrammar::T_RBRACE;
940 case '[': shift(p: 1); return QScriptGrammar::T_LBRACKET;
941 case ']': shift(p: 1); return QScriptGrammar::T_RBRACKET;
942 case ';': shift(p: 1); return QScriptGrammar::T_SEMICOLON;
943
944 default: return -1;
945 }
946}
947
948ushort QScript::Lexer::singleEscape(ushort c) const
949{
950 switch(c) {
951 case 'b':
952 return 0x08;
953 case 't':
954 return 0x09;
955 case 'n':
956 return 0x0A;
957 case 'v':
958 return 0x0B;
959 case 'f':
960 return 0x0C;
961 case 'r':
962 return 0x0D;
963 case '"':
964 return 0x22;
965 case '\'':
966 return 0x27;
967 case '\\':
968 return 0x5C;
969 default:
970 return c;
971 }
972}
973
974ushort QScript::Lexer::convertOctal(ushort c1, ushort c2,
975 ushort c3) const
976{
977 return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
978}
979
980unsigned char QScript::Lexer::convertHex(ushort c)
981{
982 if (c >= '0' && c <= '9')
983 return (c - '0');
984 else if (c >= 'a' && c <= 'f')
985 return (c - 'a' + 10);
986 else
987 return (c - 'A' + 10);
988}
989
990unsigned char QScript::Lexer::convertHex(ushort c1, ushort c2)
991{
992 return ((convertHex(c: c1) << 4) + convertHex(c: c2));
993}
994
995QChar QScript::Lexer::convertUnicode(ushort c1, ushort c2,
996 ushort c3, ushort c4)
997{
998 return QChar((convertHex(c: c3) << 4) + convertHex(c: c4),
999 (convertHex(c: c1) << 4) + convertHex(c: c2));
1000}
1001
1002void QScript::Lexer::record8(ushort c)
1003{
1004 Q_ASSERT(c <= 0xff);
1005
1006 // enlarge buffer if full
1007 if (pos8 >= size8 - 1) {
1008 char *tmp = new char[2 * size8];
1009 memcpy(dest: tmp, src: buffer8, n: size8 * sizeof(char));
1010 delete [] buffer8;
1011 buffer8 = tmp;
1012 size8 *= 2;
1013 }
1014
1015 buffer8[pos8++] = (char) c;
1016}
1017
1018void QScript::Lexer::record16(QChar c)
1019{
1020 // enlarge buffer if full
1021 if (pos16 >= size16 - 1) {
1022 QChar *tmp = new QChar[2 * size16];
1023 memcpy(dest: tmp, src: buffer16, n: size16 * sizeof(QChar));
1024 delete [] buffer16;
1025 buffer16 = tmp;
1026 size16 *= 2;
1027 }
1028
1029 buffer16[pos16++] = c;
1030}
1031
1032void QScript::Lexer::recordStartPos()
1033{
1034 startlineno = yylineno;
1035 startcolumn = yycolumn;
1036}
1037
1038bool QScript::Lexer::scanRegExp(RegExpBodyPrefix prefix)
1039{
1040 pos16 = 0;
1041 bool lastWasEscape = false;
1042
1043 if (prefix == EqualPrefix)
1044 record16(c: QLatin1Char('='));
1045
1046 while (1) {
1047 if (isLineTerminator() || current == 0) {
1048 errmsg = QLatin1String("Unterminated regular expression literal");
1049 return false;
1050 }
1051 else if (current != '/' || lastWasEscape == true)
1052 {
1053 record16(c: current);
1054 lastWasEscape = !lastWasEscape && (current == '\\');
1055 }
1056 else {
1057 if (driver) {
1058 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
1059 pattern = 0; // driver->intern(buffer16, pos16);
1060 } else
1061 pattern = 0;
1062 pos16 = 0;
1063 shift(p: 1);
1064 break;
1065 }
1066 shift(p: 1);
1067 }
1068
1069 flags = 0;
1070 while (isIdentLetter(c: current)) {
1071 // current version was remade from this line:
1072 //int flag = QScript::Ecma::RegExp::flagFromChar(current);
1073 //code was "inlined" because it was only one call to this function
1074 int flag;
1075 switch (current) {
1076 case 'g': flag = 0x01; break;
1077 case 'm': flag = 0x02; break;
1078 case 'i': flag = 0x04; break;
1079 default: flag = 0;
1080 }
1081 if (flag == 0) {
1082 errmsg = QString::fromLatin1(str: "Invalid regular expression flag '%0'")
1083 .arg(a: QChar(current));
1084 return false;
1085 }
1086 flags |= flag;
1087 record16(c: current);
1088 shift(p: 1);
1089 }
1090
1091 return true;
1092}
1093
1094void QScript::Lexer::syncProhibitAutomaticSemicolon()
1095{
1096 if (parenthesesState == BalancedParentheses) {
1097 // we have seen something like "if (foo)", which means we should
1098 // never insert an automatic semicolon at this point, since it would
1099 // then be expanded into an empty statement (ECMA-262 7.9.1)
1100 prohibitAutomaticSemicolon = true;
1101 parenthesesState = IgnoreParentheses;
1102 } else {
1103 prohibitAutomaticSemicolon = false;
1104 }
1105}
1106
1107QT_END_NAMESPACE
1108

source code of qtscript/src/script/parser/qscriptlexer.cpp